Code: Select all
/* Super Fast 'Looking for Magics' by Tord Romstad
* Made 'Super-Fast' by Syed Fahad, and Matthew Brades
*
* Description: This program is an improvement over Tord Romstad's Feeding in Randoms to generate magics. I (Syed Fahad) started this working on this program when I ran Tord's program on my andriod and it couldn't generate the numbers. It would always fail. So, I began to speedup his program. Then, when I was stuck fighting a bug that freezed the computation somewhere when generating Bishop Magics, I showed my code to my close friend, Matthew Brades, who fixed the code and and then further speeded up the code by Hardware Instructions.
*
* Results:
* BEFORE: This program took 2 seconds for generating all magics on Core Duo 2.8 GHz.
* AFTER: This program generates all magics almost instantly w/o compiler optimization on my 1 GHz Micromax Canvas Doodle 3 with 512 MB of RAM using GCC on C4Driod.
* Huge improvement, you see.
*
* There are a lot of possible speed ups still, so...
* Version: 1.0
*/
#include <stdio.h>
#include <stdlib.h>
#define USE_32_BIT_MULTIPLICATIONS
typedef unsigned long long uint64;
const uint64 BitMask[64] = {
0x1ULL, 0x2ULL, 0x4ULL, 0x8ULL, 0x10ULL, 0x20ULL, 0x40ULL, 0x80ULL,
0x100ULL, 0x200ULL, 0x400ULL, 0x800ULL, 0x1000ULL, 0x2000ULL,
0x4000ULL, 0x8000ULL, 0x10000ULL, 0x20000ULL, 0x40000ULL, 0x80000ULL,
0x100000ULL, 0x200000ULL, 0x400000ULL, 0x800000ULL, 0x1000000ULL,
0x2000000ULL, 0x4000000ULL, 0x8000000ULL, 0x10000000ULL, 0x20000000ULL,
0x40000000ULL, 0x80000000ULL, 0x100000000ULL, 0x200000000ULL,
0x400000000ULL, 0x800000000ULL, 0x1000000000ULL, 0x2000000000ULL,
0x4000000000ULL, 0x8000000000ULL, 0x10000000000ULL, 0x20000000000ULL,
0x40000000000ULL, 0x80000000000ULL, 0x100000000000ULL,
0x200000000000ULL, 0x400000000000ULL, 0x800000000000ULL,
0x1000000000000ULL, 0x2000000000000ULL, 0x4000000000000ULL,
0x8000000000000ULL, 0x10000000000000ULL, 0x20000000000000ULL,
0x40000000000000ULL, 0x80000000000000ULL, 0x100000000000000ULL,
0x200000000000000ULL, 0x400000000000000ULL, 0x800000000000000ULL,
0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL,
0x8000000000000000ULL
};
// uint64 random_uint64()
// {
/*
uint64 u1, u2, u3, u4; u1 = (uint64) (random()) & 0xFFFF; u2 = (uint64)
(random()) & 0xFFFF; u3 = (uint64) (random()) & 0xFFFF; u4 = (uint64)
(random()) & 0xFFFF; */
#define u1 ((uint64) ((unsigned short) (rand())))
#define u2 ((uint64) ((unsigned short) (rand())))
#define u3 ((uint64) ((unsigned short) (rand())))
#define u4 ((uint64) ((unsigned short) (rand())))
// return u1 | (u2 << 16) | (u3 << 32) | (u4 << 48);
// }
#define random_uint64() (u1 | (u2 << 16) | (u3 << 32) | (u4 << 48))
/*
uint64 random_uint64_fewbits() { return random_uint64() & random_uint64() &
random_uint64(); } */
#define random_uint64_fewbits() (random_uint64() & random_uint64() & random_uint64())
int count_1s(uint64 b)
{
// Original
// int r;
// for (r = 0; b; r++, b &= b - 1);
// Non-GCC users
/* unsigned w = (unsigned)b >> 32, v = (unsigned)b; v -= (v >> 1) &
0x55555555; // 0-2 in 2 bits w -= (w >> 1) & 0x55555555; v = ((v >> 2)
& 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits w = ((w >> 2) &
0x33333333) + (w & 0x33333333); v = ((v >> 4) + v + (w >> 4) + w) &
0x0F0F0F0F; return (int)((v * 0x01010101) >> 24); */
return __builtin_popcountll(b);
}
const int BitTable[64] =
{ 63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
26, 60, 6, 23,
44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28, 58, 20, 37, 17,
36, 8
};
int pop_1st_bit(uint64 * bb)
{
uint64 b = *bb ^ (*bb - 1);
unsigned int fold = (unsigned)((b & 0xffffffff) ^ (b >> 32));
*bb &= (*bb - 1);
return BitTable[(fold * 0x783a9b23) >> 26];
}
uint64 index_to_uint64(int index, int bits, uint64 m)
{
int i, j, k;
uint64 result = 0ULL;
for (i = 0; i < bits; i++)
{
j = pop_1st_bit(&m);
if ((index & (BitMask[i])) != 0)
result |= (BitMask[j]);
}
return result;
}
uint64 rmask(int sq)
{
uint64 result = 0ULL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1; r <= 6; r++)
result |= (BitMask[(fl + r * 8)]);
for (r = rk - 1; r >= 1; r--)
result |= (BitMask[(fl + r * 8)]);
for (f = fl + 1; f <= 6; f++)
result |= (BitMask[(f + rk * 8)]);
for (f = fl - 1; f >= 1; f--)
result |= (BitMask[(f + rk * 8)]);
return result;
}
uint64 bmask(int sq)
{
uint64 result = 0ULL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1, f = fl + 1; r <= 6 && f <= 6; r++, f++)
result |= (BitMask[(f + r * 8)]);
for (r = rk + 1, f = fl - 1; r <= 6 && f >= 1; r++, f--)
result |= (BitMask[(f + r * 8)]);
for (r = rk - 1, f = fl + 1; r >= 1 && f <= 6; r--, f++)
result |= (BitMask[(f + r * 8)]);
for (r = rk - 1, f = fl - 1; r >= 1 && f >= 1; r--, f--)
result |= (BitMask[(f + r * 8)]);
return result;
}
uint64 ratt(int sq, uint64 block)
{
uint64 result = 0ULL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1; r <= 7; r++)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
for (r = rk - 1; r >= 0; r--)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
for (f = fl + 1; f <= 7; f++)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
for (f = fl - 1; f >= 0; f--)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
return result;
}
uint64 batt(int sq, uint64 block)
{
uint64 result = 0ULL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1, f = fl + 1; r <= 7 && f <= 7; r++, f++)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
for (r = rk + 1, f = fl - 1; r <= 7 && f >= 0; r++, f--)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
for (r = rk - 1, f = fl + 1; r >= 0 && f <= 7; r--, f++)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
for (r = rk - 1, f = fl - 1; r >= 0 && f >= 0; r--, f--)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
return result;
}
int transform(uint64 b, uint64 magic, int bits)
{
#if defined(USE_32_BIT_MULTIPLICATIONS)
return (unsigned)((int)b * (int)magic ^ (int)(b >> 32) *
(int)(magic >> 32)) >> (32 - bits);
#else
return (int)((b * magic) >> (64 - bits));
#endif
}
uint64 find_magic(int sq, int m, int bishop)
{
uint64 mask, b[4096], a[4096], used[4096], magic;
int i, j, k, n, mbits, fail;
int c = 0;
mask = bishop ? bmask(sq) : rmask(sq);
n = count_1s(mask);
for (i = 0; i < (BitMask[n]); i++)
{
b[i] = index_to_uint64(i, n, mask);
a[i] = bishop ? batt(sq, b[i]) : ratt(sq, b[i]);
}
for (k = 0; k < 100000000; ++k)
{
magic = random_uint64_fewbits();
// I've seen it generate seemingly correct magics
// without this code:
if (count_1s((mask * magic) & 0xFF00000000000000ULL) < 6) continue;
// But I'm not sure, so let it be there.
//
for (i = 0; i < 4096; i++)
used[i] = 0ULL;
for (i = 0, fail = 0; (fail == 0) && i < (BitMask[n]); i++)
{
j = transform(b[i], magic, m);
if (used[j] == 0ULL)
used[j] = a[i];
else if (used[j] != a[i])
fail = 1;
}
if (fail != 0)
return magic;
}
printf("***Failed***\n");
return 0ULL;
}
int RBits[64] =
{ 12, 11, 11, 11, 11, 11, 11, 12, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10,
10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10,
10, 10,
11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 12, 11,
11, 11,
11, 11, 11, 12
};
int BBits[64] =
{ 6, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5,
5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5, 5,
5, 5, 5,
5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6
};
int main()
{
int square;
uint64 magic;
printf("const uint64 RMagic[64] = {\n");
for (square = 0; square < 64; square++)
printf(" 0x%llxULL,\n", find_magic(square, RBits[square], 0));
printf("};\n\n");
printf("const uint64 BMagic[64] = {\n");
for (square = 0; square < 64; square++)
printf(" 0x%llxULL,\n", find_magic(square, BBits[square], 1));
printf("};\n\n");
// fflush(stdout);
return 0;
}