Super Fast 'Looking for magics' 1.0

vittyvirus · Post by **vittyvirus** » Sat Oct 25, 2014 5:53 pm


/* Super Fast 'Looking for Magics' by Tord Romstad
  * Made 'Super-Fast' by Syed Fahad, and Matthew Brades
  *
  * Description&#58; This program is an improvement over Tord Romstad's Feeding in Randoms to generate magics. I &#40;Syed Fahad&#41; started this working on this program when I ran Tord's program on my andriod and it couldn't generate the numbers. It would always fail. So, I began to speedup his program. Then, when I was stuck fighting a bug that freezed the computation somewhere when generating Bishop Magics, I showed my code to my close friend, Matthew Brades, who fixed the code and and then further speeded up the code by Hardware Instructions.
  *
  * Results&#58;
  * BEFORE&#58; This program took 2 seconds for generating all magics on Core Duo 2.8 GHz.
  * AFTER&#58; This program generates all magics almost instantly w/o compiler optimization on my 1 GHz Micromax Canvas Doodle 3 with 512 MB of RAM using GCC on C4Driod.
  * Huge improvement, you see.
  *
  * There are a lot of possible speed ups still, so...
  * Version&#58; 1.0
 */
 
#include <stdio.h>
#include <stdlib.h>
#define USE_32_BIT_MULTIPLICATIONS

typedef unsigned long long uint64;

const uint64 BitMask&#91;64&#93; = &#123;
	0x1ULL, 0x2ULL, 0x4ULL, 0x8ULL, 0x10ULL, 0x20ULL, 0x40ULL, 0x80ULL,
	0x100ULL, 0x200ULL, 0x400ULL, 0x800ULL, 0x1000ULL, 0x2000ULL,
	0x4000ULL, 0x8000ULL, 0x10000ULL, 0x20000ULL, 0x40000ULL, 0x80000ULL,
	0x100000ULL, 0x200000ULL, 0x400000ULL, 0x800000ULL, 0x1000000ULL,
	0x2000000ULL, 0x4000000ULL, 0x8000000ULL, 0x10000000ULL, 0x20000000ULL,
	0x40000000ULL, 0x80000000ULL, 0x100000000ULL, 0x200000000ULL,
	0x400000000ULL, 0x800000000ULL, 0x1000000000ULL, 0x2000000000ULL,
	0x4000000000ULL, 0x8000000000ULL, 0x10000000000ULL, 0x20000000000ULL,
	0x40000000000ULL, 0x80000000000ULL, 0x100000000000ULL,
	0x200000000000ULL, 0x400000000000ULL, 0x800000000000ULL,
	0x1000000000000ULL, 0x2000000000000ULL, 0x4000000000000ULL,
	0x8000000000000ULL, 0x10000000000000ULL, 0x20000000000000ULL,
	0x40000000000000ULL, 0x80000000000000ULL, 0x100000000000000ULL,
	0x200000000000000ULL, 0x400000000000000ULL, 0x800000000000000ULL,
	0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL,
	0x8000000000000000ULL
&#125;;

// uint64 random_uint64&#40;)
// &#123;
/* 
   uint64 u1, u2, u3, u4; u1 = &#40;uint64&#41; &#40;random&#40;)) & 0xFFFF; u2 = &#40;uint64&#41;
   &#40;random&#40;)) & 0xFFFF; u3 = &#40;uint64&#41; &#40;random&#40;)) & 0xFFFF; u4 = &#40;uint64&#41;
   &#40;random&#40;)) & 0xFFFF; */
#define u1 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
#define u2 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
#define u3 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
#define u4 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
// return u1 | &#40;u2 << 16&#41; | &#40;u3 << 32&#41; | &#40;u4 << 48&#41;;
// &#125;
#define random_uint64&#40;) &#40;u1 | &#40;u2 << 16&#41; | &#40;u3 << 32&#41; | &#40;u4 << 48&#41;)
/* 
   uint64 random_uint64_fewbits&#40;) &#123; return random_uint64&#40;) & random_uint64&#40;) &
   random_uint64&#40;); &#125; */
#define random_uint64_fewbits&#40;) &#40;random_uint64&#40;) & random_uint64&#40;) & random_uint64&#40;))
int count_1s&#40;uint64 b&#41;
&#123;
	// Original
	// int r;
	// for &#40;r = 0; b; r++, b &= b - 1&#41;;
	// Non-GCC users
	/* unsigned w = &#40;unsigned&#41;b >> 32, v = &#40;unsigned&#41;b; v -= &#40;v >> 1&#41; &
	   0x55555555; // 0-2 in 2 bits w -= &#40;w >> 1&#41; & 0x55555555; v = (&#40;v >> 2&#41;
	   & 0x33333333&#41; + &#40;v & 0x33333333&#41;; // 0-4 in 4 bits w = (&#40;w >> 2&#41; &
	   0x33333333&#41; + &#40;w & 0x33333333&#41;; v = (&#40;v >> 4&#41; + v + &#40;w >> 4&#41; + w&#41; &
	   0x0F0F0F0F; return &#40;int&#41;(&#40;v * 0x01010101&#41; >> 24&#41;; */
	return __builtin_popcountll&#40;b&#41;;
&#125;

const int BitTable&#91;64&#93; =
	&#123; 63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
	51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
	26, 60, 6, 23,
	44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28, 58, 20, 37, 17,
	36, 8
&#125;;

int pop_1st_bit&#40;uint64 * bb&#41;
&#123;
	uint64 b = *bb ^ (*bb - 1&#41;;
	unsigned int fold = &#40;unsigned&#41;(&#40;b & 0xffffffff&#41; ^ &#40;b >> 32&#41;);
	*bb &= (*bb - 1&#41;;
	return BitTable&#91;&#40;fold * 0x783a9b23&#41; >> 26&#93;;
&#125;

uint64 index_to_uint64&#40;int index, int bits, uint64 m&#41;
&#123;
	int i, j, k;
	uint64 result = 0ULL;
	for &#40;i = 0; i < bits; i++)
	&#123;
		j = pop_1st_bit&#40;&m&#41;;
		if (&#40;index & &#40;BitMask&#91;i&#93;)) != 0&#41;
			result |= &#40;BitMask&#91;j&#93;);
	&#125;
	return result;
&#125;

uint64 rmask&#40;int sq&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1; r <= 6; r++)
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
	for &#40;r = rk - 1; r >= 1; r--)
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
	for &#40;f = fl + 1; f <= 6; f++)
		result |= &#40;BitMask&#91;&#40;f + rk * 8&#41;&#93;);
	for &#40;f = fl - 1; f >= 1; f--)
		result |= &#40;BitMask&#91;&#40;f + rk * 8&#41;&#93;);
	return result;
&#125;

uint64 bmask&#40;int sq&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1, f = fl + 1; r <= 6 && f <= 6; r++, f++)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	for &#40;r = rk + 1, f = fl - 1; r <= 6 && f >= 1; r++, f--)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	for &#40;r = rk - 1, f = fl + 1; r >= 1 && f <= 6; r--, f++)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	for &#40;r = rk - 1, f = fl - 1; r >= 1 && f >= 1; r--, f--)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	return result;
&#125;

uint64 ratt&#40;int sq, uint64 block&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1; r <= 7; r++)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk - 1; r >= 0; r--)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;f = fl + 1; f <= 7; f++)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;f = fl - 1; f >= 0; f--)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	return result;
&#125;

uint64 batt&#40;int sq, uint64 block&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1, f = fl + 1; r <= 7 && f <= 7; r++, f++)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk + 1, f = fl - 1; r <= 7 && f >= 0; r++, f--)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk - 1, f = fl + 1; r >= 0 && f <= 7; r--, f++)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk - 1, f = fl - 1; r >= 0 && f >= 0; r--, f--)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	return result;
&#125;

int transform&#40;uint64 b, uint64 magic, int bits&#41;
&#123;
#if defined&#40;USE_32_BIT_MULTIPLICATIONS&#41;
	return &#40;unsigned&#41;(&#40;int&#41;b * &#40;int&#41;magic ^ &#40;int&#41;&#40;b >> 32&#41; *
					  &#40;int&#41;&#40;magic >> 32&#41;) >> &#40;32 - bits&#41;;
#else
	return &#40;int&#41;(&#40;b * magic&#41; >> &#40;64 - bits&#41;);
#endif
&#125;

uint64 find_magic&#40;int sq, int m, int bishop&#41;
&#123;
	uint64 mask, b&#91;4096&#93;, a&#91;4096&#93;, used&#91;4096&#93;, magic;
	int i, j, k, n, mbits, fail;
	int c = 0;
	mask = bishop ? bmask&#40;sq&#41; &#58; rmask&#40;sq&#41;;
	n = count_1s&#40;mask&#41;;
	for &#40;i = 0; i < &#40;BitMask&#91;n&#93;); i++)
	&#123;
		b&#91;i&#93; = index_to_uint64&#40;i, n, mask&#41;;
		a&#91;i&#93; = bishop ? batt&#40;sq, b&#91;i&#93;) &#58; ratt&#40;sq, b&#91;i&#93;);
	&#125;

	for &#40;k = 0; k < 100000000; ++k&#41;
	&#123;
		magic = random_uint64_fewbits&#40;);
		// I've seen it generate seemingly correct magics
		// without this code&#58;
		if &#40;count_1s&#40;&#40;mask * magic&#41; & 0xFF00000000000000ULL&#41; < 6&#41; continue;
		// But I'm not sure, so let it be there.
		//
		
		for &#40;i = 0; i < 4096; i++)
			used&#91;i&#93; = 0ULL;
		for &#40;i = 0, fail = 0; &#40;fail == 0&#41; && i < &#40;BitMask&#91;n&#93;); i++)
		&#123;
			j = transform&#40;b&#91;i&#93;, magic, m&#41;;
			if &#40;used&#91;j&#93; == 0ULL&#41;
				used&#91;j&#93; = a&#91;i&#93;;
			else if &#40;used&#91;j&#93; != a&#91;i&#93;)
				fail = 1;
		&#125;
		if &#40;fail != 0&#41;
			return magic;
	&#125;
	printf&#40;"***Failed***\n");
	return 0ULL;
&#125;

int RBits&#91;64&#93; =
	&#123; 12, 11, 11, 11, 11, 11, 11, 12, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10,
	10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10,
	10, 10,
	11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 12, 11,
	11, 11,
	11, 11, 11, 12
&#125;;

int BBits&#91;64&#93; =
	&#123; 6, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5,
	5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5, 5,
	5, 5, 5,
	5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6
&#125;;

int main&#40;)
&#123;
	int square;
	uint64 magic;
	printf&#40;"const uint64 RMagic&#91;64&#93; = &#123;\n");
	for &#40;square = 0; square < 64; square++)
		printf&#40;" 0x%llxULL,\n", find_magic&#40;square, RBits&#91;square&#93;, 0&#41;);
	printf&#40;"&#125;;\n\n");
	printf&#40;"const uint64 BMagic&#91;64&#93; = &#123;\n");
	for &#40;square = 0; square < 64; square++)
		printf&#40;" 0x%llxULL,\n", find_magic&#40;square, BBits&#91;square&#93;, 1&#41;);
	printf&#40;"&#125;;\n\n");
	// fflush&#40;stdout&#41;;
	return 0;
&#125;

Please Please, suggest me a better name.

mcostalba · Post by **mcostalba** » Sat Oct 25, 2014 7:06 pm

This seems bogus:

Code: Select all

      if &#40;fail != 0&#41;
         return magic;

You are supposed to have found the magic when you exit the loop with 'fail' still at zero. Isn't it?

BTW, in Stockfish I have modified original Tord's code to find all the magics for rook and bishop in about 100ms at engine's startup, in case you are interested.

vittyvirus · Post by **vittyvirus** » Sat Oct 25, 2014 7:33 pm

mcostalba wrote:This seems bogus:
Code: Select all
      if &#40;fail != 0&#41;
         return magic; 
You are supposed to have found the magic when you exit the loop with 'fail' still at zero. Isn't it?

BTW, in Stockfish I have modified original Tord's code to find all the magics for rook and bishop in about 100ms at engine's startup, in case you are interested.

Oh, oh that was a silly mistake I made!

Thank you!

I've a question.
const int RBits[64] =
{ 12, 11, 11, 11, 11, 11, 11, 12, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10,
10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10,
10, 10,
11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 12, 11,
11, 11,
11, 11, 11, 12
};
Even if I change everything to 10 or 1, there's no change in output.

stegemma · Post by **stegemma** » Mon Oct 27, 2014 12:05 pm

If the compiler is not smart enough, you can replace this:

Code: Select all

int rk = sq / 8, fl = sq % 8, r, f;

with:

Code: Select all

int rk = sq >> 3, fl = sq & 7, r, f;

Sometimes it is faster to replace if statement with *&| operators, depending on what you are doing.

mar · Post by **mar** » Mon Oct 27, 2014 2:23 pm

stegemma wrote:Sometimes it is faster to replace if statement with *&| operators, depending on what you are doing.

Yes, this is true for signed divisions where the compiler can't do >> or & (unless it knows the value is never negative).
Arithmetic shift right on 2's complement machines is not the same as division by 2^n:
-1 >> 1 => -1 but -1/2 => 0 (and so on).
For unsigned types the compiler should be able to convert it to bit shifts (multiplications like *8, *4, *2 or (*8+1, *4+1, *2+1) possibly using displacement on x86/x64).

asanjuan · Post by **asanjuan** » Tue Oct 28, 2014 11:59 am

It can be a good exercise, but you only need to find the magic numbers once in your life... then copy paste the magic numbers as constant in your source code and you are done.

Spending hours or days trying to save 2 seconds doesn't make sense at all...
Just my opinion.

stegemma · Post by **stegemma** » Tue Oct 28, 2014 1:36 pm

asanjuan wrote:It can be a good exercise, but you only need to find the magic numbers once in your life... then copy paste the magic numbers as constant in your source code and you are done.

Spending hours or days trying to save 2 seconds doesn't make sense at all...
Just my opinion.

That's true but add it that i don't use magics, for me is interesting only the optimation itself, not the result, in this situation.

As you say, this is a good exercise. I've used things learned writing my chess programs in some commercial software, so anything could be helpfull, even if apparently useless and time-consuming..

asanjuan · Post by **asanjuan** » Tue Oct 28, 2014 3:45 pm

stegemma wrote:
asanjuan wrote:It can be a good exercise, but you only need to find the magic numbers once in your life... then copy paste the magic numbers as constant in your source code and you are done.

Spending hours or days trying to save 2 seconds doesn't make sense at all...
Just my opinion.
That's true but add it that i don't use magics, for me is interesting only the optimation itself, not the result, in this situation.

As you say, this is a good exercise. I've used things learned writing my chess programs in some commercial software, so anything could be helpfull, even if apparently useless and time-consuming..

I must admit that I have studied this code when I added magic bitboards to Rhetoric, but just because I wanted to understand the whole concept.

And it was a pretty good exercise... until I forgot everything learned two days after

tttony · Post by **tttony** » Wed Oct 29, 2014 6:13 am

Months ago I tested like 3/4 codes from the web and only the sample from Pradyumna Kannan worked fine, I gave up testing magics, looking for something more simple

Now that you posted this code, I think I tested a code like this, but didn't worked, I think is that I don't know how to generate the move database

I will give a try to this code and read some docs to understand how to generate the move database

Thanks for the code!!

Fabio Gobbato · Post by **Fabio Gobbato** » Wed Oct 29, 2014 8:32 pm

To generate the magic numbers and the databases you can have a look at this code: http://www.g-sei.org/wp-content/uploads ... ati_MB.zip
It is largely taken from the chessprogramming.

Super Fast 'Looking for magics' 1.0

Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0

Re: Super Fast 'Looking for magics' 1.0