Super Fast 'Looking for magics' 1.0

Discussion of chess software programming and technical issues.

Moderators: hgm, Rebel, chrisw

User avatar
vittyvirus
Posts: 646
Joined: Wed Jun 18, 2014 2:30 pm
Full name: Fahad Syed

Super Fast 'Looking for magics' 1.0

Post by vittyvirus »

Code: Select all


/* Super Fast 'Looking for Magics' by Tord Romstad
  * Made 'Super-Fast' by Syed Fahad, and Matthew Brades
  *
  * Description: This program is an improvement over Tord Romstad's Feeding in Randoms to generate magics. I (Syed Fahad) started this working on this program when I ran Tord's program on my andriod and it couldn't generate the numbers. It would always fail. So, I began to speedup his program. Then, when I was stuck fighting a bug that freezed the computation somewhere when generating Bishop Magics, I showed my code to my close friend, Matthew Brades, who fixed the code and and then further speeded up the code by Hardware Instructions.
  *
  * Results:
  * BEFORE: This program took 2 seconds for generating all magics on Core Duo 2.8 GHz.
  * AFTER: This program generates all magics almost instantly w/o compiler optimization on my 1 GHz Micromax Canvas Doodle 3 with 512 MB of RAM using GCC on C4Driod.
  * Huge improvement, you see.
  *
  * There are a lot of possible speed ups still, so...
  * Version: 1.0
 */
 
#include <stdio.h>
#include <stdlib.h>
#define USE_32_BIT_MULTIPLICATIONS

typedef unsigned long long uint64;

const uint64 BitMask&#91;64&#93; = &#123;
	0x1ULL, 0x2ULL, 0x4ULL, 0x8ULL, 0x10ULL, 0x20ULL, 0x40ULL, 0x80ULL,
	0x100ULL, 0x200ULL, 0x400ULL, 0x800ULL, 0x1000ULL, 0x2000ULL,
	0x4000ULL, 0x8000ULL, 0x10000ULL, 0x20000ULL, 0x40000ULL, 0x80000ULL,
	0x100000ULL, 0x200000ULL, 0x400000ULL, 0x800000ULL, 0x1000000ULL,
	0x2000000ULL, 0x4000000ULL, 0x8000000ULL, 0x10000000ULL, 0x20000000ULL,
	0x40000000ULL, 0x80000000ULL, 0x100000000ULL, 0x200000000ULL,
	0x400000000ULL, 0x800000000ULL, 0x1000000000ULL, 0x2000000000ULL,
	0x4000000000ULL, 0x8000000000ULL, 0x10000000000ULL, 0x20000000000ULL,
	0x40000000000ULL, 0x80000000000ULL, 0x100000000000ULL,
	0x200000000000ULL, 0x400000000000ULL, 0x800000000000ULL,
	0x1000000000000ULL, 0x2000000000000ULL, 0x4000000000000ULL,
	0x8000000000000ULL, 0x10000000000000ULL, 0x20000000000000ULL,
	0x40000000000000ULL, 0x80000000000000ULL, 0x100000000000000ULL,
	0x200000000000000ULL, 0x400000000000000ULL, 0x800000000000000ULL,
	0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL,
	0x8000000000000000ULL
&#125;;

// uint64 random_uint64&#40;)
// &#123;
/* 
   uint64 u1, u2, u3, u4; u1 = &#40;uint64&#41; &#40;random&#40;)) & 0xFFFF; u2 = &#40;uint64&#41;
   &#40;random&#40;)) & 0xFFFF; u3 = &#40;uint64&#41; &#40;random&#40;)) & 0xFFFF; u4 = &#40;uint64&#41;
   &#40;random&#40;)) & 0xFFFF; */
#define u1 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
#define u2 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
#define u3 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
#define u4 (&#40;uint64&#41; (&#40;unsigned short&#41; &#40;rand&#40;))))
// return u1 | &#40;u2 << 16&#41; | &#40;u3 << 32&#41; | &#40;u4 << 48&#41;;
// &#125;
#define random_uint64&#40;) &#40;u1 | &#40;u2 << 16&#41; | &#40;u3 << 32&#41; | &#40;u4 << 48&#41;)
/* 
   uint64 random_uint64_fewbits&#40;) &#123; return random_uint64&#40;) & random_uint64&#40;) &
   random_uint64&#40;); &#125; */
#define random_uint64_fewbits&#40;) &#40;random_uint64&#40;) & random_uint64&#40;) & random_uint64&#40;))
int count_1s&#40;uint64 b&#41;
&#123;
	// Original
	// int r;
	// for &#40;r = 0; b; r++, b &= b - 1&#41;;
	// Non-GCC users
	/* unsigned w = &#40;unsigned&#41;b >> 32, v = &#40;unsigned&#41;b; v -= &#40;v >> 1&#41; &
	   0x55555555; // 0-2 in 2 bits w -= &#40;w >> 1&#41; & 0x55555555; v = (&#40;v >> 2&#41;
	   & 0x33333333&#41; + &#40;v & 0x33333333&#41;; // 0-4 in 4 bits w = (&#40;w >> 2&#41; &
	   0x33333333&#41; + &#40;w & 0x33333333&#41;; v = (&#40;v >> 4&#41; + v + &#40;w >> 4&#41; + w&#41; &
	   0x0F0F0F0F; return &#40;int&#41;(&#40;v * 0x01010101&#41; >> 24&#41;; */
	return __builtin_popcountll&#40;b&#41;;
&#125;

const int BitTable&#91;64&#93; =
	&#123; 63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
	51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
	26, 60, 6, 23,
	44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28, 58, 20, 37, 17,
	36, 8
&#125;;

int pop_1st_bit&#40;uint64 * bb&#41;
&#123;
	uint64 b = *bb ^ (*bb - 1&#41;;
	unsigned int fold = &#40;unsigned&#41;(&#40;b & 0xffffffff&#41; ^ &#40;b >> 32&#41;);
	*bb &= (*bb - 1&#41;;
	return BitTable&#91;&#40;fold * 0x783a9b23&#41; >> 26&#93;;
&#125;

uint64 index_to_uint64&#40;int index, int bits, uint64 m&#41;
&#123;
	int i, j, k;
	uint64 result = 0ULL;
	for &#40;i = 0; i < bits; i++)
	&#123;
		j = pop_1st_bit&#40;&m&#41;;
		if (&#40;index & &#40;BitMask&#91;i&#93;)) != 0&#41;
			result |= &#40;BitMask&#91;j&#93;);
	&#125;
	return result;
&#125;

uint64 rmask&#40;int sq&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1; r <= 6; r++)
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
	for &#40;r = rk - 1; r >= 1; r--)
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
	for &#40;f = fl + 1; f <= 6; f++)
		result |= &#40;BitMask&#91;&#40;f + rk * 8&#41;&#93;);
	for &#40;f = fl - 1; f >= 1; f--)
		result |= &#40;BitMask&#91;&#40;f + rk * 8&#41;&#93;);
	return result;
&#125;

uint64 bmask&#40;int sq&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1, f = fl + 1; r <= 6 && f <= 6; r++, f++)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	for &#40;r = rk + 1, f = fl - 1; r <= 6 && f >= 1; r++, f--)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	for &#40;r = rk - 1, f = fl + 1; r >= 1 && f <= 6; r--, f++)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	for &#40;r = rk - 1, f = fl - 1; r >= 1 && f >= 1; r--, f--)
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
	return result;
&#125;

uint64 ratt&#40;int sq, uint64 block&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1; r <= 7; r++)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk - 1; r >= 0; r--)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;f = fl + 1; f <= 7; f++)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;f = fl - 1; f >= 0; f--)
	&#123;
		result |= &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;fl + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	return result;
&#125;

uint64 batt&#40;int sq, uint64 block&#41;
&#123;
	uint64 result = 0ULL;
	int rk = sq / 8, fl = sq % 8, r, f;
	for &#40;r = rk + 1, f = fl + 1; r <= 7 && f <= 7; r++, f++)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk + 1, f = fl - 1; r <= 7 && f >= 0; r++, f--)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk - 1, f = fl + 1; r >= 0 && f <= 7; r--, f++)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	for &#40;r = rk - 1, f = fl - 1; r >= 0 && f >= 0; r--, f--)
	&#123;
		result |= &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;);
		if (&#40;block & &#40;BitMask&#91;&#40;f + r * 8&#41;&#93;)) != 0&#41;
			break;
	&#125;
	return result;
&#125;

int transform&#40;uint64 b, uint64 magic, int bits&#41;
&#123;
#if defined&#40;USE_32_BIT_MULTIPLICATIONS&#41;
	return &#40;unsigned&#41;(&#40;int&#41;b * &#40;int&#41;magic ^ &#40;int&#41;&#40;b >> 32&#41; *
					  &#40;int&#41;&#40;magic >> 32&#41;) >> &#40;32 - bits&#41;;
#else
	return &#40;int&#41;(&#40;b * magic&#41; >> &#40;64 - bits&#41;);
#endif
&#125;

uint64 find_magic&#40;int sq, int m, int bishop&#41;
&#123;
	uint64 mask, b&#91;4096&#93;, a&#91;4096&#93;, used&#91;4096&#93;, magic;
	int i, j, k, n, mbits, fail;
	int c = 0;
	mask = bishop ? bmask&#40;sq&#41; &#58; rmask&#40;sq&#41;;
	n = count_1s&#40;mask&#41;;
	for &#40;i = 0; i < &#40;BitMask&#91;n&#93;); i++)
	&#123;
		b&#91;i&#93; = index_to_uint64&#40;i, n, mask&#41;;
		a&#91;i&#93; = bishop ? batt&#40;sq, b&#91;i&#93;) &#58; ratt&#40;sq, b&#91;i&#93;);
	&#125;

	for &#40;k = 0; k < 100000000; ++k&#41;
	&#123;
		magic = random_uint64_fewbits&#40;);
		// I've seen it generate seemingly correct magics
		// without this code&#58;
		if &#40;count_1s&#40;&#40;mask * magic&#41; & 0xFF00000000000000ULL&#41; < 6&#41; continue;
		// But I'm not sure, so let it be there.
		//
		
		for &#40;i = 0; i < 4096; i++)
			used&#91;i&#93; = 0ULL;
		for &#40;i = 0, fail = 0; &#40;fail == 0&#41; && i < &#40;BitMask&#91;n&#93;); i++)
		&#123;
			j = transform&#40;b&#91;i&#93;, magic, m&#41;;
			if &#40;used&#91;j&#93; == 0ULL&#41;
				used&#91;j&#93; = a&#91;i&#93;;
			else if &#40;used&#91;j&#93; != a&#91;i&#93;)
				fail = 1;
		&#125;
		if &#40;fail != 0&#41;
			return magic;
	&#125;
	printf&#40;"***Failed***\n");
	return 0ULL;
&#125;

int RBits&#91;64&#93; =
	&#123; 12, 11, 11, 11, 11, 11, 11, 12, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10,
	10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10,
	10, 10,
	11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 12, 11,
	11, 11,
	11, 11, 11, 12
&#125;;

int BBits&#91;64&#93; =
	&#123; 6, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5,
	5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5, 5,
	5, 5, 5,
	5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6
&#125;;

int main&#40;)
&#123;
	int square;
	uint64 magic;
	printf&#40;"const uint64 RMagic&#91;64&#93; = &#123;\n");
	for &#40;square = 0; square < 64; square++)
		printf&#40;" 0x%llxULL,\n", find_magic&#40;square, RBits&#91;square&#93;, 0&#41;);
	printf&#40;"&#125;;\n\n");
	printf&#40;"const uint64 BMagic&#91;64&#93; = &#123;\n");
	for &#40;square = 0; square < 64; square++)
		printf&#40;" 0x%llxULL,\n", find_magic&#40;square, BBits&#91;square&#93;, 1&#41;);
	printf&#40;"&#125;;\n\n");
	// fflush&#40;stdout&#41;;
	return 0;
&#125;
Please Please, suggest me a better name.
mcostalba
Posts: 2684
Joined: Sat Jun 14, 2008 9:17 pm

Re: Super Fast 'Looking for magics' 1.0

Post by mcostalba »

This seems bogus:

Code: Select all

      if &#40;fail != 0&#41;
         return magic; 
You are supposed to have found the magic when you exit the loop with 'fail' still at zero. Isn't it?


BTW, in Stockfish I have modified original Tord's code to find all the magics for rook and bishop in about 100ms at engine's startup, in case you are interested.
User avatar
vittyvirus
Posts: 646
Joined: Wed Jun 18, 2014 2:30 pm
Full name: Fahad Syed

Re: Super Fast 'Looking for magics' 1.0

Post by vittyvirus »

mcostalba wrote:This seems bogus:

Code: Select all

      if &#40;fail != 0&#41;
         return magic; 
You are supposed to have found the magic when you exit the loop with 'fail' still at zero. Isn't it?


BTW, in Stockfish I have modified original Tord's code to find all the magics for rook and bishop in about 100ms at engine's startup, in case you are interested.
Oh, oh that was a silly mistake I made!

Thank you!

I've a question.
const int RBits[64] =
{ 12, 11, 11, 11, 11, 11, 11, 12, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10,
10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10,
10, 10,
11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 12, 11,
11, 11,
11, 11, 11, 12
};
Even if I change everything to 10 or 1, there's no change in output.
User avatar
stegemma
Posts: 859
Joined: Mon Aug 10, 2009 10:05 pm
Location: Italy
Full name: Stefano Gemma

Re: Super Fast 'Looking for magics' 1.0

Post by stegemma »

If the compiler is not smart enough, you can replace this:

Code: Select all

int rk = sq / 8, fl = sq % 8, r, f;
with:

Code: Select all

int rk = sq >> 3, fl = sq & 7, r, f;
Sometimes it is faster to replace if statement with *&| operators, depending on what you are doing.
mar
Posts: 2555
Joined: Fri Nov 26, 2010 2:00 pm
Location: Czech Republic
Full name: Martin Sedlak

Re: Super Fast 'Looking for magics' 1.0

Post by mar »

stegemma wrote:Sometimes it is faster to replace if statement with *&| operators, depending on what you are doing.
Yes, this is true for signed divisions where the compiler can't do >> or & (unless it knows the value is never negative).
Arithmetic shift right on 2's complement machines is not the same as division by 2^n:
-1 >> 1 => -1 but -1/2 => 0 (and so on).
For unsigned types the compiler should be able to convert it to bit shifts (multiplications like *8, *4, *2 or (*8+1, *4+1, *2+1) possibly using displacement on x86/x64).
asanjuan
Posts: 214
Joined: Thu Sep 01, 2011 5:38 pm
Location: Seville, Spain

Re: Super Fast 'Looking for magics' 1.0

Post by asanjuan »

It can be a good exercise, but you only need to find the magic numbers once in your life... then copy paste the magic numbers as constant in your source code and you are done.

Spending hours or days trying to save 2 seconds doesn't make sense at all...
Just my opinion.
Still learning how to play chess...
knigths move in "L" shape ¿right?
User avatar
stegemma
Posts: 859
Joined: Mon Aug 10, 2009 10:05 pm
Location: Italy
Full name: Stefano Gemma

Re: Super Fast 'Looking for magics' 1.0

Post by stegemma »

asanjuan wrote:It can be a good exercise, but you only need to find the magic numbers once in your life... then copy paste the magic numbers as constant in your source code and you are done.

Spending hours or days trying to save 2 seconds doesn't make sense at all...
Just my opinion.
That's true but add it that i don't use magics, for me is interesting only the optimation itself, not the result, in this situation. :)

As you say, this is a good exercise. I've used things learned writing my chess programs in some commercial software, so anything could be helpfull, even if apparently useless and time-consuming..
asanjuan
Posts: 214
Joined: Thu Sep 01, 2011 5:38 pm
Location: Seville, Spain

Re: Super Fast 'Looking for magics' 1.0

Post by asanjuan »

stegemma wrote:
asanjuan wrote:It can be a good exercise, but you only need to find the magic numbers once in your life... then copy paste the magic numbers as constant in your source code and you are done.

Spending hours or days trying to save 2 seconds doesn't make sense at all...
Just my opinion.
That's true but add it that i don't use magics, for me is interesting only the optimation itself, not the result, in this situation. :)

As you say, this is a good exercise. I've used things learned writing my chess programs in some commercial software, so anything could be helpfull, even if apparently useless and time-consuming..
I must admit that I have studied this code when I added magic bitboards to Rhetoric, but just because I wanted to understand the whole concept.

And it was a pretty good exercise... until I forgot everything learned two days after :?
Still learning how to play chess...
knigths move in "L" shape ¿right?
tttony
Posts: 268
Joined: Sun Apr 24, 2011 12:33 am

Re: Super Fast 'Looking for magics' 1.0

Post by tttony »

Months ago I tested like 3/4 codes from the web and only the sample from Pradyumna Kannan worked fine, I gave up testing magics, looking for something more simple

Now that you posted this code, I think I tested a code like this, but didn't worked, I think is that I don't know how to generate the move database

I will give a try to this code and read some docs to understand how to generate the move database

Thanks for the code!!
User avatar
Fabio Gobbato
Posts: 217
Joined: Fri Apr 11, 2014 10:45 am
Full name: Fabio Gobbato

Re: Super Fast 'Looking for magics' 1.0

Post by Fabio Gobbato »

To generate the magic numbers and the databases you can have a look at this code: http://www.g-sei.org/wp-content/uploads ... ati_MB.zip
It is largely taken from the chessprogramming.