Ok People, i have the benchmark done.
Mine is not a move generator one, but it is a Magic Number generator ( these numbers that one use in a magic bitboards implementation)
I took the base code from here:
https://www.chessprogramming.org/Looking_for_Magics
And i changed it a little to make it portable so the less differences between one language and the other, the better.
The C Code:
Code: Select all
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define USE_32_BIT_MULTIPLICATIONS
typedef unsigned long long uint64;
int seed = 1804289383;
int random()
{
int x = seed;
x ^= x << 13;
x ^= x << 17;
x ^= x >> 5;
seed = x;
return x;
}
uint64 random_uint64() {
uint64 u1, u2, u3, u4;
u1 = (uint64)(random()) & 0xFFFF; u2 = (uint64)(random()) & 0xFFFF;
u3 = (uint64)(random()) & 0xFFFF; u4 = (uint64)(random()) & 0xFFFF;
return u1 | (u2 << 16) | (u3 << 32) | (u4 << 48);
}
uint64 random_uint64_fewbits() {
return random_uint64() & random_uint64() & random_uint64();
}
int count_1s(uint64 b) {
int count = 0;
while (b)
{
b &= (b - 1);
++count;
}
return count;
}
const int BitTable[64] = {
63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28,
58, 20, 37, 17, 36, 8
};
int pop_1st_bit(uint64 *bb) {
uint64 b = *bb ^ (*bb - 1);
unsigned int fold = (unsigned) ((b & 0xffffffff) ^ (b >> 32));
*bb &= (*bb - 1);
return BitTable[(fold * 0x783a9b23) >> 26];
}
uint64 index_to_uint64(int index, int bits, uint64 m) {
int i, j;
uint64 result = 0ULL;
for(i = 0; i < bits; i++) {
j = pop_1st_bit(&m);
if(index & (1 << i)) result |= (1ULL << j);
}
return result;
}
uint64 rmask(int sq) {
uint64 result = 0ULL;
int rk = sq/8, fl = sq%8, r, f;
for(r = rk+1; r <= 6; r++) result |= (1ULL << (fl + r*8));
for(r = rk-1; r >= 1; r--) result |= (1ULL << (fl + r*8));
for(f = fl+1; f <= 6; f++) result |= (1ULL << (f + rk*8));
for(f = fl-1; f >= 1; f--) result |= (1ULL << (f + rk*8));
return result;
}
uint64 bmask(int sq) {
uint64 result = 0ULL;
int rk = sq/8, fl = sq%8, r, f;
for(r=rk+1, f=fl+1; r<=6 && f<=6; r++, f++) result |= (1ULL << (f + r*8));
for(r=rk+1, f=fl-1; r<=6 && f>=1; r++, f--) result |= (1ULL << (f + r*8));
for(r=rk-1, f=fl+1; r>=1 && f<=6; r--, f++) result |= (1ULL << (f + r*8));
for(r=rk-1, f=fl-1; r>=1 && f>=1; r--, f--) result |= (1ULL << (f + r*8));
return result;
}
uint64 ratt(int sq, uint64 block) {
uint64 result = 0ULL;
int rk = sq/8, fl = sq%8, r, f;
for(r = rk+1; r <= 7; r++) {
result |= (1ULL << (fl + r*8));
if(block & (1ULL << (fl + r*8))) break;
}
for(r = rk-1; r >= 0; r--) {
result |= (1ULL << (fl + r*8));
if(block & (1ULL << (fl + r*8))) break;
}
for(f = fl+1; f <= 7; f++) {
result |= (1ULL << (f + rk*8));
if(block & (1ULL << (f + rk*8))) break;
}
for(f = fl-1; f >= 0; f--) {
result |= (1ULL << (f + rk*8));
if(block & (1ULL << (f + rk*8))) break;
}
return result;
}
uint64 batt(int sq, uint64 block) {
uint64 result = 0ULL;
int rk = sq/8, fl = sq%8, r, f;
for(r = rk+1, f = fl+1; r <= 7 && f <= 7; r++, f++) {
result |= (1ULL << (f + r*8));
if(block & (1ULL << (f + r * 8))) break;
}
for(r = rk+1, f = fl-1; r <= 7 && f >= 0; r++, f--) {
result |= (1ULL << (f + r*8));
if(block & (1ULL << (f + r * 8))) break;
}
for(r = rk-1, f = fl+1; r >= 0 && f <= 7; r--, f++) {
result |= (1ULL << (f + r*8));
if(block & (1ULL << (f + r * 8))) break;
}
for(r = rk-1, f = fl-1; r >= 0 && f >= 0; r--, f--) {
result |= (1ULL << (f + r*8));
if(block & (1ULL << (f + r * 8))) break;
}
return result;
}
int transform(uint64 b, uint64 magic, int bits) {
#if defined(USE_32_BIT_MULTIPLICATIONS)
return
(unsigned)((int)b*(int)magic ^ (int)(b>>32)*(int)(magic>>32)) >> (32-bits);
#else
return (int)((b * magic) >> (64 - bits));
#endif
}
uint64 find_magic(int sq, int m, int bishop) {
uint64 mask, b[4096], a[4096], used[4096], magic;
int i, j, k, n, fail;
mask = bishop? bmask(sq) : rmask(sq);
n = count_1s(mask);
for(i = 0; i < (1 << n); i++) {
b[i] = index_to_uint64(i, n, mask);
a[i] = bishop? batt(sq, b[i]) : ratt(sq, b[i]);
}
for(k = 0; k < 100000000; k++) {
magic = random_uint64_fewbits();
if(count_1s((mask * magic) & 0xFF00000000000000ULL) < 6) continue;
for(i = 0; i < 4096; i++) used[i] = 0ULL;
for(i = 0, fail = 0; !fail && i < (1 << n); i++) {
j = transform(b[i], magic, m);
if(used[j] == 0ULL) used[j] = a[i];
else if(used[j] != a[i]) fail = 1;
}
if(!fail) return magic;
}
printf("***Failed***\n");
return 0ULL;
}
int RBits[64] = {
12, 11, 11, 11, 11, 11, 11, 12,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
12, 11, 11, 11, 11, 11, 11, 12
};
int BBits[64] = {
6, 5, 5, 5, 5, 5, 5, 6,
5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 7, 7, 7, 7, 5, 5,
5, 5, 7, 9, 9, 7, 5, 5,
5, 5, 7, 9, 9, 7, 5, 5,
5, 5, 7, 7, 7, 7, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5,
6, 5, 5, 5, 5, 5, 5, 6
};
int main() {
int square;
printf("Get Magics performance test C Version. \n");
struct timespec start,end;
clock_gettime(CLOCK_MONOTONIC, &start);
for(square = 0; square < 64; square++)
find_magic(square, RBits[square], 0);
for(square = 0; square < 64; square++)
find_magic(square, BBits[square], 1);
clock_gettime(CLOCK_MONOTONIC, &end);
uint64 delta_us = ((end.tv_sec - start.tv_sec) * 1000) + ((end.tv_nsec - start.tv_nsec) / 1000000) ;
printf("\n Computing done. It took %d ms", delta_us);
getchar();
return 0;
}
The C# Version:
Code: Select all
#define USE_32_BIT_MULTIPLICATIONS
using System;
using Stopwatch = System.Diagnostics.Stopwatch;
using System.Reflection;
static class Program
{
static int seed = 1804289383;
static int random()
{
int x = seed;
x ^= x << 13;
x ^= x << 17;
x ^= x >> 5;
seed = x;
return x;
}
static ulong random_ulong()
{
ulong u1, u2, u3, u4;
u1 = (ulong)(random()) & 0xFFFF; u2 = (ulong)(random()) & 0xFFFF;
u3 = (ulong)(random()) & 0xFFFF; u4 = (ulong)(random()) & 0xFFFF;
return u1 | (u2 << 16) | (u3 << 32) | (u4 << 48);
}
static ulong random_ulong_fewbits()
{
return random_ulong() & random_ulong() & random_ulong();
}
static int count_1s(ulong b)
{
int count = 0;
while (b > 0)
{
b &= (b - 1);
++count;
}
return count;
}
static readonly int[] BitTable = {
63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
26, 60, 6, 23, 44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28,
58, 20, 37, 17, 36, 8
};
static int pop_1st_bit(ref ulong bb)
{
ulong b = bb ^ (bb - 1);
uint fold = (uint)((b & 0xffffffff) ^ (b >> 32));
bb &= (bb - 1);
return BitTable[(fold * 0x783a9b23) >> 26];
}
static ulong index_to_ulong(int index, int bits, ulong m)
{
int i, j;
ulong result = 0UL;
for (i = 0; i < bits; i++)
{
j = pop_1st_bit(ref m);
if ((index & (1 << i)) > 0) result |= (1UL << j);
}
return result;
}
static ulong rmask(int sq)
{
ulong result = 0UL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1; r <= 6; r++) result |= (1UL << (fl + r * 8));
for (r = rk - 1; r >= 1; r--) result |= (1UL << (fl + r * 8));
for (f = fl + 1; f <= 6; f++) result |= (1UL << (f + rk * 8));
for (f = fl - 1; f >= 1; f--) result |= (1UL << (f + rk * 8));
return result;
}
static ulong bmask(int sq)
{
ulong result = 0UL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1, f = fl + 1; r <= 6 && f <= 6; r++, f++) result |= (1UL << (f + r * 8));
for (r = rk + 1, f = fl - 1; r <= 6 && f >= 1; r++, f--) result |= (1UL << (f + r * 8));
for (r = rk - 1, f = fl + 1; r >= 1 && f <= 6; r--, f++) result |= (1UL << (f + r * 8));
for (r = rk - 1, f = fl - 1; r >= 1 && f >= 1; r--, f--) result |= (1UL << (f + r * 8));
return result;
}
static ulong ratt(int sq, ulong block)
{
ulong result = 0UL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1; r <= 7; r++)
{
result |= (1UL << (fl + r * 8));
if ((block & (1UL << (fl + r * 8))) > 0) break;
}
for (r = rk - 1; r >= 0; r--)
{
result |= (1UL << (fl + r * 8));
if ((block & (1UL << (fl + r * 8))) > 0) break;
}
for (f = fl + 1; f <= 7; f++)
{
result |= (1UL << (f + rk * 8));
if ((block & (1UL << (f + rk * 8))) > 0) break;
}
for (f = fl - 1; f >= 0; f--)
{
result |= (1UL << (f + rk * 8));
if ((block & (1UL << (f + rk * 8))) > 0) break;
}
return result;
}
static ulong batt(int sq, ulong block)
{
ulong result = 0UL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1, f = fl + 1; r <= 7 && f <= 7; r++, f++)
{
result |= (1UL << (f + r * 8));
if ((block & (1UL << (f + r * 8))) > 0) break;
}
for (r = rk + 1, f = fl - 1; r <= 7 && f >= 0; r++, f--)
{
result |= (1UL << (f + r * 8));
if ((block & (1UL << (f + r * 8))) > 0) break;
}
for (r = rk - 1, f = fl + 1; r >= 0 && f <= 7; r--, f++)
{
result |= (1UL << (f + r * 8));
if ((block & (1UL << (f + r * 8))) > 0) break;
}
for (r = rk - 1, f = fl - 1; r >= 0 && f >= 0; r--, f--)
{
result |= (1UL << (f + r * 8));
if ((block & (1UL << (f + r * 8))) > 0) break;
}
return result;
}
static int transform(ulong b, ulong magic, int bits)
{
#if USE_32_BIT_MULTIPLICATIONS
return (int)((uint)((int)b * (int)magic ^ (int)(b >> 32) * (int)(magic >> 32)) >> (32 - bits));
#else
return (int)((b * magic) >> (64 - bits));
#endif
}
static ulong find_magic(int sq, int m, int bishop)
{
ulong mask, magic;
ulong[] b = new ulong[4096], a = new ulong[4096], used = new ulong[4096];
int i, j, k, n, fail;
mask = bishop > 0 ? bmask(sq) : rmask(sq);
n = count_1s(mask);
for (i = 0; i < (1 << n); i++)
{
b[i] = index_to_ulong(i, n, mask);
a[i] = bishop > 0 ? batt(sq, b[i]) : ratt(sq, b[i]);
}
for (k = 0; k < 100000000; k++)
{
magic = random_ulong_fewbits();
if (count_1s((mask * magic) & 0xFF00000000000000UL) < 6) continue;
for (i = 0; i < 4096; i++) used[i] = 0UL;
for (i = 0, fail = 0; fail < 1 && i < (1 << n); i++)
{
j = transform(b[i], magic, m);
if (used[j] == 0UL) used[j] = a[i];
else if (used[j] != a[i]) fail = 1;
}
if (fail < 1) return magic;
}
Console.Write("***Failed***\n");
return 0UL;
}
static int[] RBits = {
12, 11, 11, 11, 11, 11, 11, 12,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
11, 10, 10, 10, 10, 10, 10, 11,
12, 11, 11, 11, 11, 11, 11, 12
};
static int[] BBits =
{
6, 5, 5, 5, 5, 5, 5, 6,
5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 7, 7, 7, 7, 5, 5,
5, 5, 7, 9, 9, 7, 5, 5,
5, 5, 7, 9, 9, 7, 5, 5,
5, 5, 7, 7, 7, 7, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5,
6, 5, 5, 5, 5, 5, 5, 6
};
static void PreJIT()
{
foreach (var type in Assembly.GetExecutingAssembly().GetTypes())
{
foreach (var method in type.GetMethods(BindingFlags.DeclaredOnly |
BindingFlags.NonPublic |
BindingFlags.Public | BindingFlags.Instance |
BindingFlags.Static))
{
if ((method.Attributes & MethodAttributes.Abstract) == MethodAttributes.Abstract || method.ContainsGenericParameters)
{
continue;
}
System.Runtime.CompilerServices.RuntimeHelpers.PrepareMethod(method.MethodHandle);
}
}
}
static void Main(string[] args)
{
PreJIT();
int square = 0;
Console.Write("Get Magics performance test C# Version. \n");
Stopwatch sw = new Stopwatch();
sw.Start();
for (square = 0; square < 64; square++)
find_magic(square, RBits[square], 0);
for (square = 0; square < 64; square++)
find_magic(square, BBits[square], 1);
Console.Write("\n Computing done. It took {0} ms", sw.ElapsedMilliseconds);
Console.ReadKey();
}
}
I only added a "PreJIT" method to initialize the whole class to avoid any slowdown because of JIT
C version is compiled with gcc 10.3.0 with optimization flags:
C# version was compiled with dotnet CLI with optimization enabled in the project xml file:
Code: Select all
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net5.0</TargetFramework>
<optimize>true</optimize>
</PropertyGroup>
</Project>
Compiled used "Release" configuration
Results
Single thread, on my AMD FX8350 CPU:
C Version:
Code: Select all
Get Magics performance test C Version.
Computing done. It took 1036 ms
C# Version:
Code: Select all
Get Magics performance test C# Version.
Computing done. It took 1412 ms
In my results the C version was only 36% faster than the C# version.
Pretty impressive, i was expecting at least 2x faster.
I removed the prining on the run during the magic numbers generation because i know that printing text into the console is very slow.