My Digital Mars compiler takes 22.1 secs to complete.
GCC 4.6.1 (32 bit) takes 22.2 secs
GCC 4.6.1 (64-bit) takes 22.0 secs
The hand tuned ASM version (BUBBLES=1) takes 14.7 seconds.
I compiled GCC with the "-Ofast" option. Perhaps that's not the right one as I am new to GCC.
Code: Select all
#include <stdio.h>
#include <stdlib.h>
#define MAX_ENTRIES 6000
#define BUBBLES 0 // 0=C | 1=ASM
static int time1,time2;
static unsigned int key1[MAX_ENTRIES+3];
static unsigned int key2[MAX_ENTRIES+3];
static unsigned char byte1[MAX_ENTRIES+3];
static unsigned char byte2[MAX_ENTRIES+3];
// Create random 32 bit keys
// =========================
static unsigned long x_random[55] = {
1410651636UL, 3012776752UL, 3497475623UL, 2892145026UL, 1571949714UL,
3253082284UL, 3489895018UL, 387949491UL, 2597396737UL, 1981903553UL,
3160251843UL, 129444464UL, 1851443344UL, 4156445905UL, 224604922UL,
1455067070UL, 3953493484UL, 1460937157UL, 2528362617UL, 317430674UL,
3229354360UL, 117491133UL, 832845075UL, 1961600170UL, 1321557429UL,
747750121UL, 545747446UL, 810476036UL, 503334515UL, 4088144633UL,
2824216555UL, 3738252341UL, 3493754131UL, 3672533954UL, 29494241UL,
1180928407UL, 4213624418UL, 33062851UL, 3221315737UL, 1145213552UL,
2957984897UL, 4078668503UL, 2262661702UL, 65478801UL, 2527208841UL,
1960622036UL, 315685891UL, 1196037864UL, 804614524UL, 1421733266UL,
2017105031UL, 3882325900UL, 810735053UL, 384606609UL, 2393861397UL };
static int init_random = 1;
static unsigned long y_random[55];
static int j_random, k_random;
static unsigned long ul_random;
void zobrist(void);
void bubbles(void);
int main (int argc,char *argv[])
{ int x;
printf("Creating %d Zobrist keys\n\n",MAX_ENTRIES);
zobrist(); // make 6.000 random 32-bit keys
printf("Bubbling now... ");
time1=clock();
bubbles();
time2=clock();
printf("Done in %d\n\n",time2-time1);
// for (x=0; x<20; x++) printf("%8x\t %8x\t (%2x) (%2x)\n", // debug ASM code
// key1[x],key2[x],byte1[x],byte2[x]);
}
void zobrist()
// Create random 32-bit integers
// =============================
{ int x;
if (init_random)
{ int i;
init_random = 0;
for (i = 0; i < 55; i++) y_random[i] = x_random[i];
j_random = 24 - 1;
k_random = 55 - 1; }
for (x=0; x<MAX_ENTRIES; x++)
{ ul_random = (y_random[k_random] += y_random[j_random]);
if (--j_random < 0) j_random = 55 - 1;
if (--k_random < 0) k_random = 55 - 1;
key1[x]=0; key1[x]=key1[x] ^ ul_random; // key1
key2[x]=key1[x]; // key2 (copy to check proper swapping)
byte1[x]=key1[x]; // byte1 (lower 8 bits to check proper swapping)
byte2[x]=key1[x]>>24; // byte1 (first 8 bits to check proper swapping)
}
// for (x=0; x<20; x++) printf("%8x\t %8x\t (%2x) (%2x)\n", // debug ASM code
// key1[x],key2[x],byte1[x],byte2[x]);
// printf("\n");
}
#if BUBBLES==0
void bubbles()
{ unsigned int zz; unsigned char ch; int r1,r2;
sort05: r1=0; r2=-1;
sort10: r1++; r2++;
if (r1>=MAX_ENTRIES) return;
if (key1[r2] <= key1[r1]) goto sort10;
zz=key1[r1]; key1[r1]=key1[r2]; key1[r2]=zz; // swap
zz=key2[r1]; key2[r1]=key2[r2]; key2[r2]=zz;
ch=byte1[r1]; byte1[r1]=byte1[r2]; byte1[r2]=ch;
ch=byte2[r1]; byte2[r1]=byte2[r2]; byte2[r2]=ch;
goto sort05; // again
}
#endif
#if BUBBLES==1
void bubbles()
{ asm {
sort05: xor EDX,EDX // r1=0
mov EBX,0FFFFFFFFh // r2=-1
sort10: add EDX,1 // r1++
add EBX,1 // r2++
cmp EDX,MAX_ENTRIES-1 // if (r1 >= MAX_ENTRIES) return
jge done
mov EAX,dword ptr key1[EBX*4] // eax=key1[r2]
cmp EAX,dword ptr key1[EDX*4]
jbe sort10
mov ECX,dword ptr key1[EDX*4] // ecx=key1[r1]
mov dword ptr key1[EDX*4],EAX
mov dword ptr key1[EBX*4],ECX // swap key1
mov EDI,dword ptr key2[EBX*4] // swap key2
mov ESI,dword ptr key2[EDX*4]
mov dword ptr key2[EBX*4],ESI
mov dword ptr key2[EDX*4],EDI
mov AL,byte ptr byte1[EDX] // swap byte 1 & 2
mov AH,byte ptr byte1[EBX]
mov CL,byte ptr byte2[EDX]
mov CH,byte ptr byte2[EBX]
mov byte ptr byte1[EDX],AH
mov byte ptr byte1[EBX],AL
mov byte ptr byte2[EDX],CH
mov byte ptr byte2[EBX],CL
jmp sort05
} // end of asm
done: return;
}
#endif