I got fascinated by the new BMI2 instructions PEXT and PDEP, and wanted to see how much it may compress rook and bishop attacks compared to magic bitboards.
No compression at all gives 841k as already noted, and by using PDEP and shift, one easily comes down to 209k+, also already noted by others.
By using the fact that some attacks are *always* set, I got the total size
down to 152k, combined rook and bishop attacks. The smallest set of a rook attacks can be generated by
Code: Select all
RookAttacksMin[sqr] = KingAttacks[sqr] & MaskRook[sqr];
I added the bishop attacks after the rook attacks, and since there were more than enough space left, all bishop attacks could be given the same shift, 55, so they all lie in the higher MSBs of the attacks.
Heres usage and initialisation code.
Code: Select all
U64 AttacksPDEP[152*1024]; // Attacks table rook + bishop
U64 MaskRookPDEP[64]; // Masks for expanding attacks by PDEP
int RookPosPDEP[64]; // Basic square
int RookShiftPDEP[64]; // Shifts within shared attacks
U64 RookAttacks(TSquare s, U64 Pcs)
{
U64 Atk = AttacksPDEP[RookPosPDEP[s] + pext(Pcs, MaskRook64[s])];
Atk = pdep(Atk>>RookShiftPDEP[s], MaskRookPDEP[s]);
return Atk | (KingAttacks[s] & MaskRook[s]);
}
U64 BishopAttacks(TSquare s, U64 Pcs)
{
U64 Atk = AttacksPDEP[BishPosPDEP[s] + pext(Pcs, MaskBishop64[s])];
Atk = pdep(Atk>>55, MaskBishPDEP[s]);
return Atk | (KingAttacks[s] & MaskBishop[s]);
}
Code: Select all
const TSquare SqrQueRook[64] = {
H8,A8,H1,A1, // 12 bit ( 4)
G8,F8,E8,D8,C8,B8, // 11 bit (24)
H7,H6,H5,H4,H3,H2,
A7,A6,A5,A4,A3,A2,
G1,F1,E1,D1,C1,B1,
G7,F7,E7,D7,C7,B7, // 10 bit (36)
G6,F6,E6,D6,C6,B6,
G5,F5,E5,D5,C5,B5,
G4,F4,E4,D4,C4,B4,
G3,F3,E3,D3,C3,B3,
G2,F2,E2,D2,C2,B2
};
pos = 0;
RookPosPDEP[H8] = pos; RookShiftPDEP[H8] = 0; // 12 bit
RookPosPDEP[A8] = pos; RookShiftPDEP[A8] = 12;
RookPosPDEP[A1] = pos; RookShiftPDEP[A1] = 24;
RookPosPDEP[H1] = pos; RookShiftPDEP[H1] = 36;
RookPosPDEP[G8] = pos; RookShiftPDEP[G8] = 48; // 11 bit
RookPosPDEP[F8] = 2048+pos; RookShiftPDEP[F8] = 48; // 59-63 free (5)
pos += 4096;
RookPosPDEP[E8] = pos; RookShiftPDEP[E8] = 0; // 11 bit
RookPosPDEP[D8] = pos; RookShiftPDEP[D8] = 11;
RookPosPDEP[C8] = pos; RookShiftPDEP[C8] = 22;
RookPosPDEP[B8] = pos; RookShiftPDEP[B8] = 33;
RookPosPDEP[H7] = pos; RookShiftPDEP[H7] = 44; // 55-63 free (9)
pos += 2048;
etc....
for (n = 0; n < 64; n++) {
sqr = SqrQueRook[n];
MaskRookPDEP[sqr] = RookAttacks5(sqr, 0) ^ (KingAttacks[sqr] & MaskRook[sqr]);
for (i=0; i<64; i++)
for (j=0; j<64; j++) {
Ptn = GenRookOcc(sqr,i,j);
Atk = RookAttacks5(sqr, Ptn); // Normal attack generation
Atk ^= KingAttacks[sqr] & MaskRook[sqr]; // Remove always set bits
Atk = pext(Atk, MaskRookPDEP[sqr]) << RookShiftPDEP[sqr];
AttacksPDEP[RookPosPDEP[sqr] + pext(Ptn, MaskRook64[sqr])] |= Atk;
}
}
const TSquare SqrQueBish[64] = {
D5,E5,D4,E4, // 9 bit ( 4)
C6,D6,E6,F6,F5,F4,F3,E3,D3,C3,C4,C5,// 7 bit (12)
A8,H8,A1,H1, // 6 bit ( 4)
B7,C7,D7,E7,F7,G7,G6,G5,G4,G3,G2, // 5 bit (44)
F2,E2,D2,C2,B2,B3,B4,B5,B6,
B8,C8,D8,E8,F8,G8,H7,H6,H5,H4,H3,H2,
G1,F1,E1,D1,C1,B1,A2,A3,A4,A5,A6,A7
};
pos = RookPosPDEP[E8]; // 9 free bits, starting at 55
BishPosPDEP[D5] = pos; BishShiftPDEP[D5] = 55; // 9bit
BishPosPDEP[E5] = 512+pos; BishShiftPDEP[E5] = 55;
BishPosPDEP[D4] = 1024+pos; BishShiftPDEP[D4] = 55;
BishPosPDEP[E4] = 1536+pos; BishShiftPDEP[E4] = 55;
pos = RookPosPDEP[H6]; // 9 free bits, starting at 55
BishPosPDEP[C6] = pos; BishShiftPDEP[C6] = 55; // 7 bit
BishPosPDEP[D6] = 128+pos; BishShiftPDEP[D6] = 55;
BishPosPDEP[E6] = 256+pos; BishShiftPDEP[E6] = 55;
BishPosPDEP[F6] = 384+pos; BishShiftPDEP[F6] = 55;
BishPosPDEP[F5] = 512+pos; BishShiftPDEP[F5] = 55;
etc....
for (n = 0; n < 64; n++) {
sqr = SqrQueBish[n];
MaskBishPDEP[sqr] = BishopAttacks5(sqr, 0) ^ (KingAttacks[sqr] & MaskBishop[sqr]);
for (i=0; i<64; i++)
for (j=0; j<64; j++) {
Ptn = GenBishOcc(sqr,i,j);
Atk = BishopAttacks5(sqr, Ptn); // Normal attack generation
Atk ^= KingAttacks[sqr] & MaskBishop[sqr]; // Remove always set bits
Atk = pext(Atk, MaskBishPDEP[sqr]) << BishShiftPDEP[sqr];
AttacksPDEP[BishPosPDEP[sqr] + pext(Ptn, MaskBishop64[sqr])] |= Atk;
}
}
To get an attack table of around 209k instead of 841k, one need to do an additional PDEP and shift (or??). The difference between 209k and 152k is an extra OR in series. The AND between KingAttacks and Mask might be done in parallell. Also, I believe that KingAttacks and Masks are normally present in the cache, since they are much used.
Anyway it's yet another slider attack generator
Regards, Lasse