CookieCat goes multithreaded with the installation of a parallel perft() (bulk counting at penultimate nodes and no transposition assistance). As this is my first foray with multithreaded Pascal, I started with the simplest possible task; later, a multithreaded general search will be implemented.
The new multithreaded perft command mtperft has been tested on Mac OS/X, Linux, and (gasp!) Windows 7. Everything appears to work.
As a matter of personal preference, I try to minimize the amount of conditionally compiled code as I feel that too much of this leads to a hard-to-understand source which is also hard-to-fix. And if conditionally compiled code is needed, then it should be isolated to a small region of the source.
With CookieCat, the addition of multithreading has added one line of conditionally compiled code: the cthreads unit is referenced if the target is Unix-like. There is no way around this with the current implementation of Free Pascal.
There is a second line of conditionally compiled code: if the target is a PowerPC CPU, then the inline routine attribute is ignored. This is needed to work around a compiler bug.
That's it for conditionally compiled code, at least for now.
CookieCat goes multithreaded
Moderators: hgm, Rebel, chrisw
-
- Posts: 4675
- Joined: Mon Mar 13, 2006 7:43 pm
Re: CookieCat goes multithreaded
Here's the multithreaded visit-every-node perft code. Note that the only thread specific system routine is BeginThread() called in the main driver once-per-move loop.
Code: Select all
type
  { Perft block parameter record.

    One record describes the perft calculation for a single root move.  The
    driver fills in the input fields (ifen, draft, prior, doprint) and clears
    the output fields; the worker routine PosPerftTask writes mpcount and
    then sets completed.

    The threadid and started fields are the bookkeeping fields that the
    core-limited driver reads and writes; they are appended at the end so
    that code using only the earlier fields is unaffected. }
  perftblocktype =
    record
      ifen:      fentype;   { Position FEN (position after the root move) }
      draft:     Integer;   { Draft of calculation (remaining depth) }
      prior:     santype;   { Prior move SAN (the root move, for output) }
      mpcount:   nctype;    { Result movepath count }
      doprint:   Boolean;   { Subtotal output enable flag }
      completed: Boolean;   { Completion flag, set last by the worker }
      started:   Boolean;   { Dispatch flag, set by the driver at launch }
      threadid:  TThreadID  { Value returned by BeginThread for the worker }
    end;

  { Pointer to a perft block; this is what is handed to the worker thread }
  perftblockptrtype = ^perftblocktype;
{ Thread entry point: run the perft calculation described by one block.

  ptr must point to a perftblocktype prepared by the driver.  The routine
  decodes the block's FEN into a position that is local to this call,
  counts every movepath of length draft by visiting each node, stores the
  total in mpcount, optionally writes "<prior> <mpcount>", and sets the
  completed flag as its final store into the block.

  The result is always 0. }
function PosPerftTask(ptr: Pointer): PtrInt;
  var
    perftblockptr: perftblockptrtype;
    pos: postype;

  { Count the movepaths of the given depth from the current state of pos.

    A depth of zero counts the current node as one path.  A negative depth
    is treated the same way: the driver passes limit - 1 as the draft, and
    with the former "depth = 0" test a negative draft never reached the
    base case and recursed without bound. }
  function PosPerftSimpleFull(depth: Integer): nctype;
    var
      myresult: nctype;
      gms: gmstype;
      index: Integer;
  begin
    if depth <= 0 then
      myresult := 1
    else
      begin
        myresult := 0;
        PosGenerate(pos, gms);
        with gms do
          for index := 0 to movecount - 1 do
            begin
              { Play the move, count the subtree, take the move back }
              PosExecute(pos, moves[index]);
              Inc(myresult, PosPerftSimpleFull(depth - 1));
              PosRetract(pos)
            end
      end;
    PosPerftSimpleFull := myresult
  end; { PosPerftSimpleFull }

begin
  perftblockptr := ptr;
  with perftblockptr^ do
    begin
      { Build the private position from the FEN stored in the block }
      PosInit(pos);
      PosDecode(pos, ifen);
      mpcount := PosPerftSimpleFull(draft);
      PosTerm(pos);
      if doprint then
        writeln(prior, ' ', mpcount);
      { Set last: the driver polls this flag before reading mpcount }
      completed := True
    end;
  PosPerftTask := 0
end; { PosPerftTask }
{ Multithreaded perft driver: one worker thread per root move.

  pos       - root position; each root move is executed and retracted on it
              while the parameter blocks are prepared
  limit     - perft depth measured from the root
  printflag - passed to each worker as its doprint flag
  Result    - sum of the movepath counts of all root moves

  A limit of zero or less returns 1 (the root itself) without starting any
  thread; previously such a limit handed the workers a negative draft.

  If BeginThread fails (it returns a zero thread id) the calculation for
  that move is run directly in the calling thread, so the completion wait
  cannot hang on a worker that never started.

  Every started worker is joined with WaitForThreadTerminate before the
  results are summed. }
function PosPerftTaskDriver(var pos: postype; limit: Integer; printflag: Boolean): nctype;
  var
    index: Integer;
    gms: gmstype;
    blocks: array[gentype] of perftblocktype;
    threadids: array[gentype] of TThreadID;

  { Number of root-move blocks whose completed flag is set }
  function CalcFinished: Integer;
    var
      myresult: Integer;
      index: Integer;
  begin
    myresult := 0;
    for index := 0 to gms.movecount - 1 do
      if blocks[index].completed then
        Inc(myresult);
    CalcFinished := myresult
  end; { CalcFinished }

  { Total of the movepath counts over all root-move blocks }
  function CalcSum: nctype;
    var
      myresult: nctype;
      index: Integer;
  begin
    myresult := 0;
    for index := 0 to gms.movecount - 1 do
      Inc(myresult, blocks[index].mpcount);
    CalcSum := myresult
  end; { CalcSum }

begin
  { Depth zero (or less): only the root node itself is counted }
  if limit <= 0 then
    begin
      PosPerftTaskDriver := 1;
      Exit
    end;

  PosMetaGenCanonical(pos, gms);

  { Prepare one block per root move and launch its worker }
  for index := 0 to gms.movecount - 1 do
    begin
      PosExecute(pos, gms.moves[index]);
      with blocks[index] do
        begin
          ifen := PosEncode(pos);
          draft := limit - 1;
          prior := MoveEncode(gms.moves[index]);
          mpcount := 0;
          doprint := printflag;
          completed := False
        end;
      threadids[index] := BeginThread(@PosPerftTask, @blocks[index]);
      { Thread creation failed: do this move's work here instead }
      if threadids[index] = TThreadID(0) then
        PosPerftTask(@blocks[index]);
      PosRetract(pos)
    end;

  { Join every worker that was actually started (timeout 0 = no timeout) }
  for index := 0 to gms.movecount - 1 do
    if threadids[index] <> TThreadID(0) then
      WaitForThreadTerminate(threadids[index], 0);

  { Safety net: the original completion poll, normally satisfied at once }
  while CalcFinished <> gms.movecount do
    Sleep(100);

  PosPerftTaskDriver := CalcSum
end; { PosPerftTaskDriver }
-
- Posts: 4675
- Joined: Mon Mar 13, 2006 7:43 pm
Limiting the thread count to the core count
Here's a revised version of the code which limits the calculation thread count to the processor core count. Compared to spamming the processor(s) with a separate thread for each root move, this increases the overall speed, but not by very much.
The CatNap routine used here sleeps the calling thread for 100 milliseconds. RatNap goes for 10 milliseconds while DogNap snoozes for a full second.
Code: Select all
{ Thread entry point: run the perft calculation described by one block.

  ptr must point to a perftblocktype prepared by the driver.  The routine
  decodes the block's FEN into a position that is local to this call,
  counts every movepath of length draft by visiting each node, stores the
  total in mpcount, and sets the completed flag as its final store into
  the block.  This version produces no output itself.

  The result is always 0. }
function PosPerftTask(ptr: Pointer): PtrInt;
  var
    perftblockptr: perftblockptrtype;
    pos: postype;

  { Count the movepaths of the given depth from the current state of pos.

    A depth of zero counts the current node as one path.  A negative depth
    is treated the same way: the driver passes limit - 1 as the draft, and
    with the former "depth = 0" test a negative draft never reached the
    base case and recursed without bound. }
  function PosPerftSimpleFull(depth: Integer): nctype;
    var
      myresult: nctype;
      gms: gmstype;
      index: Integer;
  begin
    if depth <= 0 then
      myresult := 1
    else
      begin
        myresult := 0;
        PosGenerate(pos, gms);
        with gms do
          for index := 0 to movecount - 1 do
            begin
              { Play the move, count the subtree, take the move back }
              PosExecute(pos, moves[index]);
              Inc(myresult, PosPerftSimpleFull(depth - 1));
              PosRetract(pos)
            end
      end;
    PosPerftSimpleFull := myresult
  end; { PosPerftSimpleFull }

begin
  perftblockptr := ptr;
  with perftblockptr^ do
    begin
      { Build the private position from the FEN stored in the block }
      PosInit(pos);
      PosDecode(pos, ifen);
      mpcount := PosPerftSimpleFull(draft);
      PosTerm(pos);
      { Set last: the driver checks this flag before reclaiming the slot }
      completed := True
    end;
  PosPerftTask := 0
end; { PosPerftTask }
{ Multithreaded perft driver with the worker count limited to the core count.

  pos       - root position; each root move is executed and retracted on it
              while the parameter blocks are prepared
  limit     - perft depth measured from the root
  printflag - when set, one "<move> <count>" line is written per root move
              after all workers have finished
  Result    - sum of the movepath counts of all root moves

  Changes relative to the posted version:
    - a limit of zero or less returns 1 (the root itself) without starting
      any thread; previously the workers were handed a negative draft;
    - the value from CalcCoreCount is clamped to at least 1 and at most
      coremax + 1, so a zero/negative count cannot stall the dispatch loop
      and a large count cannot run past the slots of activevec;
    - if BeginThread fails (zero thread id) the move is calculated in the
      calling thread, and Reclaim does not wait on a zero thread id.

  NOTE(review): the slot scans start at index 0 while the slot vector is
  initialised over coremin..coremax; this assumes coremin = 0 - confirm. }
function PosPerftTaskDriver(var pos: postype; limit: Integer; printflag: Boolean): nctype;
  var
    myresult: nctype;
    corecount, activecount: Integer;
    moveindex: Integer;
    gms: gmstype;
    completedcount: Integer;
    nextmoveindex: Integer;
    coreindex: Integer;
    activevec: array[coretype] of Integer; { Slot -> move index, or -1 if free }
    blocks: array[gentype] of perftblocktype;

  { Index of the first unused slot, or -1 if every slot is busy }
  function FindFirstFree: Integer;
    var
      myresult: Integer;
      coreindex: Integer;
  begin
    myresult := -1;
    coreindex := 0;
    while (myresult < 0) and (coreindex < corecount) do
      if activevec[coreindex] < 0 then
        myresult := coreindex
      else
        Inc(coreindex);
    FindFirstFree := myresult
  end; { FindFirstFree }

  { Index of the first busy slot whose block is completed, or -1 if none }
  function FindFirstCompleted: Integer;
    var
      myresult: Integer;
      coreindex: Integer;
  begin
    myresult := -1;
    coreindex := 0;
    while (myresult < 0) and (coreindex < corecount) do
      if (activevec[coreindex] >= 0) and blocks[activevec[coreindex]].completed then
        myresult := coreindex
      else
        Inc(coreindex);
    FindFirstCompleted := myresult
  end; { FindFirstCompleted }

  { Assign the given root move to a free slot and start its worker.
    The caller guarantees a free slot (activecount < corecount). }
  procedure Dispatch(moveindex: Integer);
    var
      coreindex: Integer;
  begin
    coreindex := FindFirstFree;
    activevec[coreindex] := moveindex;
    with blocks[moveindex] do
      begin
        started := True;
        threadid := BeginThread(@PosPerftTask, @blocks[moveindex]);
        { Thread creation failed: do the work here so that the block is
          still completed and the cycle below can reclaim the slot }
        if threadid = TThreadID(0) then
          PosPerftTask(@blocks[moveindex])
      end;
    Inc(activecount)
  end; { Dispatch }

  { Join the worker occupying the given slot and mark the slot free }
  procedure Reclaim(coreindex: Integer);
  begin
    { A zero id means the block was calculated inline; nothing to join }
    if blocks[activevec[coreindex]].threadid <> TThreadID(0) then
      WaitForThreadTerminate(blocks[activevec[coreindex]].threadid, 0);
    activevec[coreindex] := -1;
    Dec(activecount)
  end; { Reclaim }

begin
  { Depth zero (or less): only the root node itself is counted }
  if limit <= 0 then
    begin
      PosPerftTaskDriver := 1;
      Exit
    end;

  { Initialize counts and moves }
  myresult := 0;
  corecount := CalcCoreCount;
  if corecount < 1 then
    corecount := 1;
  if corecount > coremax + 1 then
    corecount := coremax + 1;
  activecount := 0;
  completedcount := 0;
  PosMetaGenCanonical(pos, gms);

  { Initialize the active core vector }
  for coreindex := coremin to coremax do
    activevec[coreindex] := -1;

  { Initialize the thread parameter blocks }
  for moveindex := 0 to gms.movecount - 1 do
    begin
      PosExecute(pos, gms.moves[moveindex]);
      with blocks[moveindex] do
        begin
          threadid := TThreadID(0);
          ifen := PosEncode(pos);
          draft := limit - 1;
          prior := MoveEncode(gms.moves[moveindex]);
          mpcount := 0;
          started := False;
          completed := False
        end;
      PosRetract(pos)
    end;

  { Cycle: start a worker while moves remain and a slot is free; otherwise
    reclaim one finished worker, or nap if none has finished yet }
  nextmoveindex := 0;
  while completedcount < gms.movecount do
    if (nextmoveindex < gms.movecount) and (activecount < corecount) then
      begin
        Dispatch(nextmoveindex);
        Inc(nextmoveindex)
      end
    else
      begin
        coreindex := FindFirstCompleted;
        if coreindex >= 0 then
          begin
            Reclaim(coreindex);
            Inc(completedcount)
          end
        else
          CatNap
      end;

  { Scan/print results }
  for moveindex := 0 to gms.movecount - 1 do
    with blocks[moveindex] do
      begin
        if printflag then
          WriteStrNL(Output, prior + ' ' + EncodeUi64Type(mpcount));
        Inc(myresult, mpcount)
      end;

  { Assign final result and exit }
  PosPerftTaskDriver := myresult
end; { PosPerftTaskDriver }