First, some preliminary considerations about data structures. Because I originally designed Jazz as a library of chess-related functions rather than as a chess program (or engine) there is almost no global state anywhere in the program. Most functions take either a board_t struct describing a chess position, or a gamestate_t struct that holds information about the current game (including the board position, move history, repetition table, transposition table, killer lists, thinking time etc.). The only data that is actually global are "read only" tables for the move generator and evaluation function. This is fortunate, because it means that I had to change very little in the structure of the program itself.
Jazz uses pseudo-legal move generation, and I generate all moves up front in one go.
The move from the hash table, if it is legal, is then tried first and if it generates a cut-off, the search returns immediately. If it doesn't, the remaining moves are scored and sorted according to their characteristics (killer moves, winning captures, some history-like heuristics, static ordering based on piece square tables). We then loop until we've tried all moves in the list. The move list is a structure that stores the move list, as well as the score (for each move), the permutation, the number of moves and the current move number.
For the parallel search I use two extra data structures: a "split point", which holds information about the position where the parallel search is started and that threads use to communicate, and a "thread", which holds information about each thread used in the search:
Code: Select all
typedef struct split_t {
struct gamestate_t *game;
movelist_t *movelist;
int static_score;
volatile uint32_t workers;
volatile int alpha;
volatile int beta;
volatile int score;
volatile int draft;
volatile int depth;
volatile move_t best_move;
volatile bool stop;
struct split_t *parent;
lock_t lock;
} split_t;
"static_score" is the static evaluation at the split point and is passed around
because it is used for some reduction/pruning decisions. "Workers" is a bitfield that is used to keep track of which threads are working on this split point (the corresponding bit is set to 1). Variables "alpha", "beta", "score", "draft", "depth" and "best_move" are used to control the search and return the result. "stop" becomes true in the case of a beta-cutoff, "parent" can be used to backup a split point to implement a "helpful master" (not currently used, but inherited from the Viper implementation). Finally, "lock" is a spinlock (or mutex) that is set when the split point needs to be updated.
The thread-specific data looks like this:
Code: Select all
typedef struct thread_t {
struct gamestate_t *game; /* Work space for each thread */
split_t *splitpoint; /* Current split point */
int thread_id; /* Thread ID number */
volatile int active_splits; /* Number of active splits for this thread */
volatile uint8_t state; /* The current state (flag) of the thread */
volatile bool must_die; /* Thread will exit if this becomes true (FIXME: should be a state flag?) */
pthread_t pt; /* pthreads thread ID (for pthread_join) */
char barrier[128]; /* Make sure data for different threads is on different cache lines */
} thread_t;
Code: Select all
#define THREAD_WORKING 0x01 /* Thread is currently searching */
#define THREAD_SLEEPING 0x02 /* Thread is sleeping */
#define THREAD_WAITING 0x04 /* Thread is waiting to be assigned work */
#define THREAD_SCHEDULED 0x08 /* Thread has been assigned work but has not started searching yet */
Code: Select all
static void *thread_func(void *arg)
{
/* Initialisation */
thread_t *self = (thread_t *)arg;
while (!self->must_die) {
if (self->state == THREAD_SLEEPING) {
assert(self->thread_id != 0);
pthread_mutex_lock(&sleep_mutex);
pthread_cond_wait(&sleep_condition, &sleep_mutex);
pthread_mutex_unlock(&sleep_mutex);
self->state = THREAD_WAITING;
}
/* Wait for work */
while (self->state == THREAD_WAITING && !self->must_die);
/* If we're scheduled to do work, start working */
while (self->state == THREAD_SCHEDULED);
if (self->state == THREAD_WORKING) {
/* Call SMP search */
search_split(self->game, self->splitpoint);
/* Mark thread as waiting for work. */
self->state = THREAD_WAITING;
}
}
return NULL;
}
The search_split() function is a stripped-down version of the normal search
function:
Code: Select all
void search_split(gamestate_t *game, split_t *split)
{
/* Initialisation */
bool check = in_check(0);
int me = game->side_to_move;
/* Lock the split point */
acquire_lock(&split->lock);
assert(split->workers & (1<<game->thread_id));
int depth = split->depth;
int draft = split->draft;
int static_score = split->static_score;
int num_moves = split->movelist->num_moves;
game->num_moves[depth] = split->movelist->num_moves;
int alpha = split->alpha;
int beta = split->beta;
bool pv_node = beta > alpha+1;
while(split->movelist->cur_move < num_moves) {
if (thread_should_stop(game->thread_id)) break;
if (alpha >= beta) break;
move_t move = get_next_move(split->movelist);
int move_score = get_move_score(split->movelist);
/* Release the lock while we don't need it */
release_lock(&split->lock);
int local_extension = check*PLY; /* Check extension */
/* Play move and evaluate the resulting position recursively -
* assuming the move was legal.
*/
playmove(game, move);
if (!check && move_leaves_player_in_check(game, move, me)) { /* Illegal move! */
takeback(game);
acquire_lock(&split->lock);
continue;
}
assert(!player_in_check(game, me));
moves_searched++;
int score = -alphabeta(game, depth+1, draft+local_extension-PLY, -alpha-1, -alpha);
/* Research moves that were reduced but now fail high (improve alpha)
* on normal depth (confirm the fail high).
*/
if (score > alpha && local_extension < 0) {
score = -alphabeta(game, depth+1, draft-PLY, -beta, -alpha);
}
takeback(game);
/* Acquire the lock so we can update the split point */
acquire_lock(&split->lock);
/* TODO: keep track of maximum nodes searched in a sub tree and the
* move that goes with it.
* For updating ALL-nodes.
*/
if (abort_search)
stop_split_search(split);
if (!thread_should_stop(game->thread_id)) {
if (score > split->score) {
split->score = score;
if (score > split->alpha) {
split->alpha = score;
/* Store best move */
split->best_move = move;
/* Beta cutoff, terminate all workers */
if (split->alpha >= split->beta) {
stop_split_search(split);
break;
}
}
}
alpha = split->alpha;
}
}
/* We're done with this split point */
assert(split->workers & (1<<game->thread_id));
split->workers ^= 1<<game->thread_id;
/* Update node counters */
split->game->positions_evaluated += game->positions_evaluated;
split->game->moves_searched += game->moves_searched;
split->game->positions_in_hashtable += game->positions_in_hashtable;
split->game->branches_pruned += game->branches_pruned;
split->game->branches_pruned_1st += game->branches_pruned_1st;
release_lock(&split->lock);
}
Code: Select all
void split(gamestate_t *game, movelist_t *movelist, int static_score, int *alpha, int *beta, int depth, int draft, int *score, move_t *best_move)
{
int me = game->thread_id;
int n;
if (threads_created <= 1) return;
/* We have to manipulate the split point stack */
acquire_lock(&split_mutex);
/* Maximum number of split points reached? */
if (thread[me].active_splits == MAX_SPLIT) goto done;
/* Test if there is an idle thread waiting */
if (!thread_available(me)) goto done;
/* There are threads available, initialise a split point */
split_t *sp = &splitpoint[me][thread[me].active_splits];
thread[me].active_splits++;
/* Copy data */
TRACE("Creating split point, ply = %d depth = %d [a,b]=[%d, %d]\n", depth, draft/PLY, *alpha, *beta);
copy_game_state_to_splitpoint(game, sp->game);
sp->movelist = movelist;
sp->parent = thread[me].splitpoint;
sp->alpha = *alpha;
sp->beta = *beta;
sp->score = *score;
sp->depth = depth;
sp->draft = draft;
sp->best_move = *best_move;
sp->stop = false;
sp->workers = 0;
thread[me].splitpoint = sp;
/* Assign all workers */
int num_workers = 0;
for (n=0; n<threads_created; n++) {
sp->workers |= (1<<me);
if (thread_can_help(n, me) || n == me) {
TRACE("Scheduling work for thread %d\n", n);
thread[n].state = THREAD_SCHEDULED;
thread[n].splitpoint = sp;
copy_game_state_from_splitpoint(sp->game, thread[n].game);
sp->workers |= (1<<n);
num_workers++;
}
}
/* We should have more than one worker available, otherwise we would have exited
* this function at the top.
*/
assert(num_workers > 1);
/* We can release the split lock since workers have already been marked as
* "scheduled"
*/
release_lock(&split_mutex);
/* Now kick all workers into action */
TRACE("Starting threads\n");
for (n=0; n<threads_created; n++) {
if (sp->workers & (1<<n)) {
thread[n].state = THREAD_WORKING;
}
}
/* Enter thread loop for the master thread */
TRACE("Entering loop (#%d)\n", me);
search_split(thread[me].game, thread[me].splitpoint);
assert( (sp->workers & (1<<me)) == 0);
while (sp->workers);
TRACE("Exit loop (#%d)\n", me);
/* Update variables from the split point and return */
acquire_lock(&split_mutex);
*alpha = sp->alpha;
*beta = sp->beta;
*score = sp->score;
*best_move = sp->best_move;
thread[me].active_splits--;
thread[me].splitpoint = sp->parent;
TRACE("Joined split point, ply = %d depth = %d [a,b]=[%d, %d]\n", depth, draft/PLY, *alpha, *beta);
game->positions_evaluated += sp->game->positions_evaluated;
game->moves_searched += sp->game->moves_searched;
game->positions_in_hashtable += sp->game->positions_in_hashtable;
game->branches_pruned += sp->game->branches_pruned;
game->branches_pruned_1st += sp->game->branches_pruned_1st;
done:
release_lock(&split_mutex);
return;
}
Code: Select all
/* Search remaining moves if no cut-off yet */
if (alpha < beta) {
/* Get the static evaluation score - if we haven't calculated it
* already and are not in check.
*/
if (!in_check(0) && static_score == -CHECKMATE)
static_score = static_evaluation(game, me, alpha, beta);
sort_moves(movelist.perm, movelist.num_moves, movelist.score);
get_next_move(&movelist); /* Skip the first move, already searched */
assert(best == movelist.perm[0]);
if (!pv_node && split_ok && draft > 4*PLY)
split(game, &movelist, static_score, &alpha, &beta, depth, draft, &best_score, &hash_move);
while(movelist.cur_move < movelist.num_moves && alpha < beta) {
/* Search moves */
}
}
the function that checks for available threads is largely taken from Viper:
Code: Select all
static bool thread_can_help(int helper, int master)
{
if (helper == master) return false; /* Can't help self */
if (thread[helper].state != THREAD_WAITING) return false; /* Can't help if busy */
int active_splits = thread[helper].active_splits;
if (active_splits == 0) return true; /* Can always help if idle */
/* If we're "the other thread" we can always help. */
if (threads_created == 2) return true;
/* TODO: if the helper has active splits, but they are on the same branch as the
* master is searching, then it can still help.
*/
if (splitpoint[helper][active_splits - 1].workers & (1<<master))
return true;
return false;
}
/* Returns true if there are any threads available to help the master thread */
static bool thread_available(int master)
{
int n;
for (n=0; n<threads_created; n++) {
if (thread_can_help(n, master))
return true;
}
return false;
}
Code: Select all
static thread_t thread[MAX_THREADS];
static int threads_created = 0;
static split_t splitpoint[MAX_THREADS][MAX_SPLIT];
/* Condition variable for threads to go to sleep */
pthread_cond_t sleep_condition;
pthread_mutex_t sleep_mutex;
lock_t split_mutex;
/* Return the total number of cores on the current machine.
* NB: returns the number of "logical" cores on hyper-threading machines.
*/
int get_number_of_cores(void)
{
#ifdef WIN32
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
return sysinfo.dwNumberOfProcessors;
#else
return sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
/* Kick all threads into action */
void wake_all_threads(void)
{
pthread_mutex_lock(&sleep_mutex);
pthread_cond_broadcast(&sleep_condition);
pthread_mutex_unlock(&sleep_mutex);
}
/* Tell all threads to go to sleep */
void sleep_all_threads(void)
{
int n;
for (n=1; n<threads_created; n++)
thread[n].state = THREAD_SLEEPING;
}
/* Initialise data structures for the specified number of threads */
void init_threads(int num_threads)
{
int n;
if (num_threads > MAX_THREADS) num_threads = MAX_THREADS;
if (threads_created)
kill_threads();
threads_created = 0;
if (num_threads < 2) return;
/* Initialise global condition and mutex variables */
pthread_cond_init(&sleep_condition, NULL);
pthread_mutex_init(&sleep_mutex, NULL);
init_lock(&split_mutex);
/* Initialise split points */
for (n=0; n<num_threads; n++) {
for (int k=0; k<MAX_SPLIT; k++) {
splitpoint[n][k].game = create_game();
init_lock(&splitpoint[n][k].lock);
}
}
/* Set options for created threads: run them detached from the main thread */
pthread_attr_t attributes;
pthread_attr_init(&attributes);
pthread_attr_setdetachstate(&attributes, PTHREAD_CREATE_DETACHED);
/* Initialise threads */
memset(&thread, 0, sizeof thread);
for (n=0; n<num_threads; n++) {
gamestate_t *game = create_game();
thread[n].thread_id = n;
thread[n].must_die = false;
thread[n].game = game;
if (n == 0) continue;
thread[n].state = THREAD_SLEEPING;
if (pthread_create(&thread[n].pt, NULL, thread_func, &thread[n]) != 0) {
printf("Failed to launch thread %d\n", n);
exit(0);
}
}
thread[0].state = THREAD_WORKING;
thread[0].must_die = false;
threads_created = num_threads;
}
/* Terminate all threads */
void kill_threads(void)
{
int n;
for (n=1; n<MAX_THREADS; n++) {
thread[n].must_die = true;
}
wake_all_threads();
for (n=1; n<threads_created; n++) {
pthread_join(thread[n].pt, NULL);
thread[n].game->transposition_table = NULL; /* The transposition table is shared */
thread[n].game->eval_hash = NULL; /* The transposition table is shared */
end_game(thread[n].game);
}
threads_created = 0;
}
/* Set status word: terminate search at the secified split point, in case of a
* beta-cutoff.
*/
void stop_split_search(split_t *split)
{
split->stop = true;
}
I'm sure it's not the best implementation in the world and there is much that can be improved, but at least it seems to be working so far and hopefully this write-up will be useful to someone later on.