We present two patches to be applied above Stockfish 1.7.1, the first does some small rework of current SMP code and is a prerequisite for the second one that does the actual work of DTS-ification of YBWC.
The patch was started by me, but was then deeply reworked and improved by Joona that can be considered the author.
1) PREREQUISITE PATCH to be applied as first
Code: Select all
Subject: [PATCH 1/2] Prerequisite patch
Retire splitPoint->cpus field. It is needed by next
patch that does the actual work of late-joining.
---
diff --git a/src/search.cpp b/src/search.cpp
index 0fa7824..e6de59f 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -1905,7 +1905,6 @@ namespace {
/* Here we have the lock still grabbed */
sp->slaves[threadID] = 0;
- sp->cpus--;
lock_release(&(sp->lock));
}
@@ -2028,7 +2027,6 @@ namespace {
/* Here we have the lock still grabbed */
sp->slaves[threadID] = 0;
- sp->cpus--;
lock_release(&(sp->lock));
}
@@ -2700,10 +2698,18 @@ namespace {
threads[threadID].state = THREAD_AVAILABLE;
}
- // If this thread is the master of a split point and all threads have
+ // If this thread is the master of a split point and all slaves have
// finished their work at this split point, return from the idle loop.
- if (waitSp != NULL && waitSp->cpus == 0)
+ int i = 0;
+ for ( ; waitSp && i < ActiveThreads && !waitSp->slaves[i]; i++) {}
+
+ if (i == ActiveThreads)
{
+ // Because sp->slaves[] is reset under lock protection,
+ // be sure sp->lock has been released before to return.
+ lock_grab(&(waitSp->lock));
+ lock_release(&(waitSp->lock));
+
assert(threads[threadID].state == THREAD_AVAILABLE);
threads[threadID].state = THREAD_SEARCHING;
@@ -2882,9 +2888,8 @@ namespace {
// data that must be copied to the helper threads (the current position and
// search stack, alpha, beta, the search depth, etc.), and we tell our
// helper threads that they have been assigned work. This will cause them
- // to instantly leave their idle loops and call sp_search_pv(). When all
- // threads have returned from sp_search_pv (or, equivalently, when
- // splitPoint->cpus becomes 0), split() returns true.
+ // to instantly leave their idle loops and call sp_search(). When all
+ // threads have returned from sp_search() then split() returns true.
bool ThreadsManager::split(const Position& p, SearchStack* sstck, int ply,
Value* alpha, const Value beta, Value* bestValue,
@@ -2931,7 +2936,6 @@ namespace {
splitPoint->master = master;
splitPoint->mp = mp;
splitPoint->moves = *moves;
- splitPoint->cpus = 1;
splitPoint->pos = &p;
splitPoint->parentSstack = sstck;
for (int i = 0; i < ActiveThreads; i++)
@@ -2943,17 +2947,19 @@ namespace {
// If we are here it means we are not available
assert(threads[master].state != THREAD_AVAILABLE);
+ int workersCnt = 1; // At least the master is included
+
// Allocate available threads setting state to THREAD_BOOKED
- for (int i = 0; i < ActiveThreads && splitPoint->cpus < MaxThreadsPerSplitPoint; i++)
+ for (int i = 0; i < ActiveThreads && workersCnt < MaxThreadsPerSplitPoint; i++)
if (thread_is_available(i, master))
{
threads[i].state = THREAD_BOOKED;
threads[i].splitPoint = splitPoint;
splitPoint->slaves[i] = 1;
- splitPoint->cpus++;
+ workersCnt++;
}
- assert(splitPoint->cpus > 1);
+ assert(workersCnt > 1);
// We can release the lock because slave threads are already booked and master is not available
lock_release(&MPLock);
@@ -2974,8 +2980,7 @@ namespace {
// which it will instantly launch a search, because its state is
// THREAD_WORKISWAITING. We send the split point as a second parameter to the
// idle loop, which means that the main thread will return from the idle
- // loop when all threads have finished their work at this split point
- // (i.e. when splitPoint->cpus == 0).
+ // loop when all threads have finished their work at this split point.
idle_loop(master, splitPoint);
// We have returned from the idle loop, which means that all threads are
diff --git a/src/thread.h b/src/thread.h
index 8697784..c70800c 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -55,7 +55,7 @@ struct SplitPoint {
Depth depth;
bool mateThreat;
Value beta;
- int ply, master, slaves[MAX_THREADS];
+ int ply, master;
SearchStack sstack[MAX_THREADS][PLY_MAX_PLUS_2];
// Const pointers to shared data
@@ -67,8 +67,8 @@ struct SplitPoint {
volatile Value alpha;
volatile Value bestValue;
volatile int moves;
- volatile int cpus;
volatile bool stopRequest;
+ volatile int slaves[MAX_THREADS];
};
// ThreadState type is used to represent thread's current state
--
1.6.5.1.1367.gcd48
2) MAIN PATCH to be applied as second
Code: Select all
Subject: [PATCH 2/2] Joona's late joining
In YBWC when one slave threads finishes it waits for
some master to pick it up. In DTS instead actively
searches for some work to do.
This patch is a DTS-ification of YBWC in which the slave
patch when finishes try to join an active split point instead
of going to waiting.
---
diff --git a/src/search.cpp b/src/search.cpp
index e6de59f..2bb8620 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -78,11 +78,13 @@ namespace {
int64_t nodes_searched() const;
void get_beta_counters(Color us, int64_t& our, int64_t& their) const;
bool available_thread_exists(int master) const;
+ bool HMC_criterion(const SplitPoint *ancestor, const SplitPoint *sp) const;
bool thread_is_available(int slave, int master) const;
bool thread_should_stop(int threadID) const;
void wake_sleeping_threads();
void put_threads_to_sleep();
void idle_loop(int threadID, SplitPoint* waitSp);
+ bool try_late_join(int threadID, SplitPoint *idleSp);
bool split(const Position& pos, SearchStack* ss, int ply, Value* alpha, const Value beta, Value* bestValue,
Depth depth, bool mateThreat, int* moves, MovePicker* mp, int master, bool pvNode);
@@ -1904,6 +1906,9 @@ namespace {
/* Here we have the lock still grabbed */
+ // No more work to do, so we are leaving.
+ // Prevent other threads from joining
+ sp->allowLateJoin = false;
sp->slaves[threadID] = 0;
lock_release(&(sp->lock));
@@ -2026,6 +2031,9 @@ namespace {
/* Here we have the lock still grabbed */
+ // No more work to do, so we are leaving.
+ // Prevent other threads from joining
+ sp->allowLateJoin = false;
sp->slaves[threadID] = 0;
lock_release(&(sp->lock));
@@ -2639,6 +2647,64 @@ namespace {
}
}
+ bool ThreadsManager::try_late_join(int threadID, SplitPoint *idleSp) {
+
+ int spIdx = 0, master = 0;
+ Depth maxDepth = Depth(0);
+
+ // Find active SplitPoint with highest possible depth
+ for (int i = 0; i != threadID && i < ActiveThreads; i++)
+ {
+ int actSp = threads[i].activeSplitPoints;
+
+ for (int j = 0; j < actSp; j++)
+ {
+ SplitPoint* sp = &SplitPointStack[i][j];
+ if ( sp->depth > maxDepth
+ && sp->allowLateJoin
+ && (!idleSp || HMC_criterion(idleSp, sp)))
+ {
+ maxDepth = sp->depth;
+ master = i;
+ spIdx = j;
+ }
+ }
+ }
+
+ if (maxDepth == Depth(0))
+ return false;
+
+ // This will be our new split point
+ SplitPoint* splitPoint = &SplitPointStack[master][spIdx];
+
+ // Grab the SplitPoint lock to prevent splitpoint from dying
+ lock_grab(&(splitPoint->lock));
+
+ // Recheck under lock protection
+ if ( splitPoint->allowLateJoin
+ && (!idleSp || HMC_criterion(idleSp, splitPoint)))
+ {
+ /* nothing */
+ }
+ else
+ {
+ lock_release(&(splitPoint->lock));
+ return false;
+ }
+
+ SearchStack* sstck = splitPoint->parentSstack;
+ int ply = splitPoint->ply;
+
+ // At last copy the data and start the thread !
+ memcpy(splitPoint->sstack[threadID] + ply - 1, sstck + ply - 1, 4 * sizeof(SearchStack));
+
+ threads[threadID].splitPoint = splitPoint;
+ splitPoint->slaves[threadID] = 1;
+ threads[threadID].state = THREAD_WORKISWAITING;
+
+ lock_release(&(splitPoint->lock));
+ return true;
+ }
// idle_loop() is where the threads are parked when they have no work to do.
// The parameter "waitSp", if non-NULL, is a pointer to an active SplitPoint
@@ -2695,7 +2761,11 @@ namespace {
assert(threads[threadID].state == THREAD_SEARCHING);
- threads[threadID].state = THREAD_AVAILABLE;
+ // If is not the master of this split point then try to
+ // late join some other sp immediately. Otherwise mark thread
+ // as available for future splits.
+ if (ActiveThreads <= 2 || !try_late_join(threadID, waitSp))
+ threads[threadID].state = THREAD_AVAILABLE;
}
// If this thread is the master of a split point and all slaves have
@@ -2705,11 +2775,6 @@ namespace {
if (i == ActiveThreads)
{
- // Because sp->slaves[] is reset under lock protection,
- // be sure sp->lock has been released before to return.
- lock_grab(&(waitSp->lock));
- lock_release(&(waitSp->lock));
-
assert(threads[threadID].state == THREAD_AVAILABLE);
threads[threadID].state = THREAD_SEARCHING;
@@ -2854,15 +2919,24 @@ namespace {
if (ActiveThreads == 2)
return true;
- // Apply the "helpful master" concept if possible. Use localActiveSplitPoints
- // that is known to be > 0, instead of threads[slave].activeSplitPoints that
- // could have been set to 0 by another thread leading to an out of bound access.
- if (SplitPointStack[slave][localActiveSplitPoints - 1].slaves[master])
+ // Helpful master concept.
+ // Check if 'slave'-candidate is an ancestor of master.
+ const SplitPoint* ancestor = &(SplitPointStack[slave][localActiveSplitPoints - 1]);
+ SplitPoint* sp = threads[master].splitPoint;
+ if (HMC_criterion(ancestor, sp))
return true;
return false;
}
+ bool ThreadsManager::HMC_criterion(const SplitPoint *ancestor, const SplitPoint *sp) const {
+
+ for (; sp != NULL; sp = sp->parent)
+ if (sp == ancestor)
+ return true;
+
+ return false;
+ }
// available_thread_exists() tries to find an idle thread which is available as
// a slave for the thread with threadID "master".
@@ -2923,6 +2997,8 @@ namespace {
// Pick the next available split point object from the split point stack
splitPoint = &SplitPointStack[master][threads[master].activeSplitPoints];
+ assert(splitPoint->allowLateJoin == false);
+
// Initialize the split point object
splitPoint->parent = threads[master].splitPoint;
splitPoint->stopRequest = false;
@@ -2976,6 +3052,9 @@ namespace {
threads[i].state = THREAD_WORKISWAITING; // This makes the slave to exit from idle_loop()
}
+ // Allow threads becoming idle to late join
+ splitPoint->allowLateJoin = true;
+
// Everything is set up. The master thread enters the idle loop, from
// which it will instantly launch a search, because its state is
// THREAD_WORKISWAITING. We send the split point as a second parameter to the
diff --git a/src/thread.h b/src/thread.h
index c70800c..8ac1138 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -68,6 +68,7 @@ struct SplitPoint {
volatile Value bestValue;
volatile int moves;
volatile bool stopRequest;
+ volatile bool allowLateJoin;
volatile int slaves[MAX_THREADS];
};
--
1.6.5.1.1367.gcd48