goto thread (split)

sje · Post by **sje** » Fri Aug 02, 2013 3:32 pm

Don wrote:... work he has done with the chess standards of PGN, FEN, EPD...

Well, if I hadn't done those, eventually someone else would have and maybe they would have done a better job. Instead of being thanked, perhaps I should be criticized for not distributing them earlier; nearly all that work had been in existence for seven years or more before they became public. But this was all in the pre-Web days, so I can't be blamed too much.

sje · Post by **sje** » Fri Aug 02, 2013 3:50 pm

Here is some sample code; Symbolic's move maker routine. Notice how null moves are handled without fuss.

Code: Select all

void Position&#58;&#58;Execute&#40;const Move move&#41;
&#123;
  // Sanity test

  if &#40;PfMvExecute&#41;
    if &#40;IsEvilInCheck&#40;))
      Die&#40;"Position&#58;&#58;Execute", "Evil king in check/0");

  const Cabs savecabs = cabs;
  const Sq   saveepsq = epsq;

  // Save the move

  MoveNodePtr movenodeptr = movepool.AllocateNode&#40;);

  movenodeptr->PutMove&#40;move&#41;;

  // Save restorable items

  PSPRNodePtr psprnodeptr = psprpool.AllocateNode&#40;);

  psprnodeptr->PutGood&#40;good&#41;;
  psprnodeptr->PutEvil&#40;evil&#41;;
  psprnodeptr->PutCabs&#40;cabs&#41;;
  psprnodeptr->PutEpsq&#40;epsq&#41;;
  psprnodeptr->PutHmvc&#40;hmvc&#41;;
  psprnodeptr->PutFmvn&#40;fmvn&#41;;
  psprnodeptr->PutInch&#40;inch&#41;;
  psprnodeptr->PutIndc&#40;indc&#41;;
  psprnodeptr->PutPmbb&#40;pmbb&#41;;
  psprnodeptr->PutFmbb&#40;fmbb&#41;;

  psprnodeptr->PutFullHash&#40;fullhash&#41;;
  psprnodeptr->PutMtrlHash&#40;mtrlhash&#41;;
  psprnodeptr->PutPawnHash&#40;pawnhash&#41;;

  // Constant initialization

  const bool notnullflag = move.IsNotNull&#40;);
  const Sq frsq = move.GetFrSq&#40;), tosq = move.GetToSq&#40;);
  const Man frman = move.GetFrMan&#40;), toman = move.GetToMan&#40;);
  const Mc mc = move.GetMc&#40;);

  // Men movement

  if &#40;notnullflag&#41;
  &#123;
    switch &#40;mc&#41;
    &#123;
      case McReg&#58;

        if &#40;IsManNotVacant&#40;toman&#41;)
        &#123;
          Capture&#40;frsq, tosq, good, evil, frman, toman&#41;;
          tracker.TargetCapture&#40;tosq&#41;;
          tracker.SimpleMovement&#40;frsq, tosq&#41;;
        &#125;
        else
        &#123;
          MoveMan&#40;frsq, tosq, good, frman&#41;;
          tracker.SimpleMovement&#40;frsq, tosq&#41;;
        &#125;;
        break;

      case McEPC&#58;
        &#123;
          const Sq vpsq = EPVictimSquare&#40;);

          DelMan&#40;vpsq, evil, SynthPawn&#91;evil&#93;);
          MoveMan&#40;frsq, tosq, good, frman&#41;;
          tracker.TargetCapture&#40;vpsq&#41;;
          tracker.SimpleMovement&#40;frsq, tosq&#41;;
        &#125;;
        break;

      case McCQS&#58;
      case McCKS&#58;
        &#123;
          const Castling castling = CvColorMcToCastling&#91;good&#93;&#91;mc&#93;;
          const Sq rhsq = CvCastlingToRookHomeSq&#91;castling&#93;;
          const Sq rasq = CvCastlingToRookAwaySq&#91;castling&#93;;

          MoveMan&#40;frsq, tosq, good, frman&#41;;
          MoveMan&#40;rhsq, rasq, good, SynthRook&#91;good&#93;);
          tracker.SimpleMovement&#40;frsq, tosq&#41;;
          tracker.SimpleMovement&#40;rhsq, rasq&#41;;
        &#125;;
        break;

      case McPPN&#58;
      case McPPB&#58;
      case McPPR&#58;
      case McPPQ&#58;
        if &#40;IsManNotVacant&#40;toman&#41;)
        &#123;
          DelMan&#40;tosq, evil, toman&#41;;
          tracker.TargetCapture&#40;tosq&#41;;
        &#125;;
        DelMan&#40;frsq, good, frman&#41;;
        AddMan&#40;tosq, good, CvColorMcToMan&#91;good&#93;&#91;mc&#93;);
        tracker.SimpleMovement&#40;frsq, tosq&#41;;
        break;

      default&#58;
        SwitchFault&#40;"Position&#58;&#58;Execute");
        break;
    &#125;;
  &#125;;

  // Scalar adjustment&#58; colors

  FlipColor&#40;good&#41;;
  FlipColor&#40;evil&#41;;

  // Scalar adjustment&#58; castlings

  if &#40;notnullflag&#41;
    if &#40;cabs != 0&#41;
      cabs = &#40;Cabs&#41; (&#40;ui&#41; cabs & ~&#40;CastlingCancel&#91;frsq&#93; | CastlingCancel&#91;tosq&#93;));

  // Scalar adjustment&#58; en passant target

  epsq = SqNil;
  if &#40;notnullflag&#41;
    if &#40;move.IsPawnMove&#40;) && (&#40;frsq ^ tosq&#41; == &#40;FileLen * 2&#41;))
    &#123;
      const Sq candepsq = &#40;Sq&#41; &#40;frsq + CvColorToForwardDelta&#91;evil&#93;);

      if &#40;HasAtLeastOneEPCapture&#40;candepsq&#41;)
        epsq = candepsq;
    &#125;;

  // Scalar adjustment&#58; half move counter

  if &#40;move.IsResetsHmvc&#40;))
    hmvc = 0;
  else
    hmvc++;

  // Scalar adjustment&#58; full move number

  if &#40;IsColorWhite&#40;good&#41;)
    fmvn++;

  // Hash adjustment&#58; castlings

  if &#40;cabs != savecabs&#41;
    for &#40;Castling castling = &#40;Castling&#41; 0; castling < CastlingLen; IncrCastling&#40;castling&#41;)
      if (&#40;cabs & BX&#40;castling&#41;) != &#40;savecabs & BX&#40;castling&#41;))
        fullhash.FoldHashCastling&#40;castling&#41;;

  // Hash adjustment&#58; en passant target

  if &#40;epsq != saveepsq&#41;
  &#123;
    if &#40;IsSqNotNil&#40;saveepsq&#41;) fullhash.FoldHashEpsq&#40;saveepsq&#41;;
    if &#40;IsSqNotNil&#40;epsq&#41;)     fullhash.FoldHashEpsq&#40;epsq&#41;;
  &#125;;

  // Regenerate various status items

  RegenerateStatus&#40;);

  // Sanity test

  if &#40;PfMvExecute&#41;
    if &#40;IsEvilInCheck&#40;))
      Die&#40;"Position&#58;&#58;Execute", "Evil king in check/1");
&#125;

Don · Post by **Don** » Fri Aug 02, 2013 3:57 pm

sje wrote:I am not saying that recursion is bad. Symbolic uses recursion as part of its perft(). But perft() is much different from a regular search as I don't expect a perft() call to be interrupted by a time-out or user intervention. A regular search must be able to stop gracefully with its partial results intact. Throwing an exception or calling longjmp() should be used for a truly exceptional condition, like power failure or processor-on-fire.

A new problem occurs with the introduction of a multithreaded search where the search continues while individual threads must be repeatedly re-run with different data. In addition to the requirement to stop gracefully, there is now the need to share data among the threads. It's tough to do this when a lot of interesting data is hidden on the inaccessible activation frames on different stacks local to each thread. With an old style recursive approach, it can be very hard to pause a thread, save its state, and later resume that thread.

Now, with an non-recursive approach, all of the above problems may be solved without excessive anguish. Also, with the details of a thread's state explicitly available and not hidden, a thread could even be restarted on a different machine with a different architecture as long as the semantics remained constant. Indeed, this is one of my goals with Symbolic; to allow multiple machines on the LAN to contribute to a search in a very integrated, yet portable, manner.

Now this is actually a very compelling argument in favor of your approach - a breath of fresh air to see on this forum so often devoid of real content. I am happy to see there actually is a method to your madness!

General comment:

I have noted over many years of professional experience that if a shop allows coders to use unstructured techniques, then the result is nearly always crappy code. It is also expensive code, for it much more often needs costly maintenance and modification. Yet this is tolerated because that extra expense may be pushed to the next business quarter. Sadly, this has become the American Way.

Don · Post by **Don** » Fri Aug 02, 2013 4:08 pm

sje wrote:
Don wrote:... work he has done with the chess standards of PGN, FEN, EPD...
Well, if I hadn't done those, eventually someone else would have and maybe they would have done a better job. Instead of being thanked, perhaps I should be criticized for not distributing them earlier; nearly all that work had been in existence for seven years or more before they became public. But this was all in the pre-Web days, so I can't be blamed too much.

Nobody is blaming you on this, I think it's a really solid standard. There is nothing above nit-picking but this is close to perfect in my opinion.

There is one issue that I think you (or was it someone else?) got wrong and that is the ep field in FEN - but I think even you admitted that once. The issue is that the spec says to set the field after any double pawn move - even if there is no enemy pawn is poised to capture. That makes many semantically equivalent positions look different and is a real pain to deal with in those cases where you are processing fen records for books usage and such. When I export FEN I just don't set that field and it's still correct fen.

Still, that is a minor annoyance - the standard is close to perfect in my opinion and obviously well thought out and your documentation of it is something that brings sanity to the world of computer chess when so often documentation for things like this is so poorly written.

sje · Post by **sje** » Fri Aug 02, 2013 6:14 pm

Don wrote:
sje wrote:Now, with an non-recursive approach, all of the above problems may be solved without excessive anguish. Also, with the details of a thread's state explicitly available and not hidden, a thread could even be restarted on a different machine with a different architecture as long as the semantics remained constant. Indeed, this is one of my goals with Symbolic; to allow multiple machines on the LAN to contribute to a search in a very integrated, yet portable, manner.
Now this is actually a very compelling argument in favor of your approach - a breath of fresh air to see on this forum so often devoid of real content. I am happy to see there actually is a method to your madness!

Yes; I don't recall anyone having tried a general heterogeneous distributed, parallel sub-tree search. I do remember Jonathan Schaffer's Sun Phoenix program which ran on multiple Sun workstation, but I think that effort was a simple split-at-root approach.

In Symbolic, a sub-tree search state is kept in an instance of the class Branch which contains stuff like the root position, the current position, various search input parameters, and various output parameters (PV, expectation). It also contains a linked list of instances of the PlyRec class, one for each currently open in the search. Each PlyRec instance contains all the state information for a ply including the node processing phase. When the Node() routine is called, it just starts or restarts based entirely on the sub-tree state as given by the Branch instance.

Just try doing that with a traditional recursive search.

An open question is: How fast can a Branch instance be sent on a local LAN? There's a lot of data there, and the total size grows with the depth of the search and also the length of the game as a position also includes all position state history. Perhaps a millisecond or two, rather slower than assigning the instance to a local thread.

There is also the problem of having local machines with vastly varying throughput limits. How does this affect task assignment? What is the overhead of having to schedule distributed tasks. What about collecting the results? Also, should task receiving machines themselves be allowed to delegate non-local sub-tasks? Can I hijack my neighbor's WiFi and machine to assist the effort?

michiguel · Post by **michiguel** » Fri Aug 02, 2013 9:19 pm

sje wrote:
Don wrote:
sje wrote:Now, with an non-recursive approach, all of the above problems may be solved without excessive anguish. Also, with the details of a thread's state explicitly available and not hidden, a thread could even be restarted on a different machine with a different architecture as long as the semantics remained constant. Indeed, this is one of my goals with Symbolic; to allow multiple machines on the LAN to contribute to a search in a very integrated, yet portable, manner.
Now this is actually a very compelling argument in favor of your approach - a breath of fresh air to see on this forum so often devoid of real content. I am happy to see there actually is a method to your madness!
Yes; I don't recall anyone having tried a general heterogeneous distributed, parallel sub-tree search. I do remember Jonathan Schaffer's Sun Phoenix program which ran on multiple Sun workstation, but I think that effort was a simple split-at-root approach.

In Symbolic, a sub-tree search state is kept in an instance of the class Branch which contains stuff like the root position, the current position, various search input parameters, and various output parameters (PV, expectation). It also contains a linked list of instances of the PlyRec class, one for each currently open in the search. Each PlyRec instance contains all the state information for a ply including the node processing phase. When the Node() routine is called, it just starts or restarts based entirely on the sub-tree state as given by the Branch instance.

Just try doing that with a traditional recursive search.

I am not sure why this is a problem. The main difference between a recursion approach and iteration is that in the latter you have your own stack. So, you can do recursion and have your own independent stack too, and no local variables. Or at least, keep local only what you want.

In my SMP approach I have all the information of nodes being analyzed in parallel (people call this split nodes if I am not mistaken) in an "external" type of stack. That is addressed with pointers. It is likely I am missing something, but I do not see the advantage of making my search iterative.

Miguel

An open question is: How fast can a Branch instance be sent on a local LAN? There's a lot of data there, and the total size grows with the depth of the search and also the length of the game as a position also includes all position state history. Perhaps a millisecond or two, rather slower than assigning the instance to a local thread.

There is also the problem of having local machines with vastly varying throughput limits. How does this affect task assignment? What is the overhead of having to schedule distributed tasks. What about collecting the results? Also, should task receiving machines themselves be allowed to delegate non-local sub-tasks? Can I hijack my neighbor's WiFi and machine to assist the effort?

Don · Post by **Don** » Fri Aug 02, 2013 10:29 pm

michiguel wrote:
sje wrote:
Don wrote:
sje wrote:Now, with an non-recursive approach, all of the above problems may be solved without excessive anguish. Also, with the details of a thread's state explicitly available and not hidden, a thread could even be restarted on a different machine with a different architecture as long as the semantics remained constant. Indeed, this is one of my goals with Symbolic; to allow multiple machines on the LAN to contribute to a search in a very integrated, yet portable, manner.
Now this is actually a very compelling argument in favor of your approach - a breath of fresh air to see on this forum so often devoid of real content. I am happy to see there actually is a method to your madness!
Yes; I don't recall anyone having tried a general heterogeneous distributed, parallel sub-tree search. I do remember Jonathan Schaffer's Sun Phoenix program which ran on multiple Sun workstation, but I think that effort was a simple split-at-root approach.

In Symbolic, a sub-tree search state is kept in an instance of the class Branch which contains stuff like the root position, the current position, various search input parameters, and various output parameters (PV, expectation). It also contains a linked list of instances of the PlyRec class, one for each currently open in the search. Each PlyRec instance contains all the state information for a ply including the node processing phase. When the Node() routine is called, it just starts or restarts based entirely on the sub-tree state as given by the Branch instance.

Just try doing that with a traditional recursive search.

I am not sure why this is a problem. The main difference between a recursion approach and iteration is that in the latter you have your own stack. So, you can do recursion and have your own independent stack too, and no local variables. Or at least, keep local only what you want.

It's obviously not an unsolvable problem or else we would not have MP programs.

With this approach (which I have some familiarity with due the palm checkers program I wrote) you can exit a search without unwinding the stack. Just exit the search routine and it's done. Or you can stop the search and return to it a week later (by saving the state on disk.)

That was basically a requirement in the PALM OS, you basically had to leave the application on demand and then present the illusion that it was just sitting there ready to be continued even though the reality is that the programmer had to create that illusion by saving the state on exit and restoring it on startup.

In my SMP approach I have all the information of nodes being analyzed in parallel (people call this split nodes if I am not mistaken) in an "external" type of stack. That is addressed with pointers. It is likely I am missing something, but I do not see the advantage of making my search iterative.

I think there was a time when recursion represented a fairly big loss in performance if the routines being recursed were small but that has not been so for years. But it is possible to do this without it being too ugly.

Miguel

An open question is: How fast can a Branch instance be sent on a local LAN? There's a lot of data there, and the total size grows with the depth of the search and also the length of the game as a position also includes all position state history. Perhaps a millisecond or two, rather slower than assigning the instance to a local thread.

There is also the problem of having local machines with vastly varying throughput limits. How does this affect task assignment? What is the overhead of having to schedule distributed tasks. What about collecting the results? Also, should task receiving machines themselves be allowed to delegate non-local sub-tasks? Can I hijack my neighbor's WiFi and machine to assist the effort?

bob · Post by **bob** » Sat Aug 03, 2013 6:12 am

Don wrote:
sje wrote:I am not saying that recursion is bad. Symbolic uses recursion as part of its perft(). But perft() is much different from a regular search as I don't expect a perft() call to be interrupted by a time-out or user intervention. A regular search must be able to stop gracefully with its partial results intact. Throwing an exception or calling longjmp() should be used for a truly exceptional condition, like power failure or processor-on-fire.

A new problem occurs with the introduction of a multithreaded search where the search continues while individual threads must be repeatedly re-run with different data. In addition to the requirement to stop gracefully, there is now the need to share data among the threads. It's tough to do this when a lot of interesting data is hidden on the inaccessible activation frames on different stacks local to each thread. With an old style recursive approach, it can be very hard to pause a thread, save its state, and later resume that thread.

Now, with an non-recursive approach, all of the above problems may be solved without excessive anguish. Also, with the details of a thread's state explicitly available and not hidden, a thread could even be restarted on a different machine with a different architecture as long as the semantics remained constant. Indeed, this is one of my goals with Symbolic; to allow multiple machines on the LAN to contribute to a search in a very integrated, yet portable, manner.

Now this is actually a very compelling argument in favor of your approach - a breath of fresh air to see on this forum so often devoid of real content. I am happy to see there actually is a method to your madness!

General comment:

I have noted over many years of professional experience that if a shop allows coders to use unstructured techniques, then the result is nearly always crappy code. It is also expensive code, for it much more often needs costly maintenance and modification. Yet this is tolerated because that extra expense may be pushed to the next business quarter. Sadly, this has become the American Way.

Iterated search is a good idea. See DTS. But it does offer drawbacks as well, one of which is less intuitive code (recursion is quite nice).

bob · Post by **bob** » Sat Aug 03, 2013 6:14 am

michiguel wrote:
sje wrote:
Don wrote:
sje wrote:Now, with an non-recursive approach, all of the above problems may be solved without excessive anguish. Also, with the details of a thread's state explicitly available and not hidden, a thread could even be restarted on a different machine with a different architecture as long as the semantics remained constant. Indeed, this is one of my goals with Symbolic; to allow multiple machines on the LAN to contribute to a search in a very integrated, yet portable, manner.
Now this is actually a very compelling argument in favor of your approach - a breath of fresh air to see on this forum so often devoid of real content. I am happy to see there actually is a method to your madness!
Yes; I don't recall anyone having tried a general heterogeneous distributed, parallel sub-tree search. I do remember Jonathan Schaffer's Sun Phoenix program which ran on multiple Sun workstation, but I think that effort was a simple split-at-root approach.

In Symbolic, a sub-tree search state is kept in an instance of the class Branch which contains stuff like the root position, the current position, various search input parameters, and various output parameters (PV, expectation). It also contains a linked list of instances of the PlyRec class, one for each currently open in the search. Each PlyRec instance contains all the state information for a ply including the node processing phase. When the Node() routine is called, it just starts or restarts based entirely on the sub-tree state as given by the Branch instance.

Just try doing that with a traditional recursive search.

I am not sure why this is a problem. The main difference between a recursion approach and iteration is that in the latter you have your own stack. So, you can do recursion and have your own independent stack too, and no local variables. Or at least, keep local only what you want.

In my SMP approach I have all the information of nodes being analyzed in parallel (people call this split nodes if I am not mistaken) in an "external" type of stack. That is addressed with pointers. It is likely I am missing something, but I do not see the advantage of making my search iterative.

Miguel

An open question is: How fast can a Branch instance be sent on a local LAN? There's a lot of data there, and the total size grows with the depth of the search and also the length of the game as a position also includes all position state history. Perhaps a millisecond or two, rather slower than assigning the instance to a local thread.

There is also the problem of having local machines with vastly varying throughput limits. How does this affect task assignment? What is the overhead of having to schedule distributed tasks. What about collecting the results? Also, should task receiving machines themselves be allowed to delegate non-local sub-tasks? Can I hijack my neighbor's WiFi and machine to assist the effort?

The advantage is this: When you choose to do a split, you can see the state of ALL parts of the tree being searched. You can pick any node in any active path and split there instantly, without having to abort or anything. But the code is not as clean as a recursive search, which is why I have (so far) elected to stick with recursive. With larger numbers of processors, seeing everything is probably going to tip the balance back toward iterated search. But I hate to give up the cleanliness of a recursive search.

sje · Post by **sje** » Sat Aug 03, 2013 7:11 am

bob wrote:Iterated search is a good idea. See DTS. But it does offer drawbacks as well, one of which is less intuitive code (recursion is quite nice).

You're right: the first time I did one of these, yes, it was counter intuitive to an extent. The phase (case body of the big switch) which moved down one ply and the corresponding phase which handled the result from a lower ply were both tricky to write. Now that I've done it, it seems easy!

To help with the design, I decided to let the "caller" (less deeper ply in a transition) do all the work of executing the move, setting up input parameters, etc. for the "callee" (more deeper ply in a transition), and later recovering output parameter values. Only within the "caller" phase does the code access anything other than the current ply record.

Hint: It can be helpful to first write a non-recursive perft().

Hint: Doing IID or the like means adding an extra phase pair (down and up) corresponding to each recursive call in a recursive design.

Here is a recent version of Symbolic's PlyRec class specification. Note the member variables: constants, inputs, outputs, and working storage. That's everything needed to contain the state fof the Node() routine at any ply.

Code: Select all

class PlyRec
&#123;
public&#58;
  PlyRec&#40;const ui plyvalue, BranchPtr branchptrvalue&#41;;
  ~PlyRec&#40;void&#41; &#123;&#125;

  void Reset&#40;void&#41;;

  void Generate&#40;void&#41;;
  void GenerateCanonical&#40;void&#41;;
  void GenerateChecks&#40;void&#41;;
  void GenerateGainers&#40;void&#41;;
  
  ui        conply;      // Constant&#58; Ply
  bool      isply0;      // Constant&#58; True if at ply zero &#40;root&#41;
  bool      isply1;      // Constant&#58; True if at ply one
  bool      isp0good;    // Constant&#58; True if ply zero mover is active
  bool      isp0evil;    // Constant&#58; True if ply zero mover is not active

  Phase     phase;       // Input parameter&#58; Initial phase &#40;mutable&#41;
  Window    basewindow;  // Input parameter&#58; A/B window &#40;constant&#41;
  si        depth;       // Input parameter&#58; Remaining search depth &#40;constant&#41;

  MoveList  PV;          // Output parameter&#58; Predicted variation
  SV        expectation; // Output parameter&#58; Predicted expectation

  NodeKind  nodekind;    // Directs move generation and scanning
  Window    window;      // Mutable scoring window
  GMVec     gmvec;       // Move generation vector
  Move      move;        // Current move of interest
  ui        trycount;    // Count of moves tried so far
  NC        nodecount;   // Cumulative number of nodes

private&#58;
  BranchPtr branchptr;   // Constant&#58; Pointer to the containing Branch instance
&#125;;

And here's the current list of the Node() phase names (needs comments):

Code: Select all

typedef enum
&#123;
  PhaseNil = -1,
  PhaseNodeEnter,
  PhaseNodeDetermineKind,
  PhaseNodeGenerate,
  PhaseNodeTryNext,
  PhaseNodePostRetract,
  PhaseNodePostScan,
  PhaseNodeExit,
  PhaseSearchEnter,
  PhaseSearchExit,
  PhaseSwitchExit
&#125; Phase;

goto thread (split)

Re: Some statistics about promotions and underpromotions.

Sample code

Re: Bull

Re: Some statistics about promotions and underpromotions.

Re: Bull

Re: Bull

Re: Bull

Re: Bull

Re: Bull

Re: Bull