Strange results when TT's and quiescence are combined??

Bombax · Post by **Bombax** » Fri Mar 29, 2013 6:42 pm

Hi all.

We're having some problems with a chess program we're developing. The program right now uses negascout (fail-soft) with PV move ordering, iterative deepening and an always-replace transposition table. It also uses SEE-ordered quiescence search (fail-hard).

The problem occurs when both quiescence search and transposition tables are activated. In this case the program produces moves/scores that seem "off". The test position at the moment is this one:

FEN: r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 1

The program plays the same move (Bxa6) but has a worse score with both activated (-226) than with transposition tables off (732) or both off (1034), and plays worse moves after this one. We're wondering if we've made an implementation mistake.

I have taken the time to produce a clean pseudo-code version of our program below. I would be delighted if anyone could explain what we might be doing wrong, or what some good steps to debug the program is, or test positions to debug, and so on, so we can proceed. Thanks!

Code: Select all

int maxd;

tt_replace&#40;u64 zobrist, int score, int scoretype, int depth&#41;;

void start&#40;Position pos, int d&#41; &#123;
    for &#40;maxd=1; maxd<d; maxd++)
        negascout_best&#40;pos, 0&#41;;
&#125;

int negascout_best&#40;Position pos, int d&#41; &#123;
    if &#40;fifty_move_rule&#40;pos&#41;) return 0;
    if &#40;repetition_rule&#40;pos&#41;) return 0;
    
    if &#40;tt_contains_entry&#40;pos.zobrist&#41;) &#123;
        entry = tt_get_entry&#40;pos.zobrist&#41;;
        if &#40;entry.depth >= maxd-d && is_exact_score&#40;entry&#41;) &#123;
            return entry.score;
        &#125;
    &#125;

    if &#40;d == maxd&#41; &#123;
        score = quiescence&#40;pos, 0, -INF_SCORE, INF_SCORE&#41;;
        tt_replace&#40;pos.zobrist, score, EXACT_SCORE, 0&#41;;
        return score;
    &#125;

    b = INF_SCORE; maxscore = -INF_SCORE;
    children = get_children_pv&#40;pos&#41;; // ordered using the pv
    for &#40;all children&#41; &#123;
        if &#40;child_is_pv&#40;child&#41;)
            score = -negascout_best&#40;child, d+1&#41;;
        else
            score = -negascout&#40;child, d+1, -b, -maxscore&#41;;
        if &#40;score > maxscore&#41; &#123; // score improved
            if &#40;b == INF_SCORE || d >= maxd-2&#41; &#123; // acc. to negascout paper score is always correct up to this depth
                maxscore = score;
            &#125; else &#123; // re-search
                maxscore = -negascout&#40;child, d+1, -INF_SCORE, -score&#41;;
            &#125;
            savepv&#40;child&#41;;
        &#125;
        b = maxscore+1; // set null window
    &#125;

    if &#40;had_children&#41; &#123;
        tt_replace&#40;pos.zobrist, maxscore, EXACT_SCORE, maxd-d&#41;;
        return maxscore;
    &#125; else &#123;
        if &#40;checkmate&#40;pos&#41;) &#123;
            tt_replace&#40;pos.zobrist, -CHECKMATE_SCORE, EXACT_SCORE, maxd-d&#41;;
            return -CHECKMATE_SCORE;
        &#125; else &#123; // stalemate
            tt_replace&#40;pos.zobrist, 0, EXACT_SCORE, maxd-d&#41;;
            return 0;
        &#125;
    &#125;
&#125;

int negascout&#40;Position pos, int d, int alpha, int beta&#41; &#123;
    if &#40;fifty_move_rule&#40;pos&#41;) return 0;
    if &#40;repetition_rule&#40;pos&#41;) return 0;
    
    if &#40;tt_contains_entry&#40;pos.zobrist&#41;) &#123;
        entry = tt_get_entry&#40;pos.zobrist&#41;;
        if &#40;entry.depth >= maxd-d&#41; &#123;
            if &#40;is_exact_score&#40;entry&#41;)
                return entry.score;
            else if &#40;is_lower_bound&#40;entry&#41; && entry.score >= beta&#41;
                return entry.score;
            else if &#40;is_upper_bound&#40;entry&#41; && entry.score <= alpha&#41;
                return entry.score;
        &#125;
    &#125;

    if &#40;d == maxd&#41; &#123;
        score = quiescence&#40;pos, 0, alpha, beta&#41;;
        tt_replace&#40;pos.zobrist, score, get_score_type&#40;score, alpha, beta&#41;, 0&#41;;
        return score;
    &#125;

    b = beta; maxscore = -INF_SCORE; a = alpha;
    children = get_children&#40;pos&#41;;
    for &#40;all children&#41; &#123;
        score = -negascout&#40;child, d+1, -b, -a&#41;;
        if &#40;score > maxscore&#41; &#123; // score improved
            if &#40;b == beta || d >= maxd-2&#41; &#123; // acc. to negascout paper score is always correct up to this depth
                maxscore = score;
            &#125; else &#123; // re-search
                maxscore = -negascout&#40;child, d+1, -beta, -score&#41;;
            &#125;
            savepv&#40;child&#41;;
        &#125;
        if &#40;maxscore >= beta&#41; &#123;
            tt_replace&#40;pos.zobrist, maxscore, LOWER_BOUND&#41;
            return maxscore;
        &#125;
        a = max&#40;maxscore, alpha&#41;;
        b = a+1; // set null window
    &#125;

    if &#40;had_children&#41; &#123;
        tt_replace&#40;pos.zobrist, maxscore, get_score_type&#40;maxscore, alpha, beta&#41;, maxd-d&#41;;
        return maxscore;
    &#125; else &#123;
        if &#40;checkmate&#40;pos&#41;) &#123;
            tt_replace&#40;pos.zobrist, -CHECKMATE_SCORE, get_score_type&#40;-CHECKMATE_SCORE, alpha, beta&#41;, maxd-d&#41;;
            return -CHECKMATE_SCORE;
        &#125; else &#123; // stalemate
            tt_replace&#40;pos.zobrist, 0, get_score_type&#40;0, alpha, beta&#41;, maxd-d&#41;;
            return 0;
        &#125;
    &#125;
&#125;

int quiescence&#40;Position pos, int depth, int alpha, int beta&#41; &#123;
    stand_pat = pos.to_move * evaluate&#40;pos&#41;;

    if &#40;stand_pat >= beta&#41; &#123;
        return beta;
    &#125;
    if &#40;stand_pat > alpha&#41; &#123;
        alpha = stand_pat;
    &#125;

    children = get_dramatic_moves&#40;pos&#41;; // gets captures whose SEE value is larger than 0 &#40;and ordered after them&#41;
    for &#40;all children&#41; &#123;
        score = -quiescence&#40;child, depth+1, -beta, -alpha&#41;;
        if &#40;score >= beta&#41; &#123;
            return beta;
        &#125;
        if &#40;score > alpha&#41; &#123;
            alpha = score;
        &#125;
    &#125;
    return alpha;
&#125;

hgm · Post by **hgm** » Fri Mar 29, 2013 7:34 pm

If you want to get anywhere developing a Chess program, you must develop a method to debug behavior like this. It seems your scores are totally off, if the numbers you quote are centi-Pawns.

My favorite method is to put a conditional print statement directly after the return of the recursive call, where I print ply:noeDepth:iidDepth (for identification), move, score and best score so far. (sometimes also alpha and beta). The condition is if(PATH), where I #define PATH to trigger only in a specific node, or the path to it. E.g.

#define PATH ply < 2

would print only in the root, and

#define PATH ply < 2 || path[1] == "e2e4" && (ply < 3 || path[2] == "e7e5")

to print everthing in the node after e2e4, e7e6. If you end on a TT hit, you can put an if(PATH) at the probe code to print the hash key, and at the store code you can put a printf that prints only when a certain key is is hashed, printing ply and the path[] to it, so that you can figure out where the score came from.

For reproducible errors you would always be able to find them this way. Even if they occur at 23 ply.

Bombax · Post by **Bombax** » Fri Mar 29, 2013 9:00 pm

I should clarify, the scores are in millipawns. The big problem is that they produce weird moves, however.

I've tried debugging with a lot of different methods but maybe I'll try yours.

Codesquid · Post by **Codesquid** » Fri Mar 29, 2013 9:10 pm

Could the lack of check(mate) handling in your qsearch cause this?

Chan Rasjid · Post by **Chan Rasjid** » Fri Mar 29, 2013 10:14 pm

Hello,

You're giving us an impossible task. We can only tell that your program is acting strange- which means bugs.

You already are implementing a fairly complex engine. Usually there is no need for 2 full-width search until bugs have been cleared; but you seem to already have a specialized one for searching pv-nodes - this means added complexity. With just 1 full-width search, a program should still be able play at about the same strength. Then there is researching in negascout; it is a fairly "advanced" topic. If your search accepts results from a research and they are wrong, your engine is going to play strange. There is also search instability when you do re-search. With so much interactions, creating pseudo codes to represent your actual codes is not useful.

I notice certain things that cause doubts:
1) You do fail-soft but not within QS. Most would be failing soft everywhere. The only time care is needed is when we need to return an artificial score outside of bounds and the score is not a returned value from "score = -search()" when we must only fail hard; or we really know what we are returning.
2) My guess is your trouble is most likely from TT-hashing - bugs in storing hash scores.
3) re-search + pv-node-search + fail-soft + TT means a very volatile environment to hit bugs.

EDIT: the fact that only turning hashing on causes the strange behavior means it is in the hashing!

Best Regards,
Rasjid.

Edsel Apostol · Post by **Edsel Apostol** » Sun Mar 31, 2013 3:50 pm

Code: Select all

if &#40;d == maxd&#41; &#123; 
        score = quiescence&#40;pos, 0, -INF_SCORE, INF_SCORE&#41;; 
        tt_replace&#40;pos.zobrist, score, EXACT_SCORE, 0&#41;; 
        return score; 
    &#125;

This seems not correct. I don't know the details of your implementation of "tt_replace" but my guess from your pseudo-code is that it should be like this:

Code: Select all

if &#40;d == maxd&#41; &#123; 
        score = quiescence&#40;pos, d, -INF_SCORE, INF_SCORE&#41;; 
        tt_replace&#40;pos.zobrist, score, EXACT_SCORE, maxd-d&#41;; 
        return score; 
    &#125;

Take note of the "depth" parameters in "quiescence" and "tt_replace".

Sven · Post by **Sven** » Sun Mar 31, 2013 8:03 pm

Edsel Apostol wrote:
Code: Select all
if &#40;d == maxd&#41; &#123; 
        score = quiescence&#40;pos, 0, -INF_SCORE, INF_SCORE&#41;; 
        tt_replace&#40;pos.zobrist, score, EXACT_SCORE, 0&#41;; 
        return score; 
    &#125; 
This seems not correct. I don't know the details of your implementation of "tt_replace" but my guess from your pseudo-code is that it should be like this:
Code: Select all
if &#40;d == maxd&#41; &#123; 
        score = quiescence&#40;pos, d, -INF_SCORE, INF_SCORE&#41;; 
        tt_replace&#40;pos.zobrist, score, EXACT_SCORE, maxd-d&#41;; 
        return score; 
    &#125; 
Take note of the "depth" parameters in "quiescence" and "tt_replace".

These two versions are in fact identical since "d == maxd" is equivalent to "maxd-d == 0" and, according to the pseudocode, the "quiescence()" function does not use its "depth" parameter at all so it does not matter whether you pass 0 or something else to it. The version you propose would be more consistent to the other code but I don't see how the original version is incorrect.

Sven

Edsel Apostol · Post by **Edsel Apostol** » Mon Apr 01, 2013 1:40 am

Sven Schüle wrote:
Edsel Apostol wrote:
Code: Select all
if &#40;d == maxd&#41; &#123; 
        score = quiescence&#40;pos, 0, -INF_SCORE, INF_SCORE&#41;; 
        tt_replace&#40;pos.zobrist, score, EXACT_SCORE, 0&#41;; 
        return score; 
    &#125; 
This seems not correct. I don't know the details of your implementation of "tt_replace" but my guess from your pseudo-code is that it should be like this:
Code: Select all
if &#40;d == maxd&#41; &#123; 
        score = quiescence&#40;pos, d, -INF_SCORE, INF_SCORE&#41;; 
        tt_replace&#40;pos.zobrist, score, EXACT_SCORE, maxd-d&#41;; 
        return score; 
    &#125; 
Take note of the "depth" parameters in "quiescence" and "tt_replace".
These two versions are in fact identical since "d == maxd" is equivalent to "maxd-d == 0" and, according to the pseudocode, the "quiescence()" function does not use its "depth" parameter at all so it does not matter whether you pass 0 or something else to it. The version you propose would be more consistent to the other code but I don't see how the original version is incorrect.

Sven

You are correct, they should be identical. I've got confused with the "depth" convention used different than what I'm used to.

Anyways some observation regarding the pseudo-code:
-it only stores the result of the first call to the "quiescence". Most engines I'm aware of that uses hashing in quiescence search stores all the results in the hash table.
-there are some potential for search instabilities in the implementation of nega-scout, especially in this block of code:

Code: Select all

        if &#40;score > maxscore&#41; &#123; // score improved 
            if &#40;b == INF_SCORE || d >= maxd-2&#41; &#123; // acc. to negascout paper score is always correct up to this depth 
                maxscore = score; 
            &#125; else &#123; // re-search 
                maxscore = -negascout&#40;child, d+1, -INF_SCORE, -score&#41;; 
            &#125; 
            savepv&#40;child&#41;; 
        &#125;

The re-search has the tendency to fail-low, and it should not be used to update "maxscore".

I guess the main problem here is search instability which in turn saves the wrong search scores to the hash table.

I would suggest a more stable PVS algorithm using zero-window on all non-PV nodes: http://chessprogramming.wikispaces.com/ ... ion+Search

Strange results when TT's and quiescence are combined??

Strange results when TT's and quiescence are combined??

Re: Strange results when TT's and quiescence are combined??

Re: Strange results when TT's and quiescence are combined??

Re: Strange results when TT's and quiescence are combined??

Re: Strange results when TT's and quiescence are combined??

Re: Strange results when TT's and quiescence are combined??

Re: Strange results when TT's and quiescence are combined??

Re: Strange results when TT's and quiescence are combined??