python script to optimize bin books using https://chessdb.cn/queryc_en/

Discussion of chess software programming and technical issues.

Moderator: Ras

Jonathan003
Posts: 243
Joined: Fri Jul 06, 2018 4:23 pm
Full name: Jonathan Cremers

python script to optimize bin books using https://chessdb.cn/queryc_en/

Post by Jonathan003 »

I'm trying to make a Python script to optimize bin books.
But it doesn't work and I'm stuck.
I hope someone can help me get the script fixed.
(Don't copy the text from the Python script, because for some reason it won't run that way. Just enlarge and download the entire Python script. Do the same for the Requirements Overview md.)

This is the script ChessDBCNBookOptimizer.py:


And this is the Requirements Overview:


And here is a small bin book, "gm2600.bin", that comes with Scid and can be used to test the script:
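
To check that the test book itself loads, here is a minimal sketch with python-chess (assuming gm2600.bin is in the current directory) that just lists the book moves stored for the starting position:

Code: Select all

import chess, chess.polyglot

# Print every book move (and its weight) recorded for the starting position.
board = chess.Board()
with chess.polyglot.open_reader("gm2600.bin") as reader:
    for entry in reader.find_all(board):
        print(entry.move.uci(), entry.weight)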
Jim Ablett
Posts: 2077
Joined: Fri Jul 14, 2006 7:56 am
Location: London, England
Full name: Jim Ablett

Re: python script to optimize bin books using https://chessdb.cn/queryc_en/

Post by Jim Ablett »

Hi Jonathan,

try this revised script >

Code: Select all

import os, sys, subprocess, logging, struct, time
from urllib.parse import quote
from collections import defaultdict, deque
from typing import Dict

def install_deps():
    # Install any missing third-party packages before they are imported below.
    for pkg in ['requests', 'chess']:
        try: __import__(pkg)
        except ImportError: subprocess.check_call([sys.executable, '-m', 'pip', 'install', pkg])

install_deps()

# Third-party imports come after install_deps() so a missing package gets installed first.
import requests, chess, chess.polyglot

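# Polyglot .bin entries are 16 bytes each, big-endian: 8-byte Zobrist key, 2-byte
# encoded move, 2-byte weight and 4-byte learn value; the file must be sorted by key.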
class PolyglotWriter:
    def __init__(self, file_path): self.file_path, self.entries = file_path, []
    def __enter__(self): return self
    def __exit__(self, *args):
        self.entries.sort(key=lambda x: x[0])
        with open(self.file_path, "wb") as f:
            for entry in self.entries: f.write(entry[1])
    def append(self, key, move, weight):
        move_int = (move.from_square << 6) | move.to_square
        if move.promotion: move_int |= (move.promotion - 1) << 12
        entry = struct.pack('>QHHI', key, move_int, max(0, min(weight, 65535)), 0)
        self.entries.append((key, entry))

def setup_logs(output_dir):
    os.makedirs(output_dir, exist_ok=True)
    for h in logging.root.handlers[:]: logging.root.removeHandler(h)
    logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s",
        handlers=[logging.FileHandler(os.path.join(output_dir, "manual_review.log"), mode='w'),
                 logging.StreamHandler(sys.stdout)])
    star_logger = logging.getLogger("star_moves")
    star_logger.handlers.clear()
    star_handler = logging.FileHandler(os.path.join(output_dir, "star_moves.log"), mode='w')
    star_handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
    star_logger.addHandler(star_handler)
    star_logger.propagate = False
    return star_logger

def query_chessdbcn(fen):
    # "queryall" lists every known move as '|'-separated records, e.g.
    # "move:e2e4,score:36,rank:2,note:! (13-25),winrate:54.35|move:d2d4,..."
    # ("querybest" returns only a single move with no "!"/"*"/"?" annotation).
    try:
        response = requests.get(f"http://www.chessdb.cn/chessdb.php?action=queryall&board={quote(fen)}", timeout=10)
        if response.status_code != 200: return {}
        moves = {}
        for record in response.text.strip().split('|'):
            if not record.startswith("move:"): continue
            try:
                fields = dict(part.split(':', 1) for part in record.split(',') if ':' in part)
                move_uci = fields["move"].strip()
                score = int(fields.get("score", "0"))
                symbol = fields.get("note", "").strip()[:1]  # leading "!", "*" or "?"
                moves[move_uci] = {"score": score, "symbol": symbol}
            except (KeyError, ValueError): continue
        return moves
    except requests.RequestException: return {}

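# A .bin book stores only 64-bit Zobrist hashes, not positions, so the FEN for each
# key has to be reconstructed by replaying the book's own moves from the start position.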
def build_fen_map(entries_dict: Dict[int, list], max_depth: int = 50) -> Dict[int, str]:
    """
    Build FEN mappings using improved breadth-first search with optimizations.
    
    Args:
        entries_dict: Dictionary mapping zobrist keys to position entries
        max_depth: Maximum search depth to prevent infinite loops
        
    Returns:
        Dictionary mapping zobrist keys to FEN strings
    """
    fens = {}
    try:
        start_board = chess.Board()
        start_key = chess.polyglot.zobrist_hash(start_board)
        fens[start_key] = start_board.fen()
        
        # Use a queue with depth tracking to prevent infinite expansion
        queue = deque([(start_board, 0)])  # (board, depth)
        processed_keys = {start_key}
        
        # Track progress for large books
        total_positions = len(entries_dict)
        mapped_count = 1
        
        while queue:
            current_board, depth = queue.popleft()
            
            # Prevent excessive depth
            if depth >= max_depth:
                continue
                
            current_key = chess.polyglot.zobrist_hash(current_board)
            current_entries = entries_dict.get(current_key, [])
            
            # Process all valid moves from current position
            valid_moves_processed = 0
            for entry in current_entries:
                try:
                    # Validate move is legal
                    if entry.move not in current_board.legal_moves:
                        continue
                        
                    # Create new board state
                    new_board = current_board.copy()
                    new_board.push(entry.move)
                    child_key = chess.polyglot.zobrist_hash(new_board)
                    
                    # Store FEN if not already mapped
                    if child_key not in fens:
                        fens[child_key] = new_board.fen()
                        mapped_count += 1
                        
                        # Progress tracking for large books
                        if mapped_count % 1000 == 0:
                            coverage = (mapped_count / total_positions) * 100
                            print(f"FEN mapping progress: {mapped_count}/{total_positions} ({coverage:.1f}%)")
                    
                    # Add to queue if position exists in book and not processed
                    if (child_key in entries_dict and 
                        child_key not in processed_keys and
                        depth < max_depth - 1):
                        processed_keys.add(child_key)
                        queue.append((new_board, depth + 1))
                        valid_moves_processed += 1
                        
                except (ValueError, IndexError) as e:
                    # Skip invalid moves/positions
                    logging.warning(f"Skipped invalid move at depth {depth}: {e}")
                    continue
            
            # Early termination if no valid moves processed
            if valid_moves_processed == 0 and depth > 0:
                continue
                
    except Exception as e:
        logging.error(f"Error in FEN mapping: {e}")
        # Return partial results if error occurs
        
    # Final statistics
    coverage_rate = (len(fens) / len(entries_dict)) * 100 if entries_dict else 0
    logging.info(f"FEN mapping complete: {len(fens)}/{len(entries_dict)} positions ({coverage_rate:.1f}% coverage)")
    
    return fens

def validate_fen_mapping(fens: Dict[int, str], entries_dict: Dict[int, list]) -> Dict[str, int]:
    """
    Validate FEN mappings and return statistics.
    
    Returns:
        Dictionary with validation statistics
    """
    stats = {
        'total_positions': len(entries_dict),
        'mapped_positions': len(fens),
        'unmapped_positions': len(entries_dict) - len(fens),
        'invalid_fens': 0
    }
    
    # Check for invalid FENs
    for key, fen in fens.items():
        try:
            chess.Board(fen)
        except ValueError:
            stats['invalid_fens'] += 1
            logging.warning(f"Invalid FEN detected: {fen}")
    
    return stats

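# Re-weighting policy: when a position's top-weighted book move is marked "?" by
# chessdb.cn, the book moves marked "!" or "*" are boosted above the old maximum
# weight and all other moves are halved; otherwise the original weights are kept.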
def optimize_book(input_path, output_dir):
    setup_logs(output_dir)
    star_logger = logging.getLogger("star_moves")
    output_path = os.path.join(output_dir, "optimized_book.bin")
    
    # Test API
    try:
        response = requests.get("http://www.chessdb.cn/chessdb.php?action=querybest&board=rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR%20w%20KQkq%20-%200%201", timeout=5)
        if response.status_code != 200: raise Exception("API unreachable")
    except Exception:
        print("ChessDBCN API connection failed.")
        sys.exit(1)
    
    # Load entries
    print("Loading entries...")
    entries = defaultdict(list)
    try:
        with chess.polyglot.open_reader(input_path) as reader:
            for entry in reader: entries[entry.key].append(entry)
    except Exception:
        print("Failed to read input file.")
        sys.exit(1)
    
    print(f"Loaded {sum(len(v) for v in entries.values())} entries across {len(entries)} positions")
    
    # Build FEN mappings with improved algorithm
    print("Building position mappings...")
    fens = build_fen_map(entries)
    
    # Validate mappings
    validation_stats = validate_fen_mapping(fens, entries)
    print(f"FEN mapping results:")
    print(f"  - Mapped: {validation_stats['mapped_positions']}/{validation_stats['total_positions']} positions")
    print(f"  - Coverage: {(validation_stats['mapped_positions']/validation_stats['total_positions']*100):.1f}%")
    print(f"  - Invalid FENs: {validation_stats['invalid_fens']}")
    
    # Process positions
    processed = adjusted = 0
    start_time = time.time()
    total_positions = len(entries)
    
    with PolyglotWriter(output_path) as writer:
        for key, position_entries in entries.items():
            processed += 1
            
            # Real-time progress logging
            if processed % 50 == 0:
                elapsed = time.time() - start_time
                percent = (processed / total_positions) * 100
                rate = processed / elapsed if elapsed > 0 else 0
                eta = (total_positions - processed) / rate if rate > 0 else 0
                print(f"Progress: {processed}/{total_positions} ({percent:.1f}%) - {rate:.1f} pos/s - ETA: {eta:.0f}s")
            
            fen = fens.get(key)
            if not fen:
                for entry in position_entries: writer.append(key, entry.move, entry.weight)
                continue
            
            chessdb_moves = query_chessdbcn(fen)
            if not chessdb_moves:
                for entry in position_entries: writer.append(key, entry.move, entry.weight)
                continue
            
            # Get bin moves and check for good moves
            bin_moves = {entry.move.uci() for entry in position_entries}
            good_moves = [m for m in bin_moves if chessdb_moves.get(m, {}).get("symbol") in ["!", "*"]]
            
            # If no moves in the bin book have "!" or "*", do not make adjustments
            if not good_moves:
                logging.info(f"Manual review - Position: {fen}, No good moves in bin book")
                for entry in position_entries: writer.append(key, entry.move, entry.weight)
                continue
            
            # Check for questionable top moves
            max_weight = max(entry.weight for entry in position_entries)
            top_moves = [entry.move.uci() for entry in position_entries if entry.weight == max_weight]
            questionable_tops = [m for m in top_moves if chessdb_moves.get(m, {}).get("symbol") == "?"]
            
            if questionable_tops:
                # Sort good moves by priority: "!" first, then by score
                best_moves = sorted(good_moves, 
                    key=lambda m: (chessdb_moves[m]["symbol"] == "!", chessdb_moves[m]["score"]), reverse=True)
                
                # Check for star moves usage
                star_moves_used = [m for m in best_moves if chessdb_moves[m]["symbol"] == "*"]
                exclamation_moves_available = [m for m in best_moves if chessdb_moves[m]["symbol"] == "!"]
                
                # Adjust weights
                for entry in position_entries:
                    move_uci = entry.move.uci()
                    if move_uci in best_moves:
                        writer.append(key, entry.move, max_weight + 1)
                    else:
                        writer.append(key, entry.move, max(1, entry.weight // 2))
                
                # Log if star moves were used instead of exclamation moves
                if star_moves_used:
                    if exclamation_moves_available:
                        star_logger.info(f"Position: {fen}, Used '*' moves: {star_moves_used} (when '!' moves available: {exclamation_moves_available})")
                    else:
                        star_logger.info(f"Position: {fen}, Used '*' moves: {star_moves_used}")
                
                adjusted += 1
                print(f"Adjusted position {adjusted}: {fen[:50]}...")
            else:
                for entry in position_entries: writer.append(key, entry.move, entry.weight)
    
    elapsed = time.time() - start_time
    success_rate = (len(fens) / len(entries)) * 100 if entries else 0
    
    print(f"\nOptimization complete!")
    print(f"Processed: {processed} positions")
    print(f"Adjusted: {adjusted} positions")
    print(f"Time: {elapsed:.1f} seconds")
    print(f"FEN success rate: {success_rate:.1f}%")
    logging.info(f"Optimization complete: {processed} positions ({adjusted} adjusted) in {elapsed:.1f}s")
    
    return output_path

def main():
    print("ChessDBCN Book Optimizer - Optimizes chess opening books using ChessDBCN rankings.")
    input_path = input("Enter input bin book path: ").strip().strip('"')
    output_dir = input("Enter output directory: ").strip().strip('"')
    
    os.makedirs(output_dir, exist_ok=True)
    
    try:
        result = optimize_book(input_path, output_dir)
        print(f"Success! Output: {result}")
        print("Files created: optimized_book.bin, manual_review.log, star_moves.log")
    except Exception as e:
        print(f"Error: {e}")
        sys.exit(1)

if __name__ == "__main__": main()
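
If you want to see the raw reply that query_chessdbcn() parses, you can probe the API by hand first; a minimal sketch for the starting position (any legal FEN works):

Code: Select all

import requests
from urllib.parse import quote

# Fetch and print the raw queryall reply for the starting position.
fen = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1"
url = f"http://www.chessdb.cn/chessdb.php?action=queryall&board={quote(fen)}"
print(requests.get(url, timeout=10).text)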
Jim.