#!/usr/bin/env python3
"""
Wrap PGN movetext to lines of at most N columns (default 79),
preserving tag pairs and basic PGN structure.
- Tag-pair lines like [Event "..."] are passed through unchanged.
- Movetext is reflowed by whitespace into lines ≤ width.
- Brace comments {...} are kept intact as single tokens.
- Game boundaries (blank line or next tag) are preserved.
Usage:
python3 wrap_pgn.py input.pgn output.pgn --width 79
"""
from __future__ import annotations
import argparse
import sys
from typing import List, Iterable
# PGN game-termination markers: white wins, black wins, draw, unfinished/unknown.
# NOTE(review): not referenced by any function visible in this part of the file.
RESULT_TOKENS = {"1-0", "0-1", "1/2-1/2", "*"}
def tokenize_movetext(s: str) -> List[str]:
    """
    Split PGN movetext into tokens suitable for re-wrapping.

    A token is a run of non-whitespace characters, except that a brace
    comment ``{...}`` is always returned as ONE token (internal spaces
    included) so that it is never broken across lines.  A comment that
    directly follows another token without whitespace, as in
    ``e4{good move}``, is split off into its own token rather than being
    glued to the move and then broken apart at its internal spaces.

    Parentheses for variations are ordinary characters; the contents of a
    variation can wrap freely.

    Args:
        s: Movetext to tokenize (any amount of whitespace between tokens).

    Returns:
        The tokens in input order.  An unterminated comment extends to the
        end of ``s``.
    """
    tokens: List[str] = []
    i, n = 0, len(s)
    while i < n:
        ch = s[i]
        if ch.isspace():
            i += 1
            continue

        j = i + 1
        if ch == "{":
            # Brace comment: scan to the matching '}'.  PGN comments do not
            # nest, but tracking depth keeps accidentally nested braces
            # together instead of cutting the comment short.
            depth = 1
            while j < n and depth > 0:
                if s[j] == "{":
                    depth += 1
                elif s[j] == "}":
                    depth -= 1
                j += 1
        else:
            # Regular token: ends at whitespace OR where a comment begins,
            # so "e4{note}" yields "e4" followed by "{note}".
            while j < n and not s[j].isspace() and s[j] != "{":
                j += 1

        tokens.append(s[i:j])  # for comments: includes the closing brace (or runs to EOF)
        i = j
    return tokens
def wrap_tokens(tokens: Iterable[str], width: int) -> str:
    """
    Lay tokens out greedily on lines no longer than `width` characters.

    Tokens on a line are separated by a single space.  Tokens are never
    split: one that is longer than `width` simply occupies a line by itself.
    The lines are returned joined with newlines, with no trailing newline.
    """
    finished: List[str] = []
    pending: List[str] = []
    used = 0  # length of " ".join(pending)

    for token in tokens:
        size = len(token)
        # Extend the current line when it is non-empty and the token,
        # plus its separating space, still fits.
        if pending and used + 1 + size <= width:
            pending.append(token)
            used += 1 + size
            continue
        # Otherwise close the current line (if any) and start a new one.
        if pending:
            finished.append(" ".join(pending))
        pending = [token]
        used = size

    if pending:
        finished.append(" ".join(pending))
    return "\n".join(finished)
def process_pgn(stream: Iterable[str], width: int) -> Iterable[str]:
    """
    Re-wrap the movetext of every game in a PGN line stream.

    Tag-pair lines and blank lines are passed through unchanged.  The
    movetext lines of a game are collected, joined with single spaces,
    tokenized with ``tokenize_movetext`` and re-wrapped with ``wrap_tokens``.

    Args:
        stream: Input lines, each normally ending in a newline.
        width: Maximum line width handed to ``wrap_tokens``.

    Yields:
        Output lines, newline-terminated (pass-through lines keep whatever
        line ending they arrived with).

    NOTE: because movetext lines are joined before wrapping, ';'
    rest-of-line comments are not recognised and may end up covering moves
    that originally stood on the following line.
    """
    state = "tags"  # "tags" or "moves"
    movelines: List[str] = []

    def flush_moves() -> List[str]:
        """Wrap and return the pending movetext (empty list if none)."""
        nonlocal movelines
        if not movelines:
            return []
        # Only non-blank lines are ever collected, so no filtering is needed.
        joined = " ".join(line.strip() for line in movelines)
        movelines = []
        return [wrap_tokens(tokenize_movetext(joined), width) + "\n"]

    for raw in stream:
        line = raw.rstrip("\n")
        is_blank = line.strip() == ""
        # A tag-pair line starts with '[' and contains a closing ']'.
        is_tag = line.startswith("[") and "]" in line

        if state == "tags":
            if is_tag or is_blank:
                yield raw
                # The blank line after the tag section announces movetext.
                if is_blank:
                    state = "moves"
            else:
                # Some PGNs omit the blank line; treat this as movetext.
                state = "moves"
                movelines.append(line)
        elif is_tag:
            # Next game begins right after movetext.
            flushed = flush_moves()
            yield from flushed
            # Separate the games with a blank line only if movetext was
            # actually emitted.  With nothing pending, this state was entered
            # through a blank line, so the separator is already there and
            # adding another would grow the file on every run.
            if flushed:
                yield "\n"
            yield raw
            state = "tags"
        elif is_blank:
            # End of movetext for this game; keep the blank line.
            yield from flush_moves()
            yield raw
            state = "tags"
        else:
            movelines.append(line)

    # End of input: emit any movetext still pending.
    yield from flush_moves()
def main() -> int:
    """
    Command-line entry point: wrap the movetext of INPUT and write OUTPUT.

    Returns:
        0 on success; 1 if input and output are the same file, or if a file
        cannot be opened, read, decoded as UTF-8, or written.  The error is
        reported on stderr.
    """
    import os  # local import: only needed for the same-file guard below

    ap = argparse.ArgumentParser(description="Wrap PGN movetext to lines of at most N columns.")
    ap.add_argument("input", help="Input PGN file")
    ap.add_argument("output", help="Output PGN file")
    ap.add_argument("--width", type=int, default=79, help="Maximum line width for movetext (default: 79)")
    args = ap.parse_args()

    # Opening the output in "w" mode truncates it immediately.  If it is the
    # same file as the input, the input would be emptied before being read.
    try:
        same_file = os.path.samefile(args.input, args.output)
    except OSError:
        # At least one path does not exist (yet), so they cannot be the same.
        same_file = False
    if same_file:
        print("Error: input and output must be different files", file=sys.stderr)
        return 1

    try:
        with open(args.input, "r", encoding="utf-8") as f_in, \
                open(args.output, "w", encoding="utf-8", newline="\n") as f_out:
            for out_line in process_pgn(f_in, args.width):
                f_out.write(out_line)
    except (OSError, UnicodeDecodeError) as e:
        # OSError covers FileNotFoundError, PermissionError, IsADirectoryError...
        print(f"Error: {e}", file=sys.stderr)
        return 1
    return 0
# Run as a script: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
It (intentionally) leaves comments intact on the same line, but I guess that is fine (and perhaps splitting them causes problems).
OliverBr wrote: ↑Sun Oct 05, 2025 3:38 am
This raises actually another question: Why I am the only one who actually cares since the release of 4.9.1?
I have xboard-4.9.1 on my PC, and I can load and view the PGNs you linked to just fine (even before running it through the script).
I just recompiled it directly from source, and the PGNs still work fine.
(I had to make "ics_type" in backend.h and "currPvInfo" in evalgraph.h extern to make the linker happy.)
/*
 * Read the next chunk of text from inputFile and append it at inPtr.
 *
 * A chunk ends at a newline or, once more than 200 characters have been read
 * by this call, at the first whitespace character, so a very long line is
 * handed back in several pieces. The appended text is NUL-terminated.
 *
 * Returns 1 when a chunk was appended. Returns 0 when parsing from a string,
 * when there is no input file, or when EOF is reached with inPtr still at the
 * position it had on entry.
 */
int
ReadLine ()
{ // Read one line from the input file, and append to the buffer
int c; char *start = inPtr;
if(fromString) return 0; // parsing string, so the end is a hard end
if(!inputFile) return 0;
// k counts the characters read by this call
int k = 0;
while((c = fgetc(inputFile)) != EOF) {
*inPtr++ = c;
k++;
// stop at end of line, or at the first whitespace after more than 200 characters
if(c == '\n' || (k > 200 && isspace(c))) { *inPtr = NULLCHAR; return 1; }
// once inPtr has moved more than PARSEBUFSIZE-2 past inputBuf, step it back,
// so further characters overwrite the last slot instead of advancing
if(inPtr - inputBuf > PARSEBUFSIZE-2) inPtr--; //prevent crash on overflow
}
// EOF: nothing appended means there is no (partial) line to return
if(inPtr == start) return 0;
*inPtr++ = '\n', *inPtr = NULLCHAR; // repair missing linefeed at EOF
return 1;
}
(The changes are the lines that refer to int k.)
Not as pretty as ChatGPT would do I'm sure, but it seems it should work?
I'm not sure why even my unmodified xboard does not have a problem with the PGN that clearly has a line that is much longer than PARSEBUFSIZE (10000 by default).
OliverBr wrote: ↑Sun Oct 05, 2025 3:38 am
But CCRL hosts tens of thousands of PGN files that don't conform to the PGN specification as authored by Steven J. Edwards.
Lichess, too, hosts far more than thousands of PGN files with the same issue.
Maybe there are more.
Now how does this information actually help? Can we complain to CCRL and Lichess about their faulty PGN files so that they change them, and also correct all their existing PGN files?
If you're a software developer and you want to contribute a patch to Lichess, they might accept it. You don't know until you try. If you're not a software developer, you can still file a bug report with them and ask your friends to +1 it. The more people report an issue, the more likely it is to get fixed.
OliverBr wrote: ↑Sun Oct 05, 2025 3:38 am
This raises actually another question: Why I am the only one who actually cares since the release of 4.9.1?
I'm not sure precisely what you mean by that. But hey, people have busy lives--it's all volunteer work. There's nobody stopping you from getting involved to scratch your itches.
abulmo2 wrote: ↑Wed Sep 24, 2025 8:10 am
The bug is in the pgn. According to the pgn specification a line should not exceed 255 characters:
This may be so, and I thought so too, but this is an official PGN from CCRL.
Also, any other tool I tested, which handles pgn, could read it. Including talkchess.
That is the raw pgn from Graham's live broadcast isn't it ? I'd say it only actually becomes an "official" CCRL game once he submits it for processing (which coincidentally he does himself) and it is parsed and becomes available for download from the CCRL web site. How does that same game look when downloaded from the CCRL website once it has been processed through their scripts ?
Modern Times wrote: ↑Sun Oct 05, 2025 8:22 am
That is the raw pgn from Graham's live broadcast isn't it ? I'd say it only actually becomes an "official" CCRL game once he submits it for processing (which coincidentally he does himself) and it is parsed and becomes available for download from the CCRL web site. How does that same game look when downloaded from the CCRL website once it has been processed through their scripts ?
All those files are final there; each contains all the moves and variations in one single line, and none of those lines is shorter than 256 characters.
As I mentioned before, PGN files from Lichess have the same "issue".
Dave Gomboc wrote: ↑Sun Oct 05, 2025 7:24 am
If you're a software developer and you want to contribute a patch to Lichess, they might accept it. You don't know until you try. If you're not a software developer, you can still file a bug report with them and ask your friends to +1 it. The more people report an issue, the more likely it is to get fixed.
Correct. But it looks as if I am the only one who has encountered it. Strange.
OliverBr wrote: ↑Sun Oct 05, 2025 3:38 am
I'm not sure precisely what you mean by that. But hey, people have busy lives--it's all volunteer work. There's nobody stopping you from getting involved to scratch your itches.
I am just wondering why the issue didn't come up sooner for somebody else, since xboard 4.9.1 was released in 2016 ... That is really strange and I have no explanation.
Of course, now that it's clear those PGN files don't respect the specification, it is by definition not a bug (I wrote "Maybe" in the title). It would be great if XBoard could load such PGN files, too.
Last edited by OliverBr on Sun Oct 05, 2025 9:41 am, edited 3 times in total.
OliverBr wrote: ↑Sun Oct 05, 2025 3:38 am
This raises actually another question: Why I am the only one who actually cares since the release of 4.9.1?
I have xboard-4.9.1 on my PC, and I can load and view the PGNs you linked to just fine (even before running it through the script).
Really, even the big one with 29K, meaning 29,000 characters in one line?
The one from Lichess works here, too. It was one example showing that the 255-character limit is not respected there either.