#!/bin/bash
#
# Convert a Lichess PGN data base into a list of FEN strings.
#
# Builds `pgn2fen`, then streams the (downloaded or local) zstd-compressed
# PGN data base through it and writes the result to `${data}.fen`.
# Nothing is done if `${data}.fen` already exists.
#
# Requires: make, zstdcat, and wget (only if the data base is not on disk).

# Abort on the first failing command, on unset variables and on a failure
# in ANY stage of a pipeline. Without this, a failed download or conversion
# would leave a truncated `.fen` file that the next run mistakes for a
# finished job.
set -euo pipefail

# Data set name: Chess games from the Lichess Data Base for standard rated
# games in November 2023
data=lichess_db_standard_rated_2023-11
# Minimum "ELO" rating of black and white players
min_rating=2000
# Only positions from this ply onward are considered
min_ply=20

# Write the compressed-then-decompressed PGN data base to `stdout`.
# Uses the file on disk if present; otherwise downloads it via `wget`.
# The flag `-q` suppresses `wget`s own output and `-O-` tells `wget` to
# stream the downloaded file to `stdout`. The stream is decompressed with
# `zstdcat` (no temporary files).
stream_pgn() {
  if [ -f "${data}.pgn.zst" ]; then
    zstdcat "${data}.pgn.zst"
  else
    wget -qO- "https://database.lichess.org/standard/${data}.pgn.zst" \
      | zstdcat
  fi
}

# Check if the result file exists and do the work iff not
if [ -f "${data}.fen" ]; then
  echo "File '${data}.fen' already exists, assuming job already done."
  echo "To rerun delete (rename) the files '${data}.pgn.zst' and/or '${data}.fen'"
else
  # First, compile `pgn2fen` (the script stops here if the build fails)
  make pgn2fen

  # Collect the output in a temporary file next to the final location and
  # only rename it once the whole pipeline succeeded, so that an aborted
  # run never leaves a partial `${data}.fen` behind.
  tmp_fen=$(mktemp "${data}.fen.XXXXXX")
  trap 'rm -f -- "${tmp_fen}"' EXIT

  # The uncompressed PGN data is piped into `pgn2fen` which converts the
  # PGN data base into a list of FEN strings while filtering only positions
  # with evaluation. The `--scored` parameter specifies to extract a
  # position evaluation from the PGN and ONLY write positions with scores.
  # That is, positions without a score are removed! Parameter `--rating`
  # filters games where both players have at least the minimum rating, and
  # `--ply` specifies to only consider positions from `$min_ply` onward.
  stream_pgn \
    | ./pgn2fen --scored --rating "${min_rating}" --ply "${min_ply}" \
    > "${tmp_fen}"

  mv -- "${tmp_fen}" "${data}.fen"
fi