#!/bin/bash
#
# Builds `pgn2fen` and uses it to convert a Lichess PGN data base into a list
# of FEN strings, written to `${data}.fen` in the current directory.
#
# Requires: make, zstdcat, and (only if the compressed PGN file is not already
# on disk) wget plus network access.

# Abort on the first failing command, on unset variables, and when ANY stage
# of a pipeline fails. Without `pipefail` a broken download or decompression
# would go unnoticed because only the last pipeline stage's status counts.
set -euo pipefail

# Data set name: Chess games from the Lichess Data Base for standard rated games
# in November 2023
data=lichess_db_standard_rated_2023-11

# Check if file exists and download iff not
if [[ -f "${data}.fen" ]]; then
  echo "File '${data}.fen' already exists, assuming job already done."
  echo "To rerun delete (rename) the files '${data}.pgn.zst' and/or '${data}.fen'"
else
  # First, compile `pgn2fen`
  make pgn2fen

  # Write into a temporary file and rename it only after the whole pipeline
  # succeeded. Otherwise an interrupted or failed run would leave a truncated
  # `${data}.fen` behind, which the existence check above would mistake for a
  # finished job on the next invocation.
  partial="${data}.fen.part"
  trap 'rm -f -- "${partial}"' EXIT

  # Download the PGN data base via `wget` if not found.
  # The flag `-q` suppresses `wget`s own output and `-O-` tells `wget` to
  # stream the downloaded file to `stdout`.
  # Otherwise, use the file on disk directly.
  # Decompress the stream with `zstdcat` (no temporary files)
  # The uncompressed PGN data is then piped into `pgn2fen` which converts
  # the PGN data base into a list of FEN strings while filtering only
  # positions with evaluation. The `--scored` parameter specifies to extract
  # a position evaluation from the PGN and ONLY write positions with scores.
  # That is, positions without a score are removed!
  if [[ -f "${data}.pgn.zst" ]]; then
    zstdcat "${data}.pgn.zst" \
      | ./pgn2fen --scored > "${partial}"
  else
    wget -qO- "https://database.lichess.org/standard/${data}.pgn.zst" \
      | zstdcat \
      | ./pgn2fen --scored > "${partial}"
  fi

  # Publish the result; after this the EXIT trap's `rm -f` is a no-op.
  mv -- "${partial}" "${data}.fen"
fi