41 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			41 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/bin/bash
#
# Build a list of FEN positions from a Lichess PGN data base.
#
# Steps:
#   1. Compile `pgn2fen` via `make`.
#   2. Obtain the zstd-compressed PGN data base: use `${data}.pgn.zst` from
#      the current directory if present, otherwise stream it from
#      database.lichess.org with `wget`.
#   3. Decompress with `zstdcat` and pipe the PGN into `./pgn2fen`, writing
#      the result to `${data}.fen`.
#
# If `${data}.fen` already exists nothing is done.
#
# Requires: make, zstdcat and (only when downloading) wget.

# Abort on a failing command, on use of an unset variable and on a failure in
# ANY stage of a pipeline. Without this, a broken download or decompression
# would silently leave a truncated `.fen` file behind.
set -euo pipefail

# Data set name: Chess games from the Lichess Data Base for standard rated games
# in November 2023
readonly data=lichess_db_standard_rated_2023-11

# Minimum "ELO" rating of black and white players
readonly min_rating=2000
# First ply (half move) from which on positions are considered
readonly min_ply=20

# Write the uncompressed PGN data base to stdout.
#   Uses the file on disk directly if it exists. Otherwise, downloads the PGN
#   data base via `wget`: the flag `-q` suppresses `wget`s own output and
#   `-O-` tells `wget` to stream the downloaded file to `stdout`.
#   In both cases the stream is decompressed with `zstdcat` (no temporary
#   files).
pgn_stream() {
    if [[ -f "${data}.pgn.zst" ]]; then
        zstdcat "${data}.pgn.zst"
    else
        wget -qO- "https://database.lichess.org/standard/${data}.pgn.zst" \
            | zstdcat
    fi
}

# Check if the result file exists and (download and) convert iff not
if [[ -f "${data}.fen" ]]; then
    echo "File '${data}.fen' already exists, assuming job already done."
    echo "To rerun delete (rename) the files '${data}.pgn.zst' and/or '${data}.fen'"
else
    # First, compile `pgn2fen`
    make pgn2fen

    # Write to a partial file and only rename it to its final name after the
    # whole pipeline succeeded. This way an interrupted or failed run is never
    # mistaken for a finished job by the existence check above. The trap
    # removes the partial file on any exit path (it is a no-op after `mv`).
    partial="${data}.fen.part"
    trap 'rm -f -- "${partial}"' EXIT

    # The uncompressed PGN data is piped into `pgn2fen` which converts
    #   the PGN data base into a list of FEN strings while filtering only
    #   positions with evaluation. The `--scored` parameter specifies to extract
    #   a position evaluation from the PGN and ONLY write positions with scores.
    #   That is, positions without a score are removed! Parameter `--rating`
    #   filters games where both players have at least the minimum rating, and
    #   `--ply` specifies to only consider positions from `$min_ply` onward.
    pgn_stream \
        | ./pgn2fen --scored --rating "${min_rating}" --ply "${min_ply}" \
        > "${partial}"

    mv -- "${partial}" "${data}.fen"
fi