tensor_predictors/dataAnalysis/chess/pgn2fen.cpp

218 lines
7.6 KiB
C++

#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <sstream>
#include "utils.h"
#include "Board.h"
#include "Move.h"
#include "search.h"
#include "uci.h"
// Command line synopsis, printed whenever the arguments cannot be parsed
static const std::string usage = "usage: pgn2fen [--scored] [--rating <rating>] [--ply <ply>] [<input>]";
// Convert a PGN (Portable Game Notation) input stream into single FENs, one
// per line, streamed to stdout.
//
// Parameters:
//   input  - stream to read the PGN text from, processed line by line. Every
//            non-empty line that does not start with '[' is treated as the
//            complete move text of one game.
//   scored - when true, the comment following each move is searched for a
//            "[%eval <score>]" tag and the score is written next to the FEN.
//            A game is abandoned at the first move without such a tag, and
//            positions whose score starts with '#' are not written.
//   rating - minimum rating BOTH players need for a game to be processed.
//            The value static_cast<unsigned long>(-1) disables the filter;
//            otherwise both ratings are appended to every output line.
//   ply    - positions with pos.plyCount() below this value are not written.
//
// Output line layout: "<fen>[; <eval>][; <whiteElo>; <blackElo>]".
// Errors are reported on stderr. A move that cannot be parsed or is illegal
// ends the current game, a malformed rating tag stops reading the input.
void pgn2fen(
    std::istream& input,
    const bool scored,
    const unsigned long rating,
    const unsigned long ply
) {
    // Metadata tags carrying the player ratings, e.g.
    //   [WhiteElo "1111"]
    //   [BlackElo "999"]
    const std::string whiteEloTag{"[WhiteElo \""};
    const std::string blackEloTag{"[BlackElo \""};
    // The all-ones value is the "no rating filter" sentinel
    const bool useRating = rating != static_cast<unsigned long>(-1);

    // The start of every game as well as the current state of the Board
    // while processing a PGN game
    Board startpos, pos;

    // Ratings collected from the metadata preceding the current game
    unsigned long whiteElo = 0;
    unsigned long blackElo = 0;

    // Read input line by line
    std::string line;
    while (std::getline(input, line)) {
        // Pick up rating metadata lines (only needed when filtering)
        if (useRating) {
            try {
                // compare() against the tag length is a "starts with" test;
                // std::stoul stops at the closing quote of the tag value
                if (line.compare(0, whiteEloTag.size(), whiteEloTag) == 0) {
                    whiteElo = std::stoul(line.substr(whiteEloTag.size()));
                } else if (line.compare(0, blackEloTag.size(), blackEloTag) == 0) {
                    blackElo = std::stoul(line.substr(blackEloTag.size()));
                }
            } catch (...) {
                std::cerr << "ERROR: Parsing player rating metadata '" << line
                          << "' failed." << std::endl;
                break;
            }
        }

        // Skip empty and further metadata lines (every PGN game starts with "<nr>.")
        if (line.empty() || line.front() == '[') {
            continue;
        }

        // With a rating filter, only process games where both players qualify
        if (useRating && (whiteElo < rating || blackElo < rating)) {
            // Forget the ratings of the skipped game as well, otherwise they
            // leak into a following game that lacks one of the ELO tags
            whiteElo = 0;
            blackElo = 0;
            continue;
        }

        // Reset position to the start position, every game starts here!
        pos = startpos;

        // Read game content (assuming one line is the entire game)
        std::istringstream game(line);
        std::string count, san, token, eval;
        // Per move: a counter token, the move in SAN and a third token that
        // is presumably the '{' opening the move comment (not verified here)
        while (game >> count >> san >> token) {
            // Consume/Parse PGN comments
            if (scored) {
                // Consume the comment and search for an evaluation
                bool has_score = false;
                while (game >> token) {
                    // Evaluation token (position score _after_ the move)
                    if (token == "[%eval") {
                        // Only accept a score that could actually be read; a
                        // failed extraction leaves `eval` empty or stale
                        if (game >> eval) {
                            if (!eval.empty() && eval.back() == ']') {
                                eval.pop_back(); // delete trailing ']'
                            }
                            has_score = true;
                            // Consume the remainder of the comment (ignore it)
                            std::getline(game, token, '}');
                        }
                        break;
                    } else if (token == "}") {
                        break;
                    }
                }
                // In case of not finding an evaluation, skip the game (_not_ an error)
                if (!has_score) {
                    break;
                }
            } else {
                // Consume the remainder of the comment (ignore it)
                std::getline(game, token, '}');
            }

            // Perform move
            bool parseError = false;
            Move move = UCI::parseSAN(san, pos, parseError);
            if (parseError) {
                std::cerr << "ERROR: Parsing '" << san << "' at position '"
                          << pos.fen() << "' failed." << std::endl;
                break;
            }
            move = pos.isLegal(move); // validate legality and extend move info
            if (move) {
                pos.make(move);
            } else {
                // NOTE(review): `move` printed here is the value returned by
                // isLegal() that tested false, not the originally parsed move
                std::cerr << "ERROR: Encountered illegal move '" << san
                          << " (" << move
                          << ") ' at position '" << pos.fen() << "'." << std::endl;
                break;
            }

            // Skip positions with too small ply count
            if (pos.plyCount() < ply) {
                continue;
            }

            // Ignore "check mate in" scores (not relevant for eval training).
            // Do this after "make move" in situations where the check mate
            // was overlooked, leading to new positions
            if (scored && !eval.empty() && eval[0] == '#') {
                continue;
            }

            // Write the position followed by the requested optional columns
            std::cout << pos.fen();
            if (scored) {
                std::cout << "; " << eval;
            }
            if (useRating) {
                std::cout << "; " << whiteElo << "; " << blackElo;
            }
            std::cout << '\n';
        }

        // Reset ELO after every game to ensure that games without an elo
        // metadata tag don't get the wrong rating from a previous game
        whiteElo = 0;
        blackElo = 0;
    }
}
// Program entry point: parses the command line and runs the converter.
//
// Recognized arguments:
//   --scored           write the evaluation score next to every position
//   --rating <rating>  only process games where both players reach <rating>
//   --ply <ply>        only write positions with at least <ply> plies
//   <input>            PGN file to read; stdin is used when omitted
//
// Returns 0 after the conversion ran, 1 when the arguments are invalid or the
// input file cannot be opened.
int main(int argn, char* argv[]) {
    // Setup control variables
    bool scored = false;
    // All-ones sentinel: pgn2fen treats it as "no rating filter"
    unsigned long rating = static_cast<unsigned long>(-1);
    unsigned long ply = 0;
    std::string file;

    // Reads the numeric value following the option at argv[i] into `value`,
    // advancing `i` onto the value. Reports the problem and returns false
    // when the value is missing or not a number.
    const auto readNumber = [&](int& i, const char* option, unsigned long& value) {
        if (i + 1 >= argn) {
            std::cout << usage << std::endl;
            return false;
        }
        try {
            value = std::stoul(argv[++i]);
        } catch (...) {
            std::cerr << "ERROR: illegal " << option << " argument " << argv[i] << std::endl;
            std::cout << usage << std::endl;
            return false;
        }
        return true;
    };

    // Parse command arguments
    for (int i = 1; i < argn; ++i) {
        const std::string arg{argv[i]};
        if (arg == "--scored") {
            scored = true;
        } else if (arg == "--rating") {
            if (!readNumber(i, "--rating", rating)) {
                return 1;
            }
        } else if (arg == "--ply") {
            if (!readNumber(i, "--ply", ply)) {
                return 1;
            }
        } else if (file.empty()) {
            // First positional argument is the input file
            file = arg;
        } else {
            // A second positional argument is not supported
            std::cout << usage << std::endl;
            return 1;
        }
    }

    // Invoke converter either with file input or stdin
    if (file.empty()) {
        pgn2fen(std::cin, scored, rating, ply);
    } else {
        // Open input file
        std::ifstream input(file);
        if (!input) {
            std::cerr << "ERROR: opening '" << file << "' failed" << std::endl;
            return 1;
        }
        pgn2fen(input, scored, rating, ply);
    }
    return 0;
}