#include #include #include #include #include #include #include #include "SchachHoernchen/Board.h" //' Specialized version of `read_cyclic.cpp` taylored to work in conjunction with //' `gmlm_chess()` as data generator to provide random draws from a FEN data set //' with scores filtered to be in in the range `score_min` to `score_max`. //' // [[Rcpp::export(name = "data.gen", rng = true)]] Rcpp::CharacterVector data_gen( const std::string& file, const int sample_size, const float score_min = -5.0, const float score_max = +5.0, const bool quiet = false, const int min_ply_count = 10, const bool white_only = true ) { // Check parames if (sample_size < 1) { Rcpp::stop("`sample_size` must be positive"); } if (score_min >= score_max) { Rcpp::stop("`score_min` must be strictly smaller than `score_max`"); } // open FEN data set file std::ifstream input(file); if (!input) { Rcpp::stop("Opening file '%s' failed", file); } // set the read from stream position to a random line input.seekg(0, std::ios::end); unsigned long seek = unif_rand() * input.tellg(); input.seekg(seek); // from random position set stream position to line start (if not over shot) if (!input.eof()) { input.ignore(std::numeric_limits::max(), '\n'); } // Ensure (in any case) we are at a legal position (recycle) if (input.eof()) { input.seekg(0); } // Allocate output sample Rcpp::CharacterVector _sample(sample_size); Rcpp::NumericVector _scores(sample_size); // Read and filter lines from FEN data base file std::string line, fen; float score; Board pos; int sample_count = 0, retry_count = 0, reject_count = 0; while (sample_count < sample_size) { // Check for user interupt (that is, allows from `R` to interupt execution) R_CheckUserInterrupt(); // Avoid infinite loop if (reject_count > 1000 * sample_size) { Rcpp::stop("Too many rejections, stop to avoid infinite loop"); } // Read line, in case of failure retry from start of file (recycling) if (!std::getline(input, line)) { input.clear(); input.seekg(0); if (!std::getline(input, line)) { // another failur is fatal Rcpp::stop("Recycline lines in file '%s' failed", file); } } // Check for empty line, treated as a partial error which we retry a few times if (line.empty()) { if (++retry_count > 10) { Rcpp::stop("Retry count exceeded after reading empty line in '%s'", file); } else { continue; } } // Split candidat line into FEN and score std::stringstream candidat(line); std::getline(candidat, fen, ';'); candidat >> score; if (candidat.fail()) { // If this failes, the FEN data base is ill formed! Rcpp::stop("Ill formated FEN data base file '%s'", file); } // parse FEN to filter only positions with white to move bool parseError = false; pos.init(fen, parseError); if (parseError) { Rcpp::stop("Retry count exceeded after illegal FEN '%s'", fen); } // Reject / Filter samples if (((int)pos.plyCount() < min_ply_count) // early positions || (white_only && (pos.sideToMove() == piece::black)) // white to move positions || (score < score_min || score_max <= score) // scores out of slice || (quiet && !pos.isQuiet())) // quiet positions { reject_count++; continue; } // Everythings succeeded and ge got an appropriate sample in requested range _sample[sample_count] = fen; _scores[sample_count] = score; ++sample_count; // skip lines (ensures independent draws based on games being independent) if (input.eof()) { input.seekg(0); } for (int s = 0; s < 256; ++s) { input.ignore(std::numeric_limits::max(), '\n'); if (input.eof()) { input.seekg(0); } } } // Set scores as attribute to position sample _sample.attr("scores") = _scores; return _sample; }