140 lines
4.5 KiB
C++
140 lines
4.5 KiB
C++
#include <iostream>
|
|
#include <iomanip>
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <limits>
|
|
|
|
#include <Rcpp.h>
|
|
|
|
#include "SchachHoernchen/Board.h"
|
|
|
|
//' Specialized version of `read_cyclic.cpp` taylored to work in conjunction with
|
|
//' `gmlm_chess()` as data generator to provide random draws from a FEN data set
|
|
//' with scores filtered to be in in the range `score_min` to `score_max`.
|
|
//'
|
|
// [[Rcpp::export(name = "data.gen", rng = true)]]
|
|
Rcpp::List data_gen(
|
|
const std::string& file,
|
|
const int sample_size,
|
|
const float score_min = -5.0,
|
|
const float score_max = +5.0,
|
|
const bool quiet = false,
|
|
const bool draw = true,
|
|
const int min_ply_count = 10,
|
|
const bool white_only = true
|
|
) {
|
|
// Check parames
|
|
if (sample_size < 1) {
|
|
Rcpp::stop("`sample_size` must be positive");
|
|
}
|
|
if (score_min >= score_max) {
|
|
Rcpp::stop("`score_min` must be strictly smaller than `score_max`");
|
|
}
|
|
|
|
// open FEN data set file
|
|
std::ifstream input(file);
|
|
if (!input) {
|
|
Rcpp::stop("Opening file '%s' failed", file);
|
|
}
|
|
|
|
// set the read from stream position to a random line
|
|
input.seekg(0, std::ios::end);
|
|
unsigned long seek = unif_rand() * input.tellg();
|
|
input.seekg(seek);
|
|
// from random position set stream position to line start (if not over shot)
|
|
if (!input.eof()) {
|
|
input.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
|
|
}
|
|
// Ensure (in any case) we are at a legal position (recycle)
|
|
if (input.eof()) {
|
|
input.seekg(0);
|
|
}
|
|
|
|
// Allocate output sample
|
|
Rcpp::CharacterVector _fens(sample_size);
|
|
Rcpp::NumericVector _scores(sample_size);
|
|
|
|
// Read and filter lines from FEN data base file
|
|
std::string line, fen;
|
|
float score;
|
|
Board pos;
|
|
int sample_count = 0, retry_count = 0, reject_count = 0;
|
|
while (sample_count < sample_size) {
|
|
// Check for user interupt (that is, allows from `R` to interupt execution)
|
|
R_CheckUserInterrupt();
|
|
|
|
// Avoid infinite loop
|
|
if (reject_count > 1000 * sample_size) {
|
|
Rcpp::stop("Too many rejections, stop to avoid infinite loop");
|
|
}
|
|
|
|
// Read line, in case of failure retry from start of file (recycling)
|
|
if (!std::getline(input, line)) {
|
|
input.clear();
|
|
input.seekg(0);
|
|
if (!std::getline(input, line)) {
|
|
// another failur is fatal
|
|
Rcpp::stop("Recycline lines in file '%s' failed", file);
|
|
}
|
|
}
|
|
|
|
// Check for empty line, treated as a partial error which we retry a few times
|
|
if (line.empty()) {
|
|
if (++retry_count > 10) {
|
|
Rcpp::stop("Retry count exceeded after reading empty line in '%s'", file);
|
|
} else {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Split candidat line into FEN and score
|
|
std::stringstream candidat(line);
|
|
std::getline(candidat, fen, ';');
|
|
candidat >> score;
|
|
if (candidat.fail()) {
|
|
// If this failes, the FEN data base is ill formed!
|
|
Rcpp::stop("Ill formated FEN data base file '%s'", file);
|
|
}
|
|
|
|
// parse FEN to filter only positions with white to move
|
|
bool parseError = false;
|
|
pos.init(fen, parseError);
|
|
if (parseError) {
|
|
Rcpp::stop("Retry count exceeded after illegal FEN '%s'", fen);
|
|
}
|
|
|
|
// Reject / Filter samples
|
|
if (((int)pos.plyCount() < min_ply_count) // early positions
|
|
|| (white_only && (pos.sideToMove() == piece::black)) // white to move positions
|
|
|| (score < score_min || score_max < score) // scores out of slice
|
|
|| (quiet && !pos.isQuiet()) // quiet positions
|
|
|| (!draw && score == 0.0)) // drawn positions
|
|
{
|
|
reject_count++;
|
|
continue;
|
|
}
|
|
|
|
// Everythings succeeded and ge got an appropriate sample in requested range
|
|
_fens[sample_count] = fen;
|
|
_scores[sample_count] = score;
|
|
++sample_count;
|
|
|
|
// skip lines (ensures independent draws based on games being independent)
|
|
if (input.eof()) {
|
|
input.seekg(0);
|
|
}
|
|
for (int s = 0; s < 256; ++s) {
|
|
input.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
|
|
if (input.eof()) {
|
|
input.seekg(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
return Rcpp::List::create(
|
|
Rcpp::Named("fens") = _fens,
|
|
Rcpp::Named("scores") = _scores
|
|
);
|
|
}
|