diff --git a/LaTeX/main.bib b/LaTeX/main.bib
index 2a5d009..d4bcf42 100644
--- a/LaTeX/main.bib
+++ b/LaTeX/main.bib
@@ -401,3 +401,10 @@
 url = {https://stockfishchess.org/},
 abstract = {Stockfish is a free and strong UCI chess engine.},
 }
+
+@misc{schachhoernchen,
+    title = {Schach H\"ornchen},
+    year = {development since 2021, first release pending},
+    author = {Kapla, Daniel},
+    url = {todo!}
+}
diff --git a/LaTeX/paper.tex b/LaTeX/paper.tex
index 1a2b935..0318a95 100644
--- a/LaTeX/paper.tex
+++ b/LaTeX/paper.tex
@@ -932,7 +932,7 @@ We are interested in the quality of the estimate of the true sufficient reductio
 That is the Frobenius norm of the diffeence between the projections onto the span of $\mat{B}$ and $\hat{\mat{B}}$. The proportionality constant\footnote{Depends on row dimension $p$ and the ranks of $\mat{B}$ and $\hat{\mat{B}}$ given by $(\min(\rank\mat{B} + \rank\hat{\mat{B}}, 2 p - (\rank\mat{B} + \rank\hat{\mat{B}})))^{-1/2}$.} of $d(\mat{B}, \hat{\mat{B}})$ ensures that the subspace distance is in the interval $[0, 1]$. A distance of zero implies equality of the spans, a distance of one means that the subspaces are orthogonal.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{Tensor Normal}
+\subsection{Tensor Normal}\label{sec:sim-tensor-normal}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 For every tensor normal model we draw i.i.d. samples $\ten{X}_i$ for $i = 1, ..., n$ from the conditional distribution of $\ten{X}\mid Y = y_i$ where $y_i$ is an i.i.d. sample from the standard normal distribution. The conditional distribution $\ten{X}\mid Y = y_i$ depends on the choice of the GMLM parameters $\overline{\ten{\eta}}$, $\mat{\beta}_1, ..., \mat{\beta}_r$, $\mat{\Omega}_1, ..., \mat{\Omega}_r$, and the function $\ten{F}_y$ of $y$. In all experiments we set $\overline{\ten{\eta}} = \mat{0}$. The other parameters and $\ten{F}_y$ are described per experiment. With the true GMLM parameters and $\ten{F}_y$ given, we compute the conditional tensor normal mean $\ten{\mu}_y = \ten{F}_y\mlm_{k = 1}^{r}\mat{\Omega}_k^{-1}\mat{\beta}_k$ and covariances $\mat{\Sigma}_k = \mat{\Omega}_k^{-1}$ as in \eqref{eq:tnormal_cond_params}.
@@ -955,12 +955,65 @@ We start with a $1$ dimensional linear dependence on $y$ in 1a). Then, the depen
 Furthermore, we fit the model with the wrong ``known'' function $\ten{F}_y$. We set $\ten{F}_y$ to be a $2\times 2$ matrix with a quadratic linkage via elements given by $(\ten{F}_y)_{i j} = y^{|i - j|}$.
 \end{itemize}
+\todo{How to describe that? I mean, sure, but what to write?}
+The results of 1c) are surprising. The GMLM model behaves as expected and is clearly the best. The first surprise is that PCA, HOPCA and MGCCA are visually indistinguishable, which is explained by the high signal-to-noise ratio in this particular example. The biggest surprise, however, is the failure of TSIR. TSIR is usually well equipped to handle such low rank problems (given the true rank of the problem, which is provided to all methods in every simulation), but by pure coincidence we picked a case where TSIR fails. To pinpoint the specific problem we ran a further simulation in which we vary the tensor order $r$ from $2$ to $4$. Furthermore, we altered the coefficient $\rho$ of the autoregression-type matrices $(\mat{\Omega}_k)_{i j} = \rho^{|i - j|}$. We let $\ten{F}_y$ be the $r$-fold iterated outer product of the vector $(1, y)$.
+In the case of $r = 3$ this gives the same $\ten{F}_y$ as in 1c). We then set up two scenarios that differ in the definition of the true rank $1$ matrices $\mat{\beta}_k$, $k = 1, \ldots, r$. The rest is identical to simulation 1c).
+\begin{itemize}
+    \item[V1)] The first version sets all $\mat{\beta}_k$'s identical to
+    \begin{displaymath}
+        \mat{\beta}_k = \begin{pmatrix} 1 & 1 \\ 0 & 0 \end{pmatrix}
+    \end{displaymath}
+    which gives the true vectorized reduction matrix $\mat{B} = \bigkron_{k = r}^{1}\mat{\beta}_k$ equal to a $2^r\times 2^r$ rank $1$ matrix with the first row all ones and zeros everywhere else. The minimal true reduction is the $2^r$ dimensional first unit vector $\mat{e}_1$. In this setting the vectorized conditional mean is $\E[\vec{\ten{X}} \mid Y = y] = (1 + y)^r \bigkron_{k = r}^{1}\mat{\Omega}_k^{-1}\mat{e}_1$, where
+    \begin{displaymath}
+        \mat{\Omega}_k = \begin{pmatrix}
+            1 & \rho \\ \rho & 1
+        \end{pmatrix}
+    \end{displaymath}
+    with inverse
+    \begin{displaymath}
+        \mat{\Sigma}_k = \mat{\Omega}_k^{-1} = \frac{1}{1 - \rho^2}\begin{pmatrix}
+            1 & -\rho \\ -\rho & 1
+        \end{pmatrix}.
+    \end{displaymath}
+    Spelled out, the conditional mean and its vectorization are
+    \begin{displaymath}
+        \E[\ten{X} \mid Y = y] = \frac{(1 + y)^r}{(1 - \rho^2)^r}\bigouter_{k = 1}^{r}\begin{pmatrix}
+            1 \\ -\rho
+        \end{pmatrix}
+    \end{displaymath}
+    \begin{displaymath}
+        \E[\vec{\ten{X}} \mid Y = y] = \frac{(1 + y)^r}{(1 - \rho^2)^r}\bigkron_{k = r}^{1}\begin{pmatrix}
+            1 \\ -\rho
+        \end{pmatrix}.
+    \end{displaymath}
+
+    In this setting only the first component of $\vec{\ten{X}}$, that is $(\vec{\ten{X}})_1$, enters the true reduction; it depends on $Y$ via $\E[(\vec{\ten{X}})_1\mid Y = y] = (1 + y)^r / (1 - \rho^2)^r$. Given $(\vec{\ten{X}})_1$, all other components carry information about $Y$ through the correlation structure only. \todo{check this!}
+    \item[V2)] Similar to the $\mat{\beta}_k$'s in 1c), we set all $\mat{\beta}_k$'s identical to
+    \begin{displaymath}
+        \mat{\beta}_k = \begin{pmatrix} 1 & -1 \\ -1 & 1 \end{pmatrix}
+    \end{displaymath}
+    which, by the same computation as in V1, gives the vectorized conditional mean
+    \begin{displaymath}
+        \E[\vec{\ten{X}}\mid Y = y] = \frac{(1 - y)^r}{(1 - \rho)^r}\bigkron_{k = r}^{1}\begin{pmatrix}
+            1 \\ -1
+        \end{pmatrix}.
+    \end{displaymath}
+\end{itemize}
-\begin{figure}
+
+
+% simplified the simulation such that $p_k = q_k = 2$ for $k = 1, \ldots, r$. We let the functions $\ten{F}_y = \bigcirc_{k = 1}^{r}(1, y)$
+
+% Then, we simulate with $100$ replications per case where every case
+
+
+
+% The setup which is identical in both cases is as follows. The response $Y$ is i.i.d. standard normal and the response tensor $\ten{F}_y$ consists of monomials with max order $r$. Its elements are equal to $(\ten{F}_y)_{\mat{i}} = y^{|\mat{i}| - r}$ where $\mat{i}$ is a multi-index of length $r$ and $|\mat{i}|$ is the sum of the elements of $\mat{i}$.
+
+\begin{figure}[hp!]
 	\centering
 	\includegraphics[width = \textwidth]{plots/sim-normal.pdf}
-	\caption{\label{fig:sim-normal}asknclknasknc}
+	\caption{\label{fig:sim-normal}Visualization of the simulation results for the tensor normal GMLM. Sample size on the $x$-axis and the mean of subspace distance $d(\mat{B}, \hat{\mat{B}})$ over $100$ replications on the $y$-axis. Described in \cref{sec:sim-tensor-normal}.}
 \end{figure}
@@ -975,24 +1028,24 @@ We start with a $1$ dimensional linear dependence on $y$ in 1a). Then, the depen
 \subsection{Ising Model}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\begin{figure}
+\begin{figure}[ht!]
 	\centering
 	\includegraphics[width = \textwidth]{plots/sim-ising.pdf}
 	\caption{\label{fig:sim-ising}asknclknasknc}
 \end{figure}

-\begin{table}
-    \begin{tabular}{c | ccc ccc c}
-        $n$ & GMLM & PCA & HOPCA & LPCA & CLPCA & TSIR & MGCCA \\
-        \hline
-        100 & {\bf 0.34} (0.14) & 0.90 (0.04) & 0.90 (0.05) & 0.94 (0.09) & 0.91 (0.03) & 0.48 (0.19) & 0.55 (0.13) \\
-        200 & {\bf 0.25} (0.11) & 0.90 (0.03) & 0.90 (0.03) & 0.96 (0.07) & 0.91 (0.02) & 0.38 (0.16) & 0.53 (0.10) \\
-        300 & {\bf 0.20} (0.09) & 0.89 (0.02) & 0.89 (0.02) & 0.97 (0.06) & 0.91 (0.02) & 0.29 (0.13) & 0.51 (0.11) \\
-        500 & {\bf 0.16} (0.07) & 0.90 (0.02) & 0.90 (0.02) & 0.98 (0.01) & 0.91 (0.01) & 0.23 (0.10) & 0.50 (0.08) \\
-        750 & {\bf 0.13} (0.05) & 0.90 (0.01) & 0.90 (0.01) & 0.98 (0.02) & 0.91 (0.01) & 0.23 (0.08) & 0.53 (0.06)
-    \end{tabular}
-    \caption{\label{tab:sim-ising}xyz uvw}
-\end{table}
+% \begin{table}
+%     \begin{tabular}{c | ccc ccc c}
+%         $n$ & GMLM & PCA & HOPCA & LPCA & CLPCA & TSIR & MGCCA \\
+%         \hline
+%         100 & {\bf 0.34} (0.14) & 0.90 (0.04) & 0.90 (0.05) & 0.94 (0.09) & 0.91 (0.03) & 0.48 (0.19) & 0.55 (0.13) \\
+%         200 & {\bf 0.25} (0.11) & 0.90 (0.03) & 0.90 (0.03) & 0.96 (0.07) & 0.91 (0.02) & 0.38 (0.16) & 0.53 (0.10) \\
+%         300 & {\bf 0.20} (0.09) & 0.89 (0.02) & 0.89 (0.02) & 0.97 (0.06) & 0.91 (0.02) & 0.29 (0.13) & 0.51 (0.11) \\
+%         500 & {\bf 0.16} (0.07) & 0.90 (0.02) & 0.90 (0.02) & 0.98 (0.01) & 0.91 (0.01) & 0.23 (0.10) & 0.50 (0.08) \\
+%         750 & {\bf 0.13} (0.05) & 0.90 (0.01) & 0.90 (0.01) & 0.98 (0.02) & 0.91 (0.01) & 0.23 (0.08) & 0.53 (0.06)
+%     \end{tabular}
+%     \caption{\label{tab:sim-ising}xyz uvw}
+% \end{table}
@@ -1007,7 +1060,7 @@ In this section be perform two \todo{realy two!} applications of the GMLM model
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Chess}\label{sec:chess}
-The data set is provided by the \citetitle{lichess-database}\footnote{\fullcite{lichess-database}}. We downloaded November of 2023 consisting of more than $92$ million games. We removed all games without Stockfish\footnote{\fullcite{stockfish}} position evaluations. Those take the role of the response $Y$ and correspond to a winning probability from whites point of few. Positive scores are good for white and negative scores indicate an advantage for black. We ignore all highlty unballanced positions, which we set to be positions with absolute score above $5$. We also remove all positions with a mate score (one side can force check mate). Finally, we only considure positions with white to move. This leads to a final data set of roughly $190$ million positions, including duplicates.
+The data set is provided by the \citetitle{lichess-database}\footnote{\fullcite{lichess-database}}. We downloaded the November 2023 data base consisting of more than $92$ million games. We removed all games without position evaluations. The evaluations, also denoted scores, are from Stockfish,\footnote{\fullcite{stockfish}} a free and strong chess engine. The scores take the role of the response $Y$ and correspond to a winning probability from white's point of view. Positive scores are good for white and negative scores indicate an advantage for black. We ignore all highly unbalanced positions, defined as positions with an absolute score above $5$. We also remove all positions with a mate score (one side can force checkmate). Finally, we only consider positions with white to move. This leads to a final data set of roughly $190$ million positions, including duplicates.
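+% Aside, a minimal sketch (assuming the `Rchess` package from this repository is
+% installed) of drawing such a filtered sample; `data.gen()` returns FEN strings
+% with the Stockfish scores attached as an attribute, and `fen2int()` converts
+% them to the binary position tensors described below:
+%     fens <- Rchess::data.gen("lichess_db_standard_rated_2023-11.fen",
+%                              sample_size = 1000, score_min = -5.0, score_max = 5.0)
+%     y <- attr(fens, "scores")   # Stockfish scores, the response Y
+%     X <- Rchess::fen2int(fens)  # binary position tensors, the predictors X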
A chess position is encoded as a 3D binary tensor of dimension $8\times 8\times 12$ giving the predictors $\ten{X}$, see \cref{fig:fen2tensor}. The first two axis encode the squares of a chess board, which is a $8\times 8$ grid. The third axis encodes chess pieces. The $12$ pieces derive from the $6$ types of pieces, namely pawns (\pawn), knights (\knight), bishops (\bishop), queens (\queen) and kings (\king) of two colors, black and white. diff --git a/dataAnalysis/chess/Rchess/R/RcppExports.R b/dataAnalysis/chess/Rchess/R/RcppExports.R index 99125ab..5ed760b 100644 --- a/dataAnalysis/chess/Rchess/R/RcppExports.R +++ b/dataAnalysis/chess/Rchess/R/RcppExports.R @@ -5,8 +5,8 @@ #' `gmlm_chess()` as data generator to provide random draws from a FEN data set #' with scores filtered to be in in the range `score_min` to `score_max`. #' -data.gen <- function(file, sample_size, score_min, score_max) { - .Call(`_Rchess_data_gen`, file, sample_size, score_min, score_max) +data.gen <- function(file, sample_size, score_min = -5.0, score_max = +5.0, quiet = FALSE) { + .Call(`_Rchess_data_gen`, file, sample_size, score_min, score_max, quiet) } #' Convert a legal FEN string to a 3D binary (integer with 0-1 entries) array diff --git a/dataAnalysis/chess/Rchess/src/RcppExports.cpp b/dataAnalysis/chess/Rchess/src/RcppExports.cpp index 827067e..fe1289a 100644 --- a/dataAnalysis/chess/Rchess/src/RcppExports.cpp +++ b/dataAnalysis/chess/Rchess/src/RcppExports.cpp @@ -12,8 +12,8 @@ Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // data_gen -Rcpp::CharacterVector data_gen(const std::string& file, const int sample_size, const float score_min, const float score_max); -RcppExport SEXP _Rchess_data_gen(SEXP fileSEXP, SEXP sample_sizeSEXP, SEXP score_minSEXP, SEXP score_maxSEXP) { +Rcpp::CharacterVector data_gen(const std::string& file, const int sample_size, const float score_min, const float score_max, const bool quiet); +RcppExport SEXP _Rchess_data_gen(SEXP fileSEXP, SEXP sample_sizeSEXP, SEXP score_minSEXP, SEXP score_maxSEXP, SEXP quietSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -21,7 +21,8 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const int >::type sample_size(sample_sizeSEXP); Rcpp::traits::input_parameter< const float >::type score_min(score_minSEXP); Rcpp::traits::input_parameter< const float >::type score_max(score_maxSEXP); - rcpp_result_gen = Rcpp::wrap(data_gen(file, sample_size, score_min, score_max)); + Rcpp::traits::input_parameter< const bool >::type quiet(quietSEXP); + rcpp_result_gen = Rcpp::wrap(data_gen(file, sample_size, score_min, score_max, quiet)); return rcpp_result_gen; END_RCPP } @@ -172,7 +173,7 @@ END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_Rchess_data_gen", (DL_FUNC) &_Rchess_data_gen, 4}, + {"_Rchess_data_gen", (DL_FUNC) &_Rchess_data_gen, 5}, {"_Rchess_fen2int", (DL_FUNC) &_Rchess_fen2int, 1}, {"_Rchess_read_cyclic", (DL_FUNC) &_Rchess_read_cyclic, 5}, {"_Rchess_sample_move", (DL_FUNC) &_Rchess_sample_move, 1}, diff --git a/dataAnalysis/chess/Rchess/src/data_gen.cpp b/dataAnalysis/chess/Rchess/src/data_gen.cpp index 5ecec49..5cc5581 100644 --- a/dataAnalysis/chess/Rchess/src/data_gen.cpp +++ b/dataAnalysis/chess/Rchess/src/data_gen.cpp @@ -17,8 +17,9 @@ Rcpp::CharacterVector data_gen( const std::string& file, const int sample_size, - const float score_min, - const float score_max + const float score_min = -5.0, + const float score_max = +5.0, + const bool quiet = false ) { // Check parames if (sample_size 
< 1) { @@ -48,7 +49,8 @@ Rcpp::CharacterVector data_gen( } // Allocate output sample - Rcpp::CharacterVector sample(sample_size); + Rcpp::CharacterVector _sample(sample_size); + Rcpp::NumericVector _scores(sample_size); // Read and filter lines from FEN data base file std::string line, fen; @@ -59,6 +61,11 @@ Rcpp::CharacterVector data_gen( // Check for user interupt (that is, allows from `R` to interupt execution) R_CheckUserInterrupt(); + // Avoid infinite loop + if (reject_count > 1000 * sample_size) { + Rcpp::stop("Too many rejections, stop to avoid infinite loop"); + } + // Read line, in case of failure retry from start of file (recycling) if (!std::getline(input, line)) { input.clear(); @@ -104,14 +111,16 @@ Rcpp::CharacterVector data_gen( reject_count++; continue; } - - // Avoid infinite loop - if (reject_count > 1000 * sample_size) { - Rcpp::stop("Too many rejections, stop to avoid infinite loop"); + // filter quiet positions (iff requested) + if (quiet && pos.isQuiet()) { + reject_count++; + continue; } // Everythings succeeded and ge got an appropriate sample in requested range - sample[sample_count++] = fen; + _sample[sample_count] = fen; + _scores[sample_count] = score; + ++sample_count; // skip lines (ensures independent draws based on games being independent) if (input.eof()) { @@ -125,5 +134,8 @@ Rcpp::CharacterVector data_gen( } } - return sample; + // Set scores as attribute to position sample + _sample.attr("scores") = _scores; + + return _sample; } diff --git a/dataAnalysis/chess/chess.R b/dataAnalysis/chess/chess.R index a76390c..5b36f30 100644 --- a/dataAnalysis/chess/chess.R +++ b/dataAnalysis/chess/chess.R @@ -13,23 +13,32 @@ data_set <- "lichess_db_standard_rated_2023-11.fen" # Function to draw samples `X` form the chess position `data_set` conditioned on # `Y` (position scores) to be in the interval `score_min` to `score_max`. data_gen <- function(batch_size, score_min, score_max) { - Rchess::fen2int(Rchess::data.gen(data_set, batch_size, score_min, score_max)) + Rchess::fen2int(Rchess::data.gen(data_set, batch_size, score_min, score_max, quiet = TRUE)) } -fun_y = function(y) { +fun_y <- function(y) { F <- t(outer(y, c(0, 1, 1, 2, 1, 2, 2, 3), `^`)) dim(F) <- c(2, 2, 2, length(y)) F } # Invoke specialized GMLM optimization routine for chess data -fit.gmlm <- gmlm_chess(data_gen, fun_y) +fit.gmlm <- gmlm_chess(data_gen, fun_y, step_size = 1e-3) +################################################################################ +### At 1838 is the last one with all values, not just quiet positions ### +################################################################################ + + +#### STOP HERE! 
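+# Aside, a small self-check sketch (illustration only, not used by the fit above):
+# for a scalar y, fun_y(y) is the 2 x 2 x 2 tensor of the monomials
+# 1, y, y, y^2, y, y^2, y^2, y^3 (column-major), i.e. the 3-fold outer product
+# of the vector (1, y) with itself, which is the F_y used for the chess GMLM.
+local({
+    y <- 2
+    F <- fun_y(y)                                 # 2 x 2 x 2 x 1 array
+    stopifnot(all.equal(as.vector(F), c(c(1, y) %o% c(1, y) %o% c(1, y))))
+})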
+if (FALSE) {

 load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000000.Rdata")
 load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000274.Rdata")
 load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000532.Rdata")
+# Load latest save point
+load(sort(list.files("~/Work/tensorPredictors/dataAnalysis/chess/", pattern = "save_point*"), decreasing = TRUE)[[1]])

 # build intervals from score break points
 score_breaks <- c(-5.0, -3.0, -2.0, -1.0, -0.5, -0.2, 0.2, 0.5, 1.0, 2.0, 3.0, 5.0)
@@ -46,12 +55,77 @@ Omega_const <- local({
     Omega_const <- !diag(768) & ((diag_offset %% 64L) == 0L)
     # One King per color
     Omega_const <- Omega_const | kronecker(diag(1:12 %in% c(6, 12)), !diag(64), `&`)
+    # Enemy kings can _not_ be on neighbouring squares
+    king_const <- mapply(function(i, j) {
+        `[<-`((abs(.row(c(8, 8)) - i) <= 1L) & (abs(.col(c(8, 8)) - j) <= 1L), i, j, FALSE)
+    }, .row(c(8, 8)), .col(c(8, 8)))
+    dim(Omega_const) <- c(64, 12, 64, 12)
+    Omega_const[, 6, , 12] <- Omega_const[, 6, , 12] | king_const
+    Omega_const[, 12, , 6] <- Omega_const[, 12, , 6] | king_const
+    dim(Omega_const) <- c(768, 768)
     # no pawns on rank 1 or rank 8
     pawn_const <- tcrossprod(as.vector(`[<-`(matrix(0L, 8, 8), c(1, 8), , 1L)), rep(1L, 64))
     pawn_const <- kronecker(`[<-`(matrix(0, 12, 12), c(1, 7), , 1), pawn_const)
     which(Omega_const | (pawn_const | t(pawn_const)))
 })
+
+const <- `[<-`(array(0L, c(8, 8, 12, 8, 8, 12)), Omega_const, 1L)
+dimnames(const) <- rep(list(
+    8:1, letters[1:8], unlist(strsplit("PNBRQKpnbrqk", ""))
+), 2L)
+diag_offset <- abs(.row(c(768, 768)) - .col(c(768, 768)))
+dim(diag_offset) <- dim(const)
+dimnames(diag_offset) <- dimnames(const)
+
+(function(r1, f1, p1, r2, f2, p2) {
+    print.table(const[r1, f1, p1, r2, f2, p2], zero.print = ".")
+    print.table(const[r2, f2, p2, r1, f1, p1], zero.print = ".")
+})("4", "e", "p", "1", , )
+
+(diag_offset["4", "e", "K", , , "k"] %% 64L) * (abs(.row(c(8, 8)) - .col(c(8, 8))) == 1L)
+
+
+B <- Reduce(kronecker, rev(betas))
+dim(B) <- c(8, 8, 12, 8)
+dimnames(B) <- list(
+    8:1,
+    letters[1:8],
+    unlist(strsplit("PNBRQKpnbrqk", "")),
+    paste0("y^", c(0, 1, 1, 2, 1, 2, 2, 3))
+)
+
+old.par <- par(mfrow = c(3, 4))
+rmB <- rowMeans(B, dims = 3)
+for (piece in dimnames(B)[[3]]) {
+    matrixImage(rmB[, , piece])
+}
+par(old.par)
+
+print.as.board <- function(mat) {
+    print.table(
+        matrix(as.integer(
+            mat
+        ), 8, 8, dimnames = dimnames(const)[1:2]),
+        zero.print = "."
+    )
+}
+
+print.as.board({
+    rows <- .row(c(8, 8))
+    cols <- .col(c(8, 8))
+    diags <- rows - cols
+    (abs(diags) == 1L | abs(diags) == 2L) & rows
+})
+
+print.as.board({
+    (abs(.row(c(8, 8)) - 3) == 1L) & (abs(.col(c(8, 8)) - 3) == 1L)
+})
+
+# `neighbours(i, j)`: helper returning the 8 x 8 logical board of squares adjacent
+# to square (i, j), analogous to the anonymous function used for `king_const` above
+king_const <- mapply(neighbours, .row(c(8, 8)), .col(c(8, 8)))
+dim(king_const) <- c(8, 8, 8, 8)
+dimnames(king_const) <- dimnames(const)[c(1, 2, 4, 5)]
+
+print.as.board(neighbours(4, 7))

 y <- score_means[5]

@@ -63,14 +137,6 @@ params <- `diag<-`(Omega, as.vector(mlm(`dim<-`(fun_y(y), dimF), betas)))

 # Conditional mean of the Ising model
 mu_y <- ising_m2(params)

-    layout(matrix(c(
-        1, 2, 3, 3, 3,
-        1, 4, 5, 6, 7
-    ), nrow = 2, byrow = TRUE), width = c(6, 3, 1, 1, 1))
-
-    legend("topright", col = c("red", "blue", "darkgreen"), lty = 1, lwd = 2,
-        legend = c("dist.B", "dist.Omega", "loss"), bty = "n")
-
 range(Omega)
 matrixImage(Omega)
 matrixImage(mu_y)
@@ -88,6 +154,32 @@ plot(lm(y ~ mat(X_reduced, 4)))

+fens <- Rchess::data.gen(data_set, 10000, quiet = TRUE)
+y <- attr(fens, "scores")
+X <- Rchess::fen2int(fens)
+
+mean_X <- rowMeans(X, dims = 3)
+X_reduced <- mat(mlm(X - as.vector(mean_X), betas, transposed = TRUE), 4)
+colnames(X_reduced) <- paste0("y^", c(0, 1, 1, 2, 1, 2, 2, 3))
+
+fit <- lm(y ~ X_reduced)
+
+summary(fit)
+vcov(fit)
+
+# resample
+fens <- Rchess::data.gen(data_set, 10000, quiet = TRUE)
+y <- attr(fens, "scores")
+X <- Rchess::fen2int(fens)
+
+mean_X <- rowMeans(X, dims = 3)
+X_reduced <- mat(mlm(X - as.vector(mean_X), betas, transposed = TRUE), 4)
+colnames(X_reduced) <- paste0("y^", c(0, 1, 1, 2, 1, 2, 2, 3))
+
+plot(predict(fit, newdata = as.data.frame(X_reduced)), y)
+
+
+
 # save_points <- sort(list.files(pattern = "save_point*"))
 # load(save_points[length(save_points)])
@@ -98,3 +190,41 @@ plot(lm(y ~ mat(X_reduced, 4)))
 # }, save_points))

 # plot(loss, type = "b")
+
+
+setwd("~/Work/tensorPredictors/dataAnalysis/chess/")
+save_points <- sort(list.files(".", pattern = "save_point*"))
+c(head(save_points), "...", tail(save_points))
+
+loss <- sapply(save_points, function(save_point) {
+    load(save_point)
+    last_loss
+}, USE.NAMES = FALSE)
+names(loss) <- seq_along(loss)
+loss <- loss[is.finite(loss)]
+c(head(loss), "...", tail(loss))
+
+R2 <- sapply(save_points, function(save_point) {
+    load(save_point)
+    X_reduced <- mlm(X - as.vector(mean_X), betas, transposed = TRUE)
+    fit <- lm(y ~ mat(X_reduced, 4))
+    summary(fit)$r.squared
+}, USE.NAMES = FALSE)
+
+
+plot(as.numeric(names(loss)), loss, type = "l", col = "red", lwd = 2, log = "y")
+abline(v = 1745, lty = 2)
+
+plot(R2, type = "l", col = "red", lwd = 2, log = "y")
+abline(v = 1740, lty = 2)
+abline(h = R2[1740], lty = 2)
+
+summary(fit)
+vcov(fit)
+
+}
+
+local({
+    y <- rnorm(100) + 2 + (x <- rnorm(100))
+    summary(lm(y ~ x))
+})
diff --git a/dataAnalysis/chess/gmlm_chess.R b/dataAnalysis/chess/gmlm_chess.R
index 220d51f..8935be6 100644
--- a/dataAnalysis/chess/gmlm_chess.R
+++ b/dataAnalysis/chess/gmlm_chess.R
@@ -11,8 +11,6 @@
 #' @param fun_y known functions of scalar `y`, returning a 3D/4D tensor
 #' @param score_breaks numeric vector of two or more unique cut points, the cut
 #'   points are the interval bounds specifying the slices of `y`.
-#' @param Omega_bounds numeric, (may be Infinite). Maximum absolute element values
-#'   of `Omega`.
 #' @param nr_threads integer, nr. of threads used by `ising_m2()`
 #' @param mcmc_samples integer, nr. of Monte-Carlo Chains passed to `ising_m2()`
 #' @param slice_size integer, size of sub-samples generated by `data_gen` for
@@ -32,15 +30,14 @@ gmlm_chess <- function(
     data_gen,
     fun_y,
     score_breaks = c(-5.0, -3.0, -2.0, -1.0, -0.5, -0.2, 0.2, 0.5, 1.0, 2.0, 3.0, 5.0),
-    # Omega_bounds = 4.6,   # TODO: wip!!!!!
     nr_threads = 8L,
     mcmc_samples = 10000L,
     slice_size = 512L,
-    max_iter = 1000L,
+    max_iter = 10000L,
     patience = 25L,
     step_size = 1e-3,
     eps = sqrt(.Machine$double.eps),
-    save_point = "gmlm_chess_save_point_%s.Rdata"
+    save_point = "save_point_%s.Rdata"
 ) {
     # build intervals from score break points
     score_breaks <- sort(score_breaks)
@@ -56,6 +53,14 @@ gmlm_chess <- function(
     Omega_const <- !diag(768) & ((diag_offset %% 64L) == 0L)
     # One King per color
     Omega_const <- Omega_const | kronecker(diag(1:12 %in% c(6, 12)), !diag(64), `&`)
+    # Enemy kings can _not_ be on neighbouring squares
+    king_const <- mapply(function(i, j) {
+        `[<-`((abs(.row(c(8, 8)) - i) <= 1L) & (abs(.col(c(8, 8)) - j) <= 1L), i, j, FALSE)
+    }, .row(c(8, 8)), .col(c(8, 8)))
+    dim(Omega_const) <- c(64, 12, 64, 12)
+    Omega_const[, 6, , 12] <- Omega_const[, 6, , 12] | king_const
+    Omega_const[, 12, , 6] <- Omega_const[, 12, , 6] | king_const
+    dim(Omega_const) <- c(768, 768)
     # no pawns on rank 1 or rank 8
     pawn_const <- tcrossprod(as.vector(`[<-`(matrix(0L, 8, 8), c(1, 8), , 1L)), rep(1L, 64))
     pawn_const <- kronecker(`[<-`(matrix(0, 12, 12), c(1, 7), , 1), pawn_const)
@@ -74,7 +79,7 @@ gmlm_chess <- function(
         cat(sprintf("Resuming from save point '%s'\n", load_point),
             "(to restart delete/rename the save points)\n")
         load(load_point)
-        # Fix `iter`, save after increment
+        # Fix `iter`, saved after increment
         iter <- iter - 1L
     } else {
         # draw initial sample to be passed to the normal GMLM estimator for initial `betas`