# tensor_predictors/dataAnalysis/chess/chess.R
# (file listing metadata: 231 lines, 6.8 KiB, language R)
# Retain source references for sourced files and attached packages
# (gives accurate line numbers when debugging `gmlm_chess.R`).
options(keep.source = TRUE, keep.source.pkgs = TRUE)
library(tensorPredictors)  # tensor predictor / GMLM tooling (mlm, matrixImage, ...)
library(Rchess)            # chess position encoding (fen2int) and sampling (data.gen)
# Provides `gmlm_chess()`, the specialized optimizer invoked below.
source("./gmlm_chess.R")
# Data set file name of chess positions with Stockfish [https://stockfishchess.org]
# evaluation scores (downloaded and processed by `./preprocessing.sh` from the
# lichess data base [https://database.lichess.org/])
data_set <- "lichess_db_standard_rated_2023-11.fen"
# Draw a batch of chess positions `X` from the `data_set` file, conditioned on
# the position score `Y` lying in the interval [score_min, score_max]. The
# sampled FEN strings are converted to the binary tensor encoding expected by
# the GMLM routines via `Rchess::fen2int()`.
data_gen <- function(batch_size, score_min, score_max) {
    fens <- Rchess::data.gen(data_set, batch_size, score_min, score_max, quiet = TRUE)
    Rchess::fen2int(fens)
}
# Tensor valued function of the response: maps each score `y` to the 2 x 2 x 2
# array of monomials y^e with exponents c(0, 1, 1, 2, 1, 2, 2, 3) in
# column-major order (the exponent at position (i, j, k) is
# (i - 1) + (j - 1) + (k - 1)). Returns a 2 x 2 x 2 x length(y) array.
#
# Fix: the intermediate was named `F`, shadowing the reassignable `FALSE`
# shorthand -- renamed to `feats` (behavior unchanged).
fun_y <- function(y) {
    exponents <- c(0, 1, 1, 2, 1, 2, 2, 3)
    # length(y) x 8 monomial matrix, transposed so observations index last
    feats <- t(outer(y, exponents, `^`))
    dim(feats) <- c(2, 2, 2, length(y))
    feats
}
# Invoke specialized GMLM optimization routine for chess data
# NOTE: long-running; presumably writes the save_point_*.Rdata files consumed
# by the analysis section below -- confirm against gmlm_chess.R.
fit.gmlm <- gmlm_chess(data_gen, fun_y, step_size = 1e-3)
################################################################################
### At 1838 is the last one with all values, not just quiet positions ###
################################################################################
#### STOP HERE!
# Interactive post-hoc analysis scratchpad. Wrapped in `if (FALSE)` so it never
# executes on source(); run chunks manually after a fit. Assumes the save-point
# .Rdata files restore `betas`, `Omegas`, `dimF` and `last_loss` -- verify
# against gmlm_chess.R.
if (FALSE) {
# NOTE(review): the three loads below overwrite one another; only the last one
# executed (or the "latest save point" load that follows) takes effect.
load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000000.Rdata")
load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000274.Rdata")
load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000532.Rdata")
# Load latest save point
# NOTE(review): `pattern` is a regular expression, not a glob -- "save_point*"
# matches "save_poin" plus zero or more "t"s. Likely intended "^save_point".
load(sort(list.files("~/Work/tensorPredictors/dataAnalysis/chess/", pattern = "save_point*"), decreasing = TRUE)[[1]])
# build intervals from score break points
score_breaks <- c(-5.0, -3.0, -2.0, -1.0, -0.5, -0.2, 0.2, 0.5, 1.0, 2.0, 3.0, 5.0)
# Lower/upper endpoints of each score interval and their midpoints.
score_min <- head(score_breaks, -1)
score_max <- tail(score_breaks, -1)
score_means <- (score_min + score_max) / 2
# build Omega constraint, that is the set of impossible combinations
# (including self interactions) due to the rules of chess
# Encoding: 768 = 64 squares x 12 piece types ("PNBRQK" white, "pnbrqk" black).
Omega_const <- local({
# One piece per square
# (off-diagonal pairs whose row/col indices are congruent mod 64 refer to the
# same square occupied by two pieces -- impossible)
diag_offset <- abs(.row(c(768, 768)) - .col(c(768, 768)))
Omega_const <- !diag(768) & ((diag_offset %% 64L) == 0L)
# One King per color
# (pieces 6 = K and 12 = k cannot occur on two different squares at once)
Omega_const <- Omega_const | kronecker(diag(1:12 %in% c(6, 12)), !diag(64), `&`)
# Enemy kings can _not_ be on neightbouring squares
# 64 x 64 adjacency: entry (a, b) is TRUE iff squares a, b are Chebyshev
# adjacent (excluding a == b, cleared via the `[<-` call).
king_const <- mapply(function(i, j) {
`[<-`((abs(.row(c(8, 8)) - i) <= 1L) & (abs(.col(c(8, 8)) - j) <= 1L), i, j, FALSE)
}, .row(c(8, 8)), .col(c(8, 8)))
# Inject king adjacency into the (K, k) and (k, K) interaction slices.
dim(Omega_const) <- c(64, 12, 64, 12)
Omega_const[, 6, , 12] <- Omega_const[, 6, , 12] | king_const
Omega_const[, 12, , 6] <- Omega_const[, 12, , 6] | king_const
dim(Omega_const) <- c(768, 768)
# no pawns on rank 1 or rank 8
# (pieces 1 = P and 7 = p constrained on board rows 1 and 8)
pawn_const <- tcrossprod(as.vector(`[<-`(matrix(0L, 8, 8), c(1, 8), , 1L)), rep(1L, 64))
pawn_const <- kronecker(`[<-`(matrix(0, 12, 12), c(1, 7), , 1), pawn_const)
# Return linear indices of all impossible interaction pairs.
which(Omega_const | (pawn_const | t(pawn_const)))
})
# 0/1 constraint tensor with axes (rank, file, piece) x (rank, file, piece),
# labeled for interactive inspection.
const <- `[<-`(array(0L, c(8, 8, 12, 8, 8, 12)), Omega_const, 1L)
dimnames(const) <- rep(list(
8:1, letters[1:8], unlist(strsplit("PNBRQKpnbrqk", ""))
), 2L)
# Square-offset helper carrying the same labeling as `const`.
diag_offset <- abs(.row(c(768, 768)) - .col(c(768, 768)))
dim(diag_offset) <- dim(const)
dimnames(diag_offset) <- dimnames(const)
# Spot-check symmetry of the constraint for a given square/piece pair.
(function(r1, f1, p1, r2, f2, p2) {
print.table(const[r1, f1, p1, r2, f2, p2], zero.print = ".")
print.table(const[r2, f2, p2, r1, f1, p1], zero.print = ".")
})("4", "e", "p", "1", , )
(diag_offset["4", "e", "K", , , "k"] %% 64L) * (abs(.row(c(8, 8)) - .col(c(8, 8))) == 1L)
# Assemble the fitted reduction `B` from the per-mode `betas` and label axes.
B <- Reduce(kronecker, rev(betas))
dim(B) <- c(8, 8, 12, 8)
dimnames(B) <- list(
8:1,
letters[1:8],
unlist(strsplit("PNBRQKpnbrqk", "")),
paste0("y^", c(0, 1, 1, 2, 1, 2, 2, 3))
)
# Visualize the per-piece mean effect (averaged over monomial components).
old.par <- par(mfrow = c(3, 4))
rmB <- rowMeans(B, dims = 3)
for (piece in dimnames(B)[[3]]) {
matrixImage(rmB[, , piece])
}
par(old.par)
# Pretty-print an 8x8 mask as a chess board (zeros shown as ".").
print.as.board <- function(mat) {
print.table(
matrix(as.integer(
mat
), 8, 8, dimnames = dimnames(const)[1:2]),
zero.print = "."
)
}
# NOTE(review): broken scratch code -- `diags` is defined but `abs(diag)`
# references the base function `diag`, so this errors; `& rows` also mixes a
# numeric matrix into a logical op. Likely intended:
# (abs(diags) == 1L | abs(diags) == 2L).
print.as.board({
rows <- .row(c(8, 8))
cols <- .col(c(8, 8))
diags <- rows - cols
(abs(diag) == 1L | abs(diag) == 2L) & rows
})
# King neighborhood of square (3, 3), Chebyshev-adjacent squares only.
print.as.board({
(abs(.row(c(8, 8)) - 3) == 1L) & (abs(.col(c(8, 8)) - 3) == 1L)
})
# NOTE(review): `neighbours` is not defined anywhere in this file -- these
# lines only run if a `neighbours(i, j)` helper exists in the session.
king_const <- mapply(neighbours, .row(c(8, 8)), .col(c(8, 8)))
dim(king_const) <- c(8, 8, 8, 8)
dimnames(king_const) <- dimnames(const)[c(1, 2, 4, 5)]
print.as.board(neighbours(4, 7))
# Pick one score-interval midpoint and inspect the induced Ising model.
y <- score_means[5]
# Conditional Ising model parameters
Omega <- `[<-`(Reduce(kronecker, rev(Omegas)), Omega_const, 0)
params <- `diag<-`(Omega, as.vector(mlm(`dim<-`(fun_y(y), dimF), betas)))
# Conditional mean of the Ising model
mu_y <- ising_m2(params)
range(Omega)
matrixImage(Omega)
matrixImage(mu_y)
# Draw 512 positions per score interval; regress midpoints on the reduction.
X <- `dim<-`(Reduce(c, Map(data_gen, 512, score_min, score_max)), c(8, 8, 12, 512 * length(score_means)))
y <- rep(score_means, each = 512)
mean_X <- rowMeans(X, dims = 3)
X_reduced <- mlm(X - as.vector(mean_X), betas, transposed = TRUE)
summary(lm(y ~ mat(X_reduced, 4)))
plot(lm(y ~ mat(X_reduced, 4)))
# Same regression on a fresh sample using the raw (non-binned) scores.
fens <- Rchess::data.gen(data_set, 10000, quiet = TRUE)
y <- attr(fens, "scores")
X <- Rchess::fen2int(fens)
mean_X <- rowMeans(X, dims = 3)
X_reduced <- mat(mlm(X - as.vector(mean_X), betas, transposed = TRUE), 4)
colnames(X_reduced) <- paste0("y^", c(0, 1, 1, 2, 1, 2, 2, 3))
fit <- lm(y ~ X_reduced)
summary(fit)
vcov(fit)
# resample
fens <- Rchess::data.gen(data_set, 10000, quiet = TRUE)
y <- attr(fens, "scores")
X <- Rchess::fen2int(fens)
mean_X <- rowMeans(X, dims = 3)
X_reduced <- mat(mlm(X - as.vector(mean_X), betas, transposed = TRUE), 4)
colnames(X_reduced) <- paste0("y^", c(0, 1, 1, 2, 1, 2, 2, 3))
# Out-of-sample predictions vs observed scores.
plot(predict(fit, newdata = as.data.frame(X_reduced)), y)
# save_points <- sort(list.files(pattern = "save_point*"))
# load(save_points[length(save_points)])
# loss <- drop(mapply(function(file) {
# load(file)
# last_loss
# }, save_points))
# plot(loss, type = "b")
# Trace training loss and reduction quality (R^2) over all save points.
# NOTE(review): `setwd()` mutates session state; same regex-vs-glob caveat on
# `pattern = "save_point*"` as above.
setwd("~/Work/tensorPredictors/dataAnalysis/chess/")
save_points <- sort(list.files(".", pattern = "save_point*"))
c(head(save_points), "...", tail(save_points))
# Collect `last_loss` from every save point; keep only finite values.
loss <- sapply(save_points, function(save_point) {
load(save_point)
last_loss
}, USE.NAMES = FALSE)
names(loss) <- seq_along(loss)
loss <- loss[is.finite(loss)]
c(head(loss), "...", tail(loss))
# Per-save-point R^2 of `y ~ reduced X`; reuses `X`, `y`, `mean_X` from above.
R2 <- sapply(save_points, function(save_point) {
load(save_point)
X_reduced <- mlm(X - as.vector(mean_X), betas, transposed = TRUE)
fit <- lm(y ~ mat(X_reduced, 4))
summary(fit)$r.squared
}, USE.NAMES = FALSE)
plot(as.numeric(names(loss)), loss, type = "l", col = "red", lwd = 2, log = "y")
abline(v = 1745, lty = 2)
plot(R2, type = "l", col = "red", lwd = 2, log = "y")
abline(v = 1740, lty = 2)
abline(h = R2[1740], lty = 2)
summary(fit)
vcov(fit)
}
# Quick sanity check of `lm`: simulate y = x + 2 + noise and summarize the fit
# (intercept should be near 2, slope near 1). RNG draw order matches the
# original expression: noise vector first, then the regressor.
local({
    noise <- rnorm(100)
    x <- rnorm(100)
    y <- noise + 2 + x
    summary(lm(y ~ x))
})