tensor_predictors/dataAnalysis/chess/chess.R

options(keep.source = TRUE, keep.source.pkgs = TRUE)

library(tensorPredictors)
library(Rchess)

source("./gmlm_chess.R")

# Data set file name of chess positions with Stockfish [https://stockfishchess.org]
# evaluation scores (downloaded and processed by `./preprocessing.sh` from the
# lichess data base [https://database.lichess.org/])
data_set <- "lichess_db_standard_rated_2023-11.fen"

# Function to draw samples `X` form the chess position `data_set` conditioned on
# `Y` (position scores) to be in the interval `score_min` to `score_max`.
data_gen <- function(batch_size, score_min, score_max) {
    Rchess::fen2int(Rchess::data.gen(data_set, batch_size, score_min, score_max))
}

fun_y = function(y) {
    F <- t(outer(y, c(0, 1, 1, 2, 1, 2, 2, 3), `^`))
    dim(F) <- c(2, 2, 2, length(y))
    F
}

# Invoke specialized GMLM optimization routine for chess data
fit.gmlm <- gmlm_chess(data_gen, fun_y)


load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000000.Rdata")
load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000274.Rdata")
load("/home/loki/Work/tensorPredictors/dataAnalysis/chess/gmlm_chess_save_point_000532.Rdata")

# build intervals from score break points
score_breaks <- c(-5.0, -3.0, -2.0, -1.0, -0.5, -0.2, 0.2, 0.5, 1.0, 2.0, 3.0, 5.0)
score_min <- head(score_breaks, -1)
score_max <- tail(score_breaks, -1)
score_means <- (score_min + score_max) / 2


# build Omega constraint, that is the set of impossible combinations
# (including self interactions) due to the rules of chess
Omega_const <- local({
    # One piece per square
    diag_offset <- abs(.row(c(768, 768)) - .col(c(768, 768)))
    Omega_const <- !diag(768) & ((diag_offset %% 64L) == 0L)
    # One King per color
    Omega_const <- Omega_const | kronecker(diag(1:12 %in% c(6, 12)), !diag(64), `&`)
    # no pawns on rank 1 or rank 8
    pawn_const <- tcrossprod(as.vector(`[<-`(matrix(0L, 8, 8), c(1, 8), , 1L)), rep(1L, 64))
    pawn_const <- kronecker(`[<-`(matrix(0, 12, 12), c(1, 7), , 1), pawn_const)
    which(Omega_const | (pawn_const | t(pawn_const)))
})


y <- score_means[5]


# Conditional Ising model parameters
Omega <- `[<-`(Reduce(kronecker, rev(Omegas)), Omega_const, 0)
params <- `diag<-`(Omega, as.vector(mlm(`dim<-`(fun_y(y), dimF), betas)))

# Conditional mean of the Ising model
mu_y <- ising_m2(params)

    layout(matrix(c(
        1, 2, 3, 3, 3,
        1, 4, 5, 6, 7
    ), nrow = 2, byrow = TRUE), width = c(6, 3, 1, 1, 1))

    legend("topright", col = c("red", "blue", "darkgreen"), lty = 1, lwd = 2,
        legend = c("dist.B", "dist.Omega", "loss"), bty = "n")

range(Omega)
matrixImage(Omega)
matrixImage(mu_y)

X <- `dim<-`(Reduce(c, Map(data_gen, 512, score_min, score_max)), c(8, 8, 12, 512 * length(score_means)))
y <- rep(score_means, each = 512)

mean_X <- rowMeans(X, dims = 3)

X_reduced <- mlm(X - as.vector(mean_X), betas, transposed = TRUE)

summary(lm(y ~ mat(X_reduced, 4)))

plot(lm(y ~ mat(X_reduced, 4)))


# save_points <- sort(list.files(pattern = "save_point*"))

# load(save_points[length(save_points)])

# loss <- drop(mapply(function(file) {
#     load(file)
#     last_loss
# }, save_points))

# plot(loss, type = "b")