2019-12-16 16:34:35 +00:00
|
|
|
% Generated by roxygen2: do not edit by hand
|
|
|
|
% Please edit documentation in R/datasets.R
|
|
|
|
\name{dataset}
|
|
|
|
\alias{dataset}
|
|
|
|
\title{Generates test datasets.}
|
|
|
|
\usage{
|
|
|
|
dataset(name = "M1", n = NULL, p = 20, sd = 0.5, ...)
|
|
|
|
}
|
|
|
|
\arguments{
|
|
|
|
\item{name}{One of \code{"M1"}, \code{"M2"}, \code{"M3"}, \code{"M4"},
|
|
|
|
\code{"M5"}, \code{"M6"} or \code{"M7"}. Alternatively, just the dataset number
|
|
|
|
1-7.}
|
|
|
|
|
|
|
|
\item{n}{number of samples.}
|
|
|
|
|
|
|
|
\item{p}{Dimension of random variable \eqn{X}.}
|
|
|
|
|
|
|
|
\item{sd}{standard deviation for error term \eqn{\epsilon}.}
|
|
|
|
|
|
|
|
\item{...}{Additional parameters only for "M2" (namely \code{pmix} and
|
|
|
|
\code{lambda}), see below.}
|
|
|
|
}
|
|
|
|
\value{
|
|
|
|
List with elements
|
|
|
|
\itemize{
|
|
|
|
\item{X}{data, a \eqn{n\times p}{n x p} matrix.}
|
|
|
|
\item{Y}{response.}
|
|
|
|
\item{B}{the dim-reduction matrix}
|
|
|
|
\item{name}{Name of the dataset (name parameter)}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
\description{
|
|
|
|
Provides sample datasets M1-M7 used in the paper Conditional variance
|
|
|
|
estimation for sufficient dimension reduction, Lukas Fertl, Efstathia Bura.
|
|
|
|
The general model is given by:
|
|
|
|
\deqn{Y = g(B'X) + \epsilon}
|
|
|
|
}
|
|
|
|
\section{M1}{
|
|
|
|
|
|
|
|
The predictors are distributed as
|
|
|
|
\eqn{X\sim N_p(0, \Sigma)}{X ~ N_p(0, \Sigma)} with
|
|
|
|
\eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
|
|
|
|
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 1} with a default
|
|
|
|
of \eqn{n = 100} data points. \eqn{p = 20},
|
|
|
|
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)}, and \eqn{Y} is
|
|
|
|
given as \deqn{Y = \cos(b_1'X) + \epsilon}{Y = cos(b_1'X) + \epsilon} where \eqn{\epsilon} is
|
|
|
|
distributed as a generalized normal distribution with location 0,
|
|
|
|
shape-parameter 0.5, and the scale-parameter is chosen such that
|
|
|
|
\eqn{Var(\epsilon) = 0.5}.
|
|
|
|
}
|
|
|
|
|
|
|
|
\section{M2}{
|
|
|
|
|
|
|
|
The predictors are distributed as \eqn{X \sim Z 1_p \lambda + N_p(0, I_p)}{X ~ Z 1_p \lambda + N_p(0, I_p)} with
|
|
|
|
\eqn{Z \sim 2 Binom(p_{mix}) - 1\in\{-1, 1\}}{Z~2Binom(pmix)-1} where
|
|
|
|
\eqn{1_p} is the \eqn{p}-dimensional vector of ones, for a subspace
|
|
|
|
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
|
|
|
|
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
|
|
|
|
and \eqn{Y} is \deqn{Y = \cos(b_1'X) + 0.5\epsilon}{Y = cos(b_1'X) + 0.5\epsilon} where \eqn{\epsilon} is
|
|
|
|
standard normal.
|
|
|
|
The default for \code{pmix} is 0.3 and \code{lambda} defaults to 1.
|
|
|
|
}
|
|
|
|
|
|
|
|
\section{M3}{
|
|
|
|
|
|
|
|
The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
|
|
|
|
for a subspace
|
|
|
|
dimension of \eqn{k = 1} with a default of \eqn{n = 100} data points.
|
|
|
|
\eqn{p = 20}, \eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
|
|
|
|
and \eqn{Y} is
|
|
|
|
\deqn{Y = 2 \log(|b_1'X| + 2) + 0.5\epsilon}{Y = 2 log(|b_1'X| + 2) + 0.5\epsilon} where \eqn{\epsilon} is
|
|
|
|
standard normal.
|
|
|
|
}
|
|
|
|
|
|
|
|
\section{M4}{
|
|
|
|
|
|
|
|
The predictors are distributed as \eqn{X\sim N_p(0,\Sigma)}{X~N_p(0,\Sigma)}
|
|
|
|
with \eqn{\Sigma_{i, j} = 0.5^{|i - j|}}{\Sigma_ij = 0.5^|i - j|} for
|
|
|
|
\eqn{i, j = 1,..., p} for a subspace dimension of \eqn{k = 2} with a default
|
|
|
|
of \eqn{n = 100} data points. \eqn{p = 20},
|
|
|
|
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
|
|
|
|
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
|
|
|
|
and \eqn{Y} is given as \deqn{Y = \frac{b_1'X}{0.5 + (1.5 + b_2'X)^2} + 0.5\epsilon}{Y = (b_1'X) / (0.5 + (1.5 + b_2'X)^2) + 0.5\epsilon}
|
|
|
|
where \eqn{\epsilon} is standard normal.
|
|
|
|
}
|
|
|
|
|
|
|
|
\section{M5}{
|
|
|
|
|
|
|
|
The predictors are distributed as \eqn{X\sim U([0,1]^p)}{X~U([0, 1]^p)}
|
|
|
|
where \eqn{U([0, 1]^p)} is the uniform distribution with
|
|
|
|
independent components on the \eqn{p}-dimensional hypercube for a subspace
|
|
|
|
dimension of \eqn{k = 2} with a default of \eqn{n = 200} data points.
|
|
|
|
\eqn{p = 20},
|
|
|
|
\eqn{b_1 = (1,1,1,1,1,1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_1 = (1,1,1,1,1,1,0,...,0)' / sqrt(6)},
|
|
|
|
\eqn{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / \sqrt{6}\in\mathcal{R}^p}{b_2 = (1,-1,1,-1,1,-1,0,...,0)' / sqrt(6)}
|
|
|
|
and \eqn{Y} is given as \deqn{Y = \cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}{Y = cos(\pi b_1'X)(b_2'X + 1)^2 + 0.5\epsilon}
|
|
|
|
where \eqn{\epsilon} is standard normal.
|
|
|
|
}
|
|
|
|
|
|
|
|
\section{M6}{
|
|
|
|
|
|
|
|
The predictors are distributed as \eqn{X\sim N_p(0, I_p)}{X~N_p(0, I_p)}
|
|
|
|
for a subspace dimension of \eqn{k = 3} with a default of \eqn{n = 200} data
|
|
|
|
points. \eqn{p = 20, b_1 = e_1, b_2 = e_2}, and \eqn{b_3 = e_p}, where
|
|
|
|
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
|
|
|
|
\eqn{Y} is given as \deqn{Y = (b_1'X)^2+(b_2'X)^2+(b_3'X)^2+0.5\epsilon}
|
|
|
|
where \eqn{\epsilon} is standard normal.
|
|
|
|
}
|
|
|
|
|
|
|
|
\section{M7}{
|
|
|
|
|
|
|
|
The predictors are distributed as \eqn{X\sim t_3(I_p)}{X~t_3(I_p)} where
|
|
|
|
\eqn{t_3(I_p)} is the standard multivariate t-distribution with 3 degrees of
|
|
|
|
freedom, for a subspace dimension of \eqn{k = 4} with a default of
|
|
|
|
\eqn{n = 200} data points.
|
|
|
|
\eqn{p = 20, b_1 = e_1, b_2 = e_2, b_3 = e_3}, and \eqn{b_4 = e_p}, where
|
|
|
|
\eqn{e_j} is the \eqn{j}-th unit vector in the \eqn{p}-dimensional space.
|
|
|
|
\eqn{Y} is given as \deqn{Y = (b_1'X)(b_2'X)^2+(b_3'X)(b_4'X)+0.5\epsilon}
|
|
|
|
where \eqn{\epsilon} is distributed as a generalized normal distribution with
|
|
|
|
location 0, shape-parameter 1, and the scale-parameter is chosen such that
|
|
|
|
\eqn{Var(\epsilon) = 0.25}.
|
|
|
|
}
|
|
|
|
|
|
|
|
\references{
|
2021-03-05 13:52:45 +00:00
|
|
|
Fertl, L. and Bura, E. (2021), Conditional Variance
|
|
|
|
Estimation for Sufficient Dimension Reduction.
|
|
|
|
arXiv:2102.08782
|
2019-12-16 16:34:35 +00:00
|
|
|
}
|