### Source codes accompanying the paper
### ``Revisiting the random shift approach for testing in spatial statistics''
### by Tomáš Mrkvička, Jiří Dvořák, Jonatan Gonzáles and Jorge Mateu,
### submitted to Spatial Statistics.

### This file provides an example of the testing procedures for
### testing independence of a pair of random fields.

### Version 5.2.2020


### Required external packages ###
##################################

library(spatstat)
library(geoR)


### Simulated data ###
######################

# test.points ... the set of sampling locations, here represented as a point pattern
#                 (the default observation window is the unit square)
# covariateA .... one of the random fields, available in the whole observation window,
#                 here represented as a pixel image
# covariateB .... one of the random fields, can be observed only at sampling points,
#                 here represented as a pixel image but only values at test.points are used

auxA <- attr(rLGCP("exp", mu=0, var=1, scale=0.2, saveLambda=TRUE), "Lambda")
auxB <- attr(rLGCP("exp", mu=0, var=1, scale=0.2, saveLambda=TRUE), "Lambda")
covariateA <- eval.im(log(auxA))
covariateB <- eval.im(log(auxB))
test.points <- runifpoint(100)


### Random shift with torus correction ###
##########################################

# Number of random shifts to be used in the test
N.perm <- 999

observed.covariance <- cov(covariateA[test.points], covariateB[test.points])

values.simulated <- rep(NA, times=N.perm)
for (k in 1:N.perm){
  test.points.shift <- rshift(test.points, edge="torus", radius=0.5)
  values.simulated[k] <- cov(covariateA[test.points.shift], covariateB[test.points])
}

test.rank <- rank(c(observed.covariance, values.simulated))[1]
p.value.torus <- 2*min(test.rank, N.perm+1-test.rank)/(N.perm+1)
p.value.torus


### Random shift with minus correction ###
##########################################

# Number of random shifts to be used in the test
N.perm <- 999

W.central <- owin(xrange=c(1/3,2/3), yrange=c(1/3,2/3))
test.points.central <- test.points[W.central]
valuesA.central <- covariateA[test.points.central]
observed.covariance <- cov(valuesA.central, covariateB[test.points.central])

values.simulated <- rep(NA, times=N.perm)
for (k in 1:N.perm){
  points.shift <- shift.ppp(test.points.central, vec=runif(2, min=-1/3, max=1/3))
  valuesB.shift <- covariateB[points.shift]
  values.simulated[k] <- cov(valuesA.central, valuesB.shift)
}

test.rank <- rank(c(observed.covariance, values.simulated))[1]
p.value.minus <- 2*min(test.rank, N.perm+1-test.rank)/(N.perm+1)
p.value.minus


### Random shift with variance correction (count) ###
#####################################################

# Number of random shifts to be used in the test
N.perm <- 999

values.simulated <- rep(NA, times=N.perm+1)
values.simulated[1] <- cov(covariateA[test.points], covariateB[test.points])
n.simulated <- rep(NA, times=N.perm+1)
n.simulated[1] <- test.points$n

# Random shifts
for (k in 1:N.perm){
  jump <- runifdisc(1, radius=0.5)
  test.points.shifted <- shift(test.points, c(jump$x, jump$y))
  W.reduced <- intersect.owin(test.points$window, test.points.shifted$window)
  test.points.reduced <- test.points[W.reduced]
  test.points.reduced.backshifted <- shift(test.points.reduced, c(-jump$x, -jump$y))
  covA <- covariateA[test.points.reduced.backshifted]
  covB <- covariateB[test.points.reduced]
  values.simulated[k+1] <- cov(covA, covB)
  n.simulated[k+1] <- test.points.reduced$n
}

# Variance correction (count)
values.std <- (values.simulated - mean(values.simulated))*sqrt(n.simulated)

test.rank <- rank(values.std)[1]
p.value.count <- 2*min(test.rank, N.perm+1-test.rank)/(N.perm+1)
p.value.count
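
# Optional helper (not part of the original script): the two-sided p-value
# "2*min(test.rank, N.perm+1-test.rank)/(N.perm+1)" is computed in the same way
# in every section of this file. A minimal sketch of a wrapper that makes the
# logic explicit; the name p.value.twosided is our own and is not used below.
p.value.twosided <- function(observed, simulated){
  n.total <- length(simulated) + 1              # observed value + simulated values
  test.rank <- rank(c(observed, simulated))[1]  # rank of the observed value
  2*min(test.rank, n.total - test.rank)/n.total # two-sided p-value
}
# For example, p.value.twosided(observed.covariance, values.simulated) would
# reproduce p.value.torus if called right after the torus section (both objects
# are overwritten by the later sections).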

### Random shift with variance correction (kernel) ###
######################################################

# Number of random shifts to be used in the test
N.perm <- 999

# Value of bandwidth to be used in the kernel regression
bw <- 0.10

# Epanechnikov kernel used for determining the weights in the regression
epanechnikov <- function(x, bw){
  return(pmax(0, (1/bw)*3/4*(1-(x/bw)^2)))
}

values.simulated <- rep(NA, times=N.perm+1)
values.simulated[1] <- cov(covariateA[test.points], covariateB[test.points])
n.simulated <- rep(NA, times=N.perm+1)
n.simulated[1] <- test.points$n
shift.vecs <- matrix(data=NA, nrow=N.perm+1, ncol=2)
shift.vecs[1,] <- c(0,0)

# Random shifts
for (k in 1:N.perm){
  jump <- runifdisc(1, radius=0.5)
  shift.vecs[k+1,] <- c(jump$x, jump$y)
  test.points.shifted <- shift(test.points, c(jump$x, jump$y))
  W.reduced <- intersect.owin(test.points$window, test.points.shifted$window)
  test.points.reduced <- test.points[W.reduced]
  test.points.reduced.backshifted <- shift(test.points.reduced, c(-jump$x, -jump$y))
  covA <- covariateA[test.points.reduced.backshifted]
  covB <- covariateB[test.points.reduced]
  values.simulated[k+1] <- cov(covA, covB)
  n.simulated[k+1] <- test.points.reduced$n
}

# Standardization using kernel regression (locally constant regression),
# compute matrix of pairwise distances between the shift vectors
diff_ij <- function(i, j) sqrt(rowSums((shift.vecs[i,] - shift.vecs[j,])^2))
dist.mat <- outer(seq_len(N.perm+1), seq_len(N.perm+1), diff_ij)

# Variance correction (kernel)
values.simulated.std2 <- (values.simulated - mean(values.simulated))^2
mvar <- rep(NA, times=N.perm+1)
for (i in 1:(N.perm+1)){
  wis <- epanechnikov(dist.mat[i,], bw=bw) # weights in the regression
  mvar[i] <- sum(wis*values.simulated.std2)/sum(wis)
}
values.std <- (values.simulated - mean(values.simulated))/sqrt(mvar)

test.rank <- rank(values.std)[1]
p.value.kernel <- 2*min(test.rank, N.perm+1-test.rank)/(N.perm+1)
p.value.kernel


### Random shift with variance correction (exact variance) ###
##############################################################

# Number of random shifts to be used in the test
N.perm <- 999

values.simulated <- rep(NA, times=N.perm+1)
values.simulated[1] <- cov(covariateA[test.points], covariateB[test.points])
var.simulated <- rep(NA, times=N.perm+1)

n <- test.points$n
dist.mat <- pairdist(test.points)

# Estimate variogram model for covariateA
geo.A <- data.frame(x=test.points$x, y=test.points$y, z=covariateA[test.points])
geo.A <- as.geodata(geo.A)
v.A <- variog(geo.A, uvec=seq(0,1,by=0.05), messages=FALSE)
obj.A <- function(par){ # par[1] = sigma^2, par[2] = s
  sum( (v.A$v - par[1]*(1-exp(-v.A$u/par[2])) )^2)
}
est.par.A <- optim(par=c(1,1), fn=obj.A)$par
cov.mat.A <- est.par.A[1]*exp(-dist.mat/est.par.A[2])

# Estimate variogram model for covariateB
geo.B <- data.frame(x=test.points$x, y=test.points$y, z=covariateB[test.points])
geo.B <- as.geodata(geo.B)
v.B <- variog(geo.B, uvec=seq(0,1,by=0.05), messages=FALSE)
obj.B <- function(par){ # par[1] = sigma^2, par[2] = s
  sum( (v.B$v - par[1]*(1-exp(-v.B$u/par[2])) )^2)
}
est.par.B <- optim(par=c(1,1), fn=obj.B)$par
cov.mat.B <- est.par.B[1]*exp(-dist.mat/est.par.B[2])
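
# Optional check (not part of the original script): plot the empirical
# variograms together with the fitted exponential models to see whether the
# least-squares fits above are reasonable. Purely diagnostic, nothing below
# depends on it.
op <- par(mfrow=c(1,2))
plot(v.A$u, v.A$v, xlab="distance", ylab="semivariance", main="covariateA")
lines(v.A$u, est.par.A[1]*(1-exp(-v.A$u/est.par.A[2])), col="red")
plot(v.B$u, v.B$v, xlab="distance", ylab="semivariance", main="covariateB")
lines(v.B$u, est.par.B[1]*(1-exp(-v.B$u/est.par.B[2])), col="red")
par(op)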

# Variance of the test statistics for the observed data
r.sums <- rowSums(cov.mat.A) %o% rep(1, times=n)
c.sums <- rep(1, times=n) %o% colSums(cov.mat.A)
rc.sum <- sum(rowSums(cov.mat.A))
A <- cov.mat.A + rc.sum/n^2 - r.sums/n - c.sums/n

r.sums <- rowSums(cov.mat.B) %o% rep(1, times=n)
c.sums <- rep(1, times=n) %o% colSums(cov.mat.B)
rc.sum <- sum(rowSums(cov.mat.B))
B <- cov.mat.B + rc.sum/n^2 - r.sums/n - c.sums/n

var.simulated[1] <- sum(rowSums(A*B))/(n-1)^2

# Random shifts
for (k in 1:N.perm){
  jump <- runifdisc(1, radius=0.5)
  test.points.shifted <- shift(test.points, c(jump$x, jump$y))
  W.reduced <- intersect.owin(test.points$window, test.points.shifted$window)
  test.points.reduced <- test.points[W.reduced]
  test.points.reduced.backshifted <- shift(test.points.reduced, c(-jump$x, -jump$y))
  covA <- covariateA[test.points.reduced.backshifted]
  covB <- covariateB[test.points.reduced]
  values.simulated[k+1] <- cov(covA, covB)

  # Which points are not discarded by the random shift
  keep <- inside.owin(x=test.points$x, y=test.points$y, w=W.reduced)
  n.reduced <- sum(keep)

  # Variance of the test statistics for the shifted data
  cov.mat.A.red <- cov.mat.A[keep, keep]
  cov.mat.B.red <- cov.mat.B[keep, keep]

  r.sums <- rowSums(cov.mat.A.red) %o% rep(1, times=n.reduced)
  c.sums <- rep(1, times=n.reduced) %o% colSums(cov.mat.A.red)
  rc.sum <- sum(rowSums(cov.mat.A.red))
  A <- cov.mat.A.red + rc.sum/n.reduced^2 - r.sums/n.reduced - c.sums/n.reduced

  r.sums <- rowSums(cov.mat.B.red) %o% rep(1, times=n.reduced)
  c.sums <- rep(1, times=n.reduced) %o% colSums(cov.mat.B.red)
  rc.sum <- sum(rowSums(cov.mat.B.red))
  B <- cov.mat.B.red + rc.sum/n.reduced^2 - r.sums/n.reduced - c.sums/n.reduced

  var.simulated[k+1] <- sum(rowSums(A*B))/(n.reduced-1)^2
}

# Variance corrections
values.std <- (values.simulated - mean(values.simulated))/sqrt(var.simulated)

test.rank <- rank(values.std)[1]
p.value.var <- 2*min(test.rank, N.perm+1-test.rank)/(N.perm+1)
p.value.var
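
# Optional summary (not part of the original script): collect the p-values of
# the five variants computed above for a quick side-by-side comparison.
# Assumes all sections have been run in order.
c(torus  = p.value.torus,
  minus  = p.value.minus,
  count  = p.value.count,
  kernel = p.value.kernel,
  var    = p.value.var)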