rm(list=ls());

###
#
#
#   10. Nonparametric regression
#
#
###

library(MASS);
library(locpol);

###
#
#   1. Motorcycle data
#
###

data(mcycle);
#      A data frame giving a series of measurements of head acceleration
#      in a simulated motorcycle accident, used to test crash helmets.
#      times: in milliseconds after impact.
#      accel: in g.

plot(mcycle);

X <- mcycle$times;
Y <- mcycle$accel;
 
x0 <- seq(min(X), max(X), by=0.1);

#   A. Fitting by global polynomials 

#   A naive function to fit polynomials of order p
#   for numerical reasons, it would be better to use the function 'poly'
fit.pol <- function(p, X, Y, COLOR="blue", draw=TRUE){
	
	n <- length(X)
	XX <- matrix(NA, n, p)
	
	for(i in 1:p){
		XX[,i] <- X^(i);
	}
	
	fit <- lm(Y~XX);
	
	print(summary(fit));
	
	XX0 <- matrix(NA, length(x0), p)
	
	for(i in 1:p){
		XX0[,i] <- x0^(i);
	}
	
	fitted <- cbind(1,XX0)%*%fit$coef;
	
	if(draw){
		lines(x0, fitted, col=COLOR, lwd=2);
	}
	
	invisible(fitted)
}

plot(mcycle);
fit.pol(2, X, Y, COLOR="red");
fit.pol(4, X, Y, COLOR="orange");
fit.pol(6, X, Y,  COLOR="yellow");
fit.pol(8, X, Y, COLOR="green");
fit.pol(10, X, Y, COLOR="blue");
fit.pol(12, X, Y, COLOR="darkblue");
legend("bottomright", lty=rep(1, 6), 
       col=c("red", "orange", "yellow", "green", "blue", "darkblue"), 
       legend=paste("p=", seq(2,12, by=2)))


#   B. Local constant and local linear fiting 

x0 <- seq(min(X), max(X), by=0.1);

#   Nadaraya - Watson's estimator

#   Local constant estimator, in the window we consider we take the mean of all
#   observations within the window as the resulting regression estimator

h = 5
d = 150
xe2 = seq(5,55,length = d)
ye = rep(NA,d)

for(i in 1:d){
  x = xe2[i]
  plot(mcycle, main = "Nadaraya - Watson (loc. pol. with p=0)")
  abline(v=x, lty=2)
  abline(v=x+c(-h,h), lty = 3, lwd=2)
  II = (x-h<=mcycle$times)&(x+h>=mcycle$times)
  points(mcycle[II,], col = 2, pch = 16)
  segments(x-h,ye[i]<-mean(mcycle[II,2]), x+h, ye[i], col = 2, lwd = 2)
  points(x,ye[i], pch = 15, cex=1.5, col = 2)
  lines(xe2[1:i],ye[1:i],col=2)
  Sys.sleep(10/d)
}

#   Local Linear estimator (p=1)

#   For this estimator, we fit a linear function to all data within the 
#   observation window. The point estimator of the regression function 
#   (locally an intercept when properly shifted) gives an estimator of the 
#   regression function. The slope, at the same time, gives a reasonable 
#   estimator of the first derivative of the regression function 

h = 15 # try also h = .5, or h = 50
for(i in 1:d){
  x = xe2[i]
  plot(mcycle, main = "Local Linear (loc. pol. with p=1)")
  abline(v=x, lty=2)
  abline(v=x+c(-h,h), lty = 3, lwd=2)
  II = (x-h<=mcycle$times)&(x+h>=mcycle$times)
  points(mcycle[II,], col = 2, pch = 16)
  #
  fit.lm = lm(Y[II]~I(X[II]-x))
  ye[i] = fit.lm$coef[1]
  #
  segments(x-h,ye[i]-h*fit.lm$coef[2], x+h, ye[i]+h*fit.lm$coef[2], col = 2, 
           lwd = 2)
  points(x,ye[i], pch = 15, cex=1.5, col = 2)
  lines(xe2[1:i],ye[1:i],col=2)
  Sys.sleep(10/d)
}

#   The previous illustrations were shown for the rectangular kernel
#   (only points within h from x are considered, all with the same weights)
#   Of course, as in kernel density estimation, other kernels and weighting 
#   schemes could be used analogously

fit = locpol(Y~X,deg=1, data=data.frame(X,Y), xeval=x0, kernel = EpaK, bw=h)
lines(fit$lpFit[,1], fit$lpFit[,2], lwd=2, col=3)
legend("bottomright",c("p=1 (rect. kernel)", "p=1 (Epan. kernel)"), col=c(2,3),
       lwd=2)

###
#
#   Work with package locpol
#
###

#   Local linear estimator p = 1
fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK);
# does not work.

#   The bandwidth is found by cross-validation. By default the bandwidth h is 
#   searched for in the interval '.lokestOptInt' which ranges from 0.0005 to 
#   1.5000, which causes often troubles. 

#   Banwdith choice through cross-validation 
(bw.CV <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(1,15)));

#   Rule of thumb (ROT)
(bw.ROT <- thumbBw(X, Y, deg=1, kernel=EpaK))

#   ROT - manual calculations 
n <- nrow(mcycle);
fit <- lm(Y~X+I(X^2)+I(X^3)+I(X^4))
# sigmatilde2 <- sum(residuals(fit)^2)/(n-5);
sigmatilde2 <- sum(residuals(fit)^2)/n;
ddm <- cbind(0,0,1, X,X^2)%*%(coef(fit)*(0:4)*(-1:3))
(RK(EpaK)*sigmatilde2/(n*mu2K(EpaK)^2 * mean(ddm^2)))^(1/5)

#   Be careful with the scaling
thumbBw(X, Y, deg=1, kernel=EpaK)
thumbBw(2*X, Y, deg=1, kernel=EpaK)
thumbBw(3*X, Y, deg=1, kernel=EpaK)

#   We use bandwidth chosen through cross-validation 
BW <- bw.CV;

#   Local linear estimator p = 1 - the second attempt 
fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, 
                 kernel=EpaK, bw=BW);

#   Plots associatioted with 'locpol' object 
#   Beware! plot(locpol) alters the global plotting parameters
op = par(mfrow=c(1,1))
plot(1,1)
plot(fit.LL)
plot(1,1)
#   to fix this issue it must be wrapped in 
#   op = par() # save the current plotting parameters
#   and 
par(op) # change the plotting parameters back to op
#
plot(1,1)
#   Now it's ok again

#   Local constant estimator p = 0 (Nadaraya-Watson)
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

#   Nonparametric fit 
plot(X, Y);

lines(x0, fit.LL$lpFit$Y, col="green", lwd=2)
lines(x0, fit.NW$beta0, col="red", lwd=2)
fit.pol(12, X, Y, COLOR="darkblue"); 

legend("bottomright", lty=rep(1, 3) , col=c("green", "red", "darkblue"), 
       legend=c("local linear", "local constant (NW)", 
                "polynomial of order 12"), lwd=rep(2,3))


#   Comparison of the weights for the local linear smoother, and the 
#   Nadaraya-Watson smoother
weights.NW <- locCteWeightsC(x=X, xeval=X, kernel=EpaK, bw=bw.CV);
weights.LL <- locLinWeightsC(x=X, xeval=X, kernel=EpaK, bw=bw.CV); 

#

par(mfrow=c(3,3));
indexy <- round(seq(1,133,length=9));
for(i in 1:9){
	qqq <- as.logical(weights.NW$locWeig[indexy[i],] > 1e-10) | as.logical(
	  abs(weights.LL$locWeig[indexy[i],]) > 1e-10)
	plot(jitter(weights.NW$locWeig[indexy[i],qqq]), jitter(
	  weights.LL$locWeig[indexy[i],qqq]), cex=0.55, pch=16, 
	  main=paste("Weights at the point x =", X[indexy[i]]), 
	  ylab="Local linear weights", xlab="Nadaraya-Watson weights");
}
par(mfrow=c(1,1))

###
#
#   Comparison of local polynomial estimation for p=0,1,2,3 
#
###

compare.lp <- function(BW){
	
	# # Local polynomial fits 
	fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK, 
	                 bw=BW);
	fit.LL2 <- locpol(Y~X, deg=2, data=data.frame(X,Y), xeval=x0, kernel=EpaK, 
	                  bw=BW);
  fit.LL3 <- locpol(Y~X, deg=3, data=data.frame(X,Y), xeval=x0, kernel=EpaK, 
                    bw=BW);


	# Local linear estimator p = 0 (Nadaraya-Watson)
	fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

	plot(X, Y, main=paste("Banwdith:", round(BW,2)));

	lines(x0, fit.NW$beta0, col="red", lwd=2)
	lines(x0, fit.LL$lpFit$Y, col="green", lwd=2);
	lines(x0, fit.LL2$lpFit$Y, col="blue", lwd=2);
	lines(x0, fit.LL3$lpFit$Y, col="darkblue", lwd=2);

	legend("bottomright", lty=rep(1, 4), 
	       col=c("red", "green", "blue", "darkblue"), 
	       legend=c("local constant (NW)", "local linear", "local quadratic",  
	                "local cubic"))
}

# # CV, p=1
(BW <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(1,15)));
compare.lp(BW)

# # CV, p=3
(BW <- regCVBwSelC(X, Y, deg=3, kernel=EpaK, interval=c(1,15)));
compare.lp(BW)


# # Comparison of local polynomial estimation for p=0,1,2,3
# # (for each p the bandwidth is chosen by CV)

# # Local polynomial fits 
(BW1 <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(1,15)));
fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK, 
                 bw=BW1);
(BW2 <- regCVBwSelC(X, Y, deg=2, kernel=EpaK, interval=c(1,15)));
fit.LL2 <- locpol(Y~X, deg=2, data=data.frame(X,Y), xeval=x0, kernel=EpaK, 
                  bw=BW2);
(BW3 <- regCVBwSelC(X, Y, deg=3, kernel=EpaK, interval=c(1,15)));
fit.LL3 <- locpol(Y~X, deg=3, data=data.frame(X,Y), xeval=x0, kernel=EpaK, 
                  bw=BW3);


# Local linear estimator p = 0 (Nadaraya-Watson)
BW0 <- regCVBwSelC(X, Y, deg=0, kernel=EpaK, interval=c(1,15));
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=BW0); 


plot(X, Y, main=paste("Banwidths:", round(BW0,2), ",", round(BW1,2), ",", 
                      round(BW2,2), ",", round(BW3,2)));

lines(x0, fit.NW$beta0, col="red", lwd=2)
lines(x0, fit.LL$lpFit$Y, col="green", lwd=2);
lines(x0, fit.LL2$lpFit$Y, col="blue", lwd=2);
lines(x0, fit.LL3$lpFit$Y, col="darkblue", lwd=2);

legend("bottomright", lty=rep(1, 4), col=c("red", "green", "blue", "darkblue"),
       legend=c("local constant (NW)", "local linear", "local quadratic",  
                "local cubic"), lwd=rep(2,4))

###
#
#   Function Lowess, see Section 10.6
#
###

?lowess

# # Lowess - smoothing 
fit.lowess <- lowess(X, Y); 

#   The role of the bandwidth is played by parameter f = smoother span
#   By default f=2/3

plot(mcycle, main="f=2/3"); 
lines(fit.lowess, lwd=2);

#   The default values of 'the smoother span' is too large. 
plot(mcycle); 
lines(lowess(X, Y, f=1/2), col="red", lwd=2);
lines(lowess(X, Y, f=1/3), col="orange", lwd=2);
lines(lowess(X, Y, f=1/4), col="yellow", lwd=2);
lines(lowess(X, Y, f=1/5), col="green", lwd=2);
lines(lowess(X, Y, f=1/6), col="blue", lwd=2);
legend("bottomright", lty=rep(1, 6), 
       col=c("red", "orange", "yellow", "green", "blue"), 
       legend=paste("f=", round(1/(2:6),3)), lwd=rep(2,6));

# iter=0 vs iter=3
plot(mcycle, main="Lowess iter=0 vs. iter=3, f=1/4");
lines(lowess(X, Y, f=1/4, iter=0), col="red", lwd=2); 
# local linear fit with nearest neighbour bandwidth choice
lines(lowess(X, Y, f=1/4), col="blue", lwd=2); 
# robustified local linear fit with nearest neighbour bandwidth choice
legend("bottomright", lty=rep(1, 6), col=c("red", "blue"), 
       legend=c("Lowess - iter = 0", "Lowess - iter = 3"), lwd=rep(2,2));

# # Adding several outliers 
set.seed(1234567)
# outliers <- cbind(sample(X, size=5), sample(Y, size=5));
X1 <- c(X, 2+runif(5)*55);
Y1 <- c(Y, -134+24*runif(5));

n <- length(Y); # original sample size

plot(mcycle, main="Lowess iter=0 vs. iter=3, f=1/4");
points(X1[(n+1):(n+5)], Y1[(n+1):(n+5)], col="darkgreen", pch=4, cex=2, lwd=4); 
lines(lowess(X1, Y1, f=1/4, iter=0), col="red", lwd=2); 
# local linear fit with nearest neighbour bandwidth choice
lines(lowess(X1, Y1, f=1/4, iter=3), col="blue", lwd=2); 
# robustified local linear fit with nearest neighbour bandwidth choice
legend("topright", lty=rep(1, 6), col=c("red", "blue"), 
       legend=c("Lowess - iter = 0", "Lowess - iter = 3"), lwd=rep(2,2));


####
#
#   Section 10.5.4: An illustration how the nearest-neighbour type 
#                   bandwidth changes with x
#
####

n.x0 <- length(x0); 
bw <- numeric(n.x0);

k0 <- floor(1/4*n)

for(i in 1:n.x0){
  bw[i] <- sort(abs(x0[i]-X))[k0];  
}

par(mfrow=c(1,2));
plot(x0, bw, type="l", lwd=2, main=paste("Nearest neighbour bandwidth, k=n/4"))
rug(X);
#
plot(density(X), xlim=range(x0), type="l", lwd=2, 
     main="Density of the covariates")
rug(X);
par(mfrow=c(1,1));

###
#
#   Section 10.7: Conditional variance estimation
#
###

bw.CV <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(1,15));
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=X, kernel=EpaK, bw=bw.CV); 
fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=X, kernel=EpaK, 
                 bw=bw.CV);

weights.NW <- locCteWeightsC(x=X, xeval=x0, kernel=EpaK, bw=2*bw.CV)$locWeig;
weights.LL <- locLinWeightsC(x=X, xeval=x0, kernel=EpaK, bw=2*bw.CV)$locWeig; 

sigma0.NW <- weights.NW%*%(Y^2) - (weights.NW%*%Y)^2;
sigma1.NW <- weights.NW%*%(Y-fit.NW$beta0)^2;

sigma0.LL <- weights.LL%*%(Y^2) - (weights.LL%*%Y)^2;
sigma1.LL <- weights.LL%*%(Y-fitted(fit.LL))^2;

table(sigma0.NW < 0);
table(sigma1.NW < 0);
table(sigma0.LL < 0);
table(sigma1.LL < 0);

plot(x0, sqrt(sigma0.NW), ylim=range(mcycle), type="l", ylab="", 
     main="Conditional standard deviation estimation", lwd=2); 
lines(x0, sqrt(sigma1.NW), lwd=2, lty="dashed");
lines(x0, sqrt(pmax(sigma0.LL,0)), col="blue", lwd=2); 
lines(x0, sqrt(pmax(sigma1.LL,0)), col="blue", lwd=2, lty="dashed"); 
abline(h=0, lty="dotted")
points(mcycle);
# 
legend(40,-70, lty=c("solid",  "dashed", "solid", "dashed"), lwd=c(2,2,2,2), 
       col=c("black", "black", "blue", "blue"), 
       legend=c("NW - direct", "NW - modified", "LL - direct", 
                "LL - modified"));


###
#
#   Example: GAGurine 
#
###

data(GAGurine);

#      Data were collected on the concentration of a chemical GAG in the
#      urine of 314 children aged from zero to seventeen years. The aim
#      of the study was to produce a chart to help a paediatrican to
#      assess if a child's GAG concentration is ''normal''.

#      Age: age of a child in years.
#      GAG: concentration of GAG (the units have been lost).

plot(GAGurine);

X <- GAGurine$Age;
Y <- GAGurine$GAG;

x0 <- seq(min(X), max(X), by=0.01);

#   Local linear estimator p = 1
fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK);
summary(fit.LL);

(bw.CV <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(1,15)));
(bw.ROT <- thumbBw(X, Y, deg=1, kernel=EpaK))

#   Local linear estimator p = 0 (Nadaraya-Watson)
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

#   Nonparametric fit 
plot(X, Y)

lines(x0, fit.LL$lpFit$Y, col="blue", lwd=2)
lines(x0, fit.NW$beta0, col="red", lwd=2)
fit.pol(8, X, Y, COLOR="green"); 
legend("topright", lty=rep(1, 6), col=c("blue", "red", "green"), 
       lwd=2, legend=c("Local linear", "local constant (NW)", 
                       "polynomial of order 8"));

# 
op = par(mfrow=c(1,1))
plot(fit.LL);
par(op)

#
plot(X, Y); 
lines(x0, fit.LL$lpFit$Y, col="blue", lwd=2);
lines(lowess(X, Y, f=1/4, iter=0),  col="green", lwd=2);
lines(lowess(X, Y, f=1/4),  col="darkgreen", lwd=2);
lines(lowess(X, Y, f=2*bw.CV/(max(X)-min(X)), iter=0),  col="red", lwd=2);
2*bw.CV/(max(X)-min(X)) # equivalent span, 
                        # if the desing points were uniformly distributed 

legend("topright", lty=rep(1, 6), col=c("blue", "green", "darkgreen", "red"), 
       legend=c("Local linear global bw", "Lowess - iter = 0, f=1/4", 
                "Lowess - iter = 3, f=1/3", paste("Lowess - iter = 0, f=", 
                                          round(2*bw.CV/(max(X)-min(X)), 3))));

###
#
#   Simulated example 1
#
### 

n <- 200;

set.seed(1234567);
X <- -2+4*sort(runif(n)); # random design Uniform(-2,2)
# X <- seq(-2, 2, length=n); # regular fixed design 

cond.mean <- function(x) x + 2*exp(-16*x^2);
mX <- cond.mean(X);
sigma <- 0.4;
eps <- rnorm(n); 
Y <- mX + sigma*eps;

plot(X, Y)
x0 <- seq(-2, 2, length=1000);

#   Local linear estimator p = 1
(fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK));
(bw.CV <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(0.1,4)));
(bw.ROT <- thumbBw(X, Y, deg=1, kernel=EpaK))

#   Local linear estimator p = 0 (Nadaraya-Watson)
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

#   Nonparametric fit 
plot(X, Y)
lines(x0, cond.mean(x0), lwd=3)
lines(x0, fit.LL$lpFit$Y, col="blue", lwd=2)
lines(x0, fit.NW$beta0, col="red", lwd=2)
fit.pol(12, X, Y, COLOR="green"); 
lines(lowess(X, Y, f=2*fit.LL$bw/(max(X)-min(X)), iter=0), col="darkgrey", 
      lwd=2)
legend("bottomright", lty=rep(1, 4), lwd=rep(2, 4), 
       col=c("black", "blue", "red", "green", "darkgrey"), 
       legend=c("True mean function", "Local linear", "Local constant (NW)", 
                "polynomial of order 12", "lowess iter=0"));


###
#
#   Simulated example 2
#
### 

n <- 200;

set.seed(1234567);
X <- sqrt(1/2)*sort(rnorm(n)); # random design N(0,1/2)
# X <- seq(-2, 2, length=n); # regular fixed design 

cond.mean <- function(x) sin(2*x) + 2*exp(-16*x^2);
mX <- cond.mean(X);
sigma <- 0.3;
eps <- rnorm(n); 
Y <- mX + sigma*eps;

plot(X, Y)
x0 <- seq(min(X), max(X), length=1000);

#   Local linear estimator p = 1
(fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, 
                  kernel=EpaK));
(bw.CV <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(0.1,4)));
(bw.ROT <- thumbBw(X, Y, deg=1, kernel=EpaK))

#   Local linear estimator p = 0 (Nadaraya-Watson)
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

#   Nonparametric fit 
temp <- fit.pol(12, X, Y, draw=FALSE);
plot(X, Y, ylim=range(Y, fit.LL$lpFit$Y, fit.NW$beta0))
lines(x0, cond.mean(x0), lwd=2)
lines(x0, fit.LL$lpFit$Y, col="blue", lwd=2)
lines(x0, fit.NW$beta0, col="red", lwd=2)
fit.pol(12, X, Y, COLOR="green"); 
lines(lowess(X, Y, f=2*fit.LL$bw/(max(X)-min(X)), iter=0), 
      col="darkgrey", lwd=2)
legend(0, -0.5, lty=rep(1, 4), lwd=rep(2, 4), 
       col=c("black", "blue", "red", "green", "darkgrey"), 
       legend=c("True mean function", "Local linear", "Local constant (NW)", 
                "polynomial of order 12", "lowess iter=0"));

###
#
#   Simulated example 3
#
###

n <- 200;

set.seed(1234567);
X <- sqrt(1/2)*sort(rnorm(n)); # random design N(0,1/2)
# X <- -2+4*sort(runif(n)); # random design Uniform(-2,2)
# X <- seq(-2, 2, length=n); # regular fixed design 

cond.mean <- function(x) 0.3*exp(-4*(x+1)^2) + 0.7*exp(-16*(x-1)^2);
mX <- cond.mean(X);
sigma <- 0.1;
eps <- rnorm(n); 
Y <- mX + sigma*eps;

plot(X, Y)
# x0 <- seq(-2, 2, length=1000);
x0 <- seq(min(X), max(X), length=1000);

#   Local linear estimator p = 1
(fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK));
(bw.CV <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(0.1,4)));
(bw.ROT <- thumbBw(X, Y, deg=1, kernel=EpaK))

#   Local linear estimator p = 0 (Nadaraya-Watson)
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

#   Nonparametric fit 
plot(X, Y)
lines(x0, cond.mean(x0), lwd=2)
lines(x0, fit.LL$lpFit$Y, col="blue", lwd=2)
lines(x0, fit.NW$beta0, col="red", lwd=2)
fit.pol(12, X, Y, COLOR="green"); 
lines(lowess(X, Y, f=2*fit.LL$bw/(max(X)-min(X)), iter=0), col="pink", lwd=2)
legend(-1.5, 0.75, lty=rep(1, 4), lwd=rep(2, 4), 
       col=c("black", "blue", "red", "green", "pink"), 
       legend=c("True mean function", "Local linear", "Local constant (NW)", 
                "polynomial of order 12", "lowess iter=0"));

### 
#
#   Simulated example 4
#
### 

n <- 200;

set.seed(1234567);
X <- sqrt(1/5)*sort(abs(rnorm(n))); # random design N(0,1/5)
# X <- sort(runif(n)); # random design Uniform(0,1)
# X <- seq(-2, 2, length=n); # regular fixed design 
cond.mean <- function(x) 200 * (exp(-9.75*x) - 4*exp(-19.5*x) +
                                  3*exp(-29.25*x));
mX <- cond.mean(X);
sigma <- 5;
eps <- rnorm(n); 
Y <- mX + sigma*eps;

plot(X, Y)
# x0 <- seq(0, 1, length=1000);
x0 <- seq(min(X), max(X), length=1000);

#   Local linear estimator p = 1
(fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK));
(bw.CV <- regCVBwSelC(X, Y, deg=1, kernel=EpaK, interval=c(0.01,4)));
(bw.ROT <- thumbBw(X, Y, deg=1, kernel=EpaK))

#   Local linear estimator p = 0 (Nadaraya-Watson)
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

#   Nonparametric fit 
plot(X, Y)
lines(x0, cond.mean(x0), lwd=3)
lines(x0, fit.LL$lpFit$Y, col="blue", lwd=2)
lines(x0, fit.NW$beta0, col="red", lwd=2)
fit.pol(12, X, Y, COLOR="green"); 
lines(lowess(X, Y, f=fit.LL$bw/(max(X)-min(X)), iter=0), col="darkgrey", lwd=2)
legend(0.6, -30, lty=rep(1, 4), lwd=rep(2, 4), 
       col=c("black", "blue", "red", "green", "darkgrey"), 
       legend=c("True mean function", "Local linear", "Local constant (NW)", 
                "polynomial of order 12", "lowess iter=0"));

#   Once more with hand chosen bandwidths 

(fit.LL <- locpol(Y~X, deg=1, data=data.frame(X,Y), xeval=x0, kernel=EpaK, 
                  bw=0.1));

#   Local linear estimator p = 0 (Nadaraya-Watson)
fit.NW <- locCteSmootherC(x=X, y=Y, xeval=x0, kernel=EpaK, bw=fit.LL$bw); 

#   Nonparametric fit 
plot(X, Y)

lines(x0, cond.mean(x0), lwd=3)
lines(x0, fit.LL$lpFit$Y, col="blue", lwd=2)
lines(x0, fit.NW$beta0, col="red", lwd=2)
fit.pol(12, X, Y, COLOR="green"); 
lines(lowess(X, Y, f=2*fit.LL$bw/(max(X)-min(X)), iter=0), col="darkgrey", 
      lwd=2)

legend(0.6, -30, lty=rep(1, 4), lwd=rep(2, 4), 
       col=c("black", "blue", "red", "green", "darkgrey"), 
       legend=c("True mean function", "Local linear", "Local constant (NW)", 
                "polynomial of order 12", "lowess iter=0"));

###
#
#   Calculation of LOWESS 
#
###

set.seed(1234567)
X <- sort(rnorm(n));
Y <- sin(X) + rnorm(n)/10;

# 
lp0 <- numeric(n);
lp1 <- numeric(n);
bw <- numeric(n);

k0 <- floor(2/3*n)

K <- function(x) (1 - (abs(x))^3)^3 * (abs(x) < 1); 
# kernel for the local weights 
B <- function(x) (1 - x^2)^2 * (abs(x) < 1); # kernel for weighting residuals 

for(i in 1:n){
  bw[i] <- sort(abs(X[i]-X))[k0];  
# 	local linear fit p=1
  lp0[i] <- lm(Y~1 + I(X-X[i]), weights=K((X-X[i])/bw[i]))$coef[1]; 
}

n.iter <- 3;
res <- Y - lp0;

for (j in 1:n.iter){  
  delta <- B(res/(6*median(abs(res)))); 
  for(i in 1:n){    
    lp1[i] <- lm(Y~1 + I(X-X[i]), weights=K((X-X[i])/bw[i])*delta)$coef[1];
  }
  res <- Y - lp1;
}

par(mfrow=c(1,2))
plot(X, Y)
# Lowess
fit0 <- lowess(X, Y, iter=0, delta=0);
fit1 <- lowess(X, Y, delta=0); 

# #  Lowess R - "our implementation of lowess" -- initial. fits
summary(fit0$y-lp0)
# #  Lowess R - "our implementation of lowess" -- after 3 iterations
summary(fit1$y-lp1)

lines(X, sin(X))

lines(fit0, col="red", lwd=2);
lines(fit1, col="blue", lwd=2, lty="dashed");

legend("bottomright", cex=.8, lty=c("solid", "dashed"), lwd=rep(2, 2), 
       col=c("red", "blue"), legend=c("Lowess iter=0", "Lowess iter=3"));

# 
plot(fit0$x, fit0$y-lp0, type="l", lwd=2, ylim=range(fit0$y-lp0, fit1$y-lp1), 
     cex.main = .8, main="Difference of lowess function and hand calculations", 
     col="red", xlab="x", ylab="Difference");
lines(fit0$x, fit1$y-lp1, lty="dashed", lwd=2, col="blue")

legend("bottomright", cex=.8, lty=c("solid", "dashed"), lwd=rep(2, 2), 
       col=c("red", "blue"), 
       legend=c("Lowess iter=0 - Lowess iter=0", 
                "Lowess iter=3 - Lowess iter=3"));

par(mfrow=c(1,1))