#Kriging code
install.packages(c("sp","gstat"))
install.packages("ggplot2")
library(sp)
library(gstat)

#code to simulate data
xy <- expand.grid(1:100, 1:100)
names(xy) <- c('long','lat')
first <- gstat(formula=z~1, locations=~long+lat, dummy=T, beta=1, model=vgm(psill=0.025, range=5, model='Exp'), nmax=20)
#beta is used only for simple kriging, and is a vector with the trend coefficients (including an intercept)
#if no independent variables are defined the model (z~1) only contains an intercept, i.e. the simple kriging mean.
first <- predict(first, newdata=xy, nsim=1)
gridded(first) = ~long+lat
spplot(first)
write.csv(first,file="C://Location/data.csv")

#code to enter data
setwd("C://Location")
read.csv("C://Location/data.csv")
#If you want a sample you can create one in excel\
#or add an observations column with rep() and use runif() to create a vector to sample by

#plotting
head(data) #what are the colnames CASE sensitive
library(ggplot2)
hist(data$colname)
qqplot(data$colname)
qqline(data$colname)

#Had data named plants with a count response, x, and y column
#Added points from a different data set called potential_sites, columns x and y
qplot(x, y, data = plants, geom="point", color = count, main="Plant Abundance and Potential Sites")+
scale_color_gradient(low = "brown", high = "green")+
geom_point(aes(x,y), data=potential_sites, color="black")

#If you want to look at graphs in a new window
windows()
qplot()
plot(plants$x, plants$count)
plot(plants$y, plants$count)
plot(variogram(count ~ x, data = plants, locations = ~ x + y)) #trend in x
#default cutoff sqrt(100^2+100^2)/3, 100x100 grid 
#default lagsize/binsize sqrt(100^2+100^2)/3/15
vsimp <- variogram(count~1, data=plants, locations = ~x+y, cutoff=45, width=3) #change bins

#try starting values range 15 model Spherical, partial sill 500, nugget 2
fit1 <- fit.variogram(vsimp, vgm(15, "Sph", 500, 2) )
fit1
plot(vsimp, fit1)
mod1 <- krige(count~1, data=plants, locations = ~ x + y, model=fit1, newdata=potential_sites)
#nmax for nearest neighbors
mod1 <- krige(count~1, data=plants, locations = ~ x + y, model=fit1, newdata=potential_sites, nmax=20 )

#Cross validation
cross <- krige.cv(count~1, data=plants,locations=~x+y, fit1)
mean(cross$residual)
sd(cross$residual)
mean(cross$zscore)
sd(cross$zscore)
mean(cross$residual/cross$zscore)