Estimated Time: ~40 minutes
In this tutorial, we will train a RFsp model based on encoding positional information but also considering the buffer distance maps from observation points as covariates accounting for surrounding conditions. Random Forest for Spatial Data (RFsp) (Hengl et al., 2018) is an adaptation of the traditional RF method to account for spatial dependencies in data.
Inputs:
Output
setwd("C:/Users/raza/project")
base_path <- "C:/Users/raza/project/plot"
library(devtools)
#install_github("envirometrix/landmap")
library(landmap)
year_ranges <- c("2001-05")
for (year in year_ranges) {
# Load spatiotemporal data frame for the current year
start_time <- Sys.time()
cat("Processing year:", year, "\n")
load(file = paste0("stfdf_", year, ".rda"))
covariates <- c("GWD", "Lat", "Lon", "SLOPE", "ASP", "DEM", "LNF", "TPI", "TWI", "TRI", "PREC", "TEMP", "NDVI", "ET", "EVI")
### Create BBOX ###
xmin <- min(stfdf@sp@coords[, "lon"]) - 1000
xmax <- max(stfdf@sp@coords[, "lon"]) + 1000
ymin <- min(stfdf@sp@coords[, "lat"]) - 1000
ymax <- max(stfdf@sp@coords[, "lat"]) + 1000
bbox <- extent(xmin, xmax, ymin, ymax)
bbox <- as(bbox, "SpatialPolygons")
bbox@proj4string <- CRS(utm33) #"+proj=longlat +datum=WGS84"
fishnet <- raster(bbox)
res(fishnet) <- c(1000, 1000) # 1 x 1 km
fishnet[] <- runif(length(fishnet), -10, 10)
fishnet <- as(fishnet, "SpatialPixelsDataFrame")
st <- as(fishnet, "SpatialPoints")
plot(fishnet)
#plot(border, add = TRUE)
extent(fishnet)
extent(stfdf@sp["staid"])
class(fishnet)
## Geographic distances only:
grid.distP <- landmap::buffer.dist(stfdf@sp["staid"], fishnet[1], as.factor(1:nrow(stfdf@sp)))
#print(grid.distP)
temp_df <- as.data.frame(stfdf)
GWD_df$staid <- as.integer(as.character(GWD_df$staid))
ov_toma <- do.call(cbind, list(stfdf@sp@data, over(stfdf@sp, grid.distP)))
GWD_df <- plyr::join(GWD_df, ov_toma, by = "staid")
GWD_df <- GWD_df[, c(covariates, paste("layer.", sort(unique(as.integer(as.character(GWD_df$staid)))), sep = ""))]
GWD_df <- GWD_df[complete.cases(GWD_df), ]
#colnames(GWD_df)
time <- sort(unique(GWD_df$time))
daysNum <- length(time)
days <- gsub("-", "", time, fixed = TRUE)
nrow(GWD_df)
ntree <- 250 # 500
# tune RFsp as Hengl et al. 2018 did
rt <- makeRegrTask(data = GWD_df[sample(nrow(GWD_df), 12000), ], target = "GWD")
estimateTimeTuneRanger(rt, num.threads = detectCores())
# Time-consuming >> do on number cruncher
set.seed(42)
t.rfsp <- tuneRanger(rt, num.trees = ntree, iters = 120, build.final.model = FALSE)
dev_parameters <- t.rfsp$recommended.pars
print(dev_parameters)
# Identify unique combinations of latitude and longitude
unique_coords <- unique(GWD_df[, c("Lat", "Lon")])
# Set a seed for reproducibility
set.seed(123)
# Create an index for splitting the data
index <- createDataPartition(1:nrow(unique_coords), p = 0.8, list = FALSE)
# Use the index to split the unique coordinates into training and testing sets
train_coords <- unique_coords[index, ]
test_coords <- unique_coords[-index, ]
# Merge the original data with the training and testing coordinates
train_data <- merge(GWD_df, train_coords, by = c("Lat", "Lon"))
test_data <- merge(GWD_df, test_coords, by = c("Lat", "Lon"))
# Select covariates
covariates <- colnames(GWD_df)
covariates <- covariates[covariates != "GWD"] # Assuming "GWD" is the response variable
# Subset the data frames with selected covariates
train_data <- train_data[, c("GWD", covariates)]
test_data <- test_data[, c("GWD", covariates)]
rfsp_model <- ranger(GWD ~ ., data = train_data, importance = "impurity", seed = 42,
num.trees = ntree, mtry = dev_parameters$mtry,
splitrule = "variance",
min.node.size = dev_parameters$min.node.size,
sample.fraction = dev_parameters$sample.fraction,
quantreg = TRUE) ### quantreg???
# Save the trained model for the current year
save(rfsp_model, file = paste0("../models/RFsp_", year, ".rda"))
# Create and save plot_data
plot_data <- data.frame(Lat = train_data$Lat, Lon = train_data$Lon, Observations = train_data$GWD, Predictions = rfsp_model$predictions)
plot_file <- paste0(base_path, (paste0("\\model_training\\rfsp_training_", year, ".csv")))
write.csv(plot_data, plot_file)
# Make predictions on the test data using the trained model
predictions <- predict(rfsp_model, data = test_data, type = "response")
# Add the predictions to the test_data dataframe
test_data$predicted_GWD <- predictions$predictions
test_data <- test_data[, c("GWD", "predicted_GWD", "Lat", "Lon")]
# Save test_data with predictions as a CSV file
test_file <- paste0(base_path, (paste0("\\model_testing\\rfsp_test_", year, ".csv")))
write.csv(test_data, test_file)
end_time <- Sys.time()
computation_time <- end_time - start_time
#cat("dev_parameters for year", year, ":", dev_parameters, "\n")
cat("Computation time for year", year, ":", computation_time, "\n")
# Save dev_parameters, RMSE, and computation_time to a text file
result_file <- paste0(base_path, "\\computation_times_rfsp.txt")
write(paste("Year:", year), file = result_file, append = TRUE)
write(paste("dev_parameters:", toString(dev_parameters)), file = result_file, append = TRUE)
#write(paste("RMSE:", min(rmse_hp, na.rm = TRUE)), file = result_file, append = TRUE)
write(paste("Computation time:", computation_time), file = result_file, append = TRUE)
write("\n", file = result_file, append = TRUE)
}