protect(): protection stack overflow in Ranger
https://github.com/imbs-hl/ranger/issues/103
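This error is due to the formula interface (first call below); passing dependent.variable.name instead (second call) is the workaround: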
# Formula interface: the call form reported in ranger issue #103 to trigger
# "protect(): protection stack overflow".
rfModel <- ranger(Species ~ ., data = iris, mtry = 2, num.trees = 1000,
                  importance = "impurity", probability = TRUE)
# Same model, but the response is named via dependent.variable.name instead of
# a formula.
rfModel <- ranger(dependent.variable.name = "Species", data = iris, mtry = 2,
                  num.trees = 1000, importance = "impurity", probability = TRUE)
Manual permutation-based equivalent of the importance_pvalues() function:
library(ranger)
num.permutations <- 100

# Reference forest: permutation importance on the unpermuted data.
rf <- ranger(dependent.variable.name = "Species", data = iris,
             importance = "permutation")

# Null distribution: shuffle the response, refit with the same parameters as
# above, and keep the importances. With several predictors, replicate()
# simplifies the results to a matrix with one row per variable and one column
# per permutation.
vimp <- replicate(num.permutations, {
  dat <- iris
  dat[, "Species"] <- dat[sample(nrow(dat)), "Species"]
  ranger(dependent.variable.name = "Species", data = dat,
         importance = "permutation")$variable.importance
})

# Empirical p-value per variable: the share of permuted importances that are at
# least as large as the observed one, with +1 in numerator and denominator so
# the p-value is never exactly zero. The observed importance vector is recycled
# down the columns of vimp, so each row is compared with its own variable.
pval <- (rowSums(vimp >= rf$variable.importance) + 1) / (ncol(vimp) + 1)

# One row per variable: observed importance next to its permutation p-value.
res <- cbind(importance = rf$variable.importance, pvalue = pval)
res