protect(): protection stack overflow in Ranger

Published by onesixx on

https://github.com/imbs-hl/ranger/issues/103

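The error is due to the formula interface. The first call below uses it; the second avoids it by passing dependent.variable.name instead.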

# Formula interface -- the variant this post associates with the
# "protect(): protection stack overflow" error (see the linked issue).
rfModel <- ranger(
  Species ~ .,
  data = iris,
  mtry = 2,
  num.trees = 1000,
  importance = "impurity",
  probability = TRUE
)

# Same model without the formula interface: the response is named through
# dependent.variable.name and every other column of `data` is a predictor.
rfModel <- ranger(
  dependent.variable.name = "Species",
  data = iris,
  mtry = 2,
  num.trees = 1000,
  importance = "impurity",
  probability = TRUE
)

Permutation p-values computed by hand, as an alternative to the importance_pvalues() function:

library(ranger)

# Number of label permutations used to build the null distribution
num.permutations <- 100

# Reference forest: permutation importance on the unmodified data
rf <- ranger(
  dependent.variable.name = "Species",
  data = iris,
  importance = "permutation"
)

# Null importances: shuffle the response, refit with the same settings as the
# reference forest, and keep only the importance vector of each refit.
vimp <- replicate(num.permutations, {
  shuffled <- iris
  shuffled[["Species"]] <- shuffled[["Species"]][sample(nrow(shuffled))]
  null_fit <- ranger(
    dependent.variable.name = "Species",
    data = shuffled,
    importance = "permutation"
  )
  null_fit$variable.importance
})

# Compute p-values: for each variable, the share of permuted-label importances
# that are at least as large as the observed one. The +1 in numerator and
# denominator counts the observed fit itself, so a p-value is never exactly 0.
observed <- rf$variable.importance

# replicate() returns a plain vector (no rows) when there is only one
# variable; force a variables x permutations matrix so row-wise counting
# works for any number of variables.
null_imp <- matrix(vimp, nrow = length(observed))

# `observed` has one entry per row, so the comparison recycles it down each
# column and pairs every null value with its own variable.
pval <- (rowSums(null_imp >= observed) + 1) / (ncol(null_imp) + 1)

# One row per variable (row names come from the importance vector)
res <- cbind(importance = observed, pvalue = pval)

res
Categories: R Analysis

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x