通过阈值筛选随机森林的重要特征

14.7 Selecting Important Features in Random Forests
# 筛选重要特征
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
from sklearn.feature_selection import SelectFromModel


# Load the iris data set: a feature matrix and a class-label vector.
iris = datasets.load_iris()
features = iris.data
target = iris.target

# Random forest used both to rank the features and as the final classifier.
randomforest = RandomForestClassifier(random_state=0, n_jobs=-1)

# Create an object that keeps only the features whose importance is greater
# than or equal to a threshold.
# NOTE: random-forest importances sum to 1.0 across all features, so with the
# four iris features a threshold of 0.9 selects nothing and the later fit fails
# on an empty feature matrix. 0.3 keeps the dominant features.
selector = SelectFromModel(randomforest, threshold=0.3)

# Build a new feature matrix containing only the selected features.
features_important = selector.fit_transform(features, target)

# Train the random forest on the reduced feature matrix.
model = randomforest.fit(features_important, target)