-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path272-特征嵌入法选择.py
38 lines (25 loc) · 945 Bytes
/
272-特征嵌入法选择.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# 嵌入法基本用法
import pandas as pd
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import cross_val_score
# Basic embedded feature selection.
# Load the data set; column 0 is used as the target, all other columns as features.
data = pd.read_csv('./datas/digit_recognizor_simple.csv')
y = data.iloc[:, 0]
x = data.iloc[:, 1:]

# A single seeded random forest serves both as the estimator that ranks the
# features and as the model that is cross-validated afterwards.
RFC_ = RFC(random_state=42)

# Select features with SelectFromModel using a fixed importance threshold.
selector = SelectFromModel(RFC_, threshold=0.0005)
x_embedded = selector.fit_transform(x, y)
print(x_embedded.shape)  # output recorded by the original author: (1000, 351)

# Mean of the 10-fold cross-validation scores on the reduced feature set.
fold_scores = cross_val_score(RFC_, x_embedded, y, cv=10)
score = fold_scores.mean()
print(score)  # output recorded by the original author: 0.88
# 学习曲线调参
import numpy as np
import matplotlib.pyplot as plt
# Learning curve: cross-validated score as a function of the selection threshold.
# Candidate thresholds are 20 evenly spaced values between 0 and the largest
# feature importance reported by the forest fitted on the full data.
importances = RFC_.fit(x, y).feature_importances_
thresholds = np.linspace(0, importances.max(), 20)

scores = []
for ts in thresholds:
    # Redo the feature selection at this threshold, then record the mean
    # 10-fold cross-validation score of the forest on the selected features.
    x_embedded = SelectFromModel(RFC_, threshold=ts).fit_transform(x, y)
    score = cross_val_score(RFC_, x_embedded, y, cv=10).mean()
    scores.append(score)

# Draw the curve once, after all thresholds have been evaluated; each tested
# threshold gets its own tick on the x axis.
plt.plot(thresholds, scores)
plt.xticks(thresholds)
plt.show()