-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathsoil_plasticity_classifier.py
52 lines (39 loc) · 1.54 KB
/
soil_plasticity_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Import libraries
import pandas as pd
import numpy as np
# Columns kept for modelling: the ones visible with the vision algorithm.
# The last entry ("plasticity") is the prediction target; everything before
# it is used as a feature.
columns = [
    "mottles",
    "structure",
    "texture",
    "primary_colour",
    "strength",
    "secondary_colour",
    "plasticity",
]
# Load the encoded dataset and keep only the columns above, in that order.
df = pd.read_csv(
    r"C:/Users/Tim/Desktop/SOILAIPROJECT/soil_plasticity_data_encoded.csv"
).loc[:, columns]
# Print how many rows fall into each plasticity class and each primary colour.
print(df["plasticity"].value_counts())
print(df["primary_colour"].value_counts())
# Features are all listed columns except the last; the label is the last one.
X = df[columns[:-1]]
y = df[columns[-1]]
# Split into train and test data (scikit-learn's default test_size applies).
# The split is seeded so that every run evaluates the models on the same
# held-out rows; without a fixed seed the partition is reshuffled on each run
# and the accuracy figures computed below are not comparable between runs.
# NOTE(review): the split is not stratified. If the plasticity classes are
# imbalanced, consider stratify=y (requires at least two rows per class).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# First model: a 50-tree random forest with a fixed seed, fitted on the
# training split.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
rand_for = RandomForestClassifier(
    random_state=42,
    n_estimators=50,
).fit(X_train, y_train)
# Predict the held-out rows and measure the fraction predicted correctly.
y_pred = rand_for.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# Second model: XGBoost classifier with 500 trees of depth 3 and the
# "multi:softmax" objective, fitted on the same training split.
from xgboost import XGBClassifier
xgb = XGBClassifier(
    max_depth=3,
    n_estimators=500,
    objective="multi:softmax",
).fit(X_train, y_train)
# Predict the held-out rows and score them the same way as the forest.
y_pred_xgb = xgb.predict(X_test)
accuracy_xgb = accuracy_score(y_true=y_test, y_pred=y_pred_xgb)
# Third model: k-nearest neighbours voting over the 30 closest training rows.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=30).fit(X_train, y_train)
# Predict the held-out rows and score them the same way as the other models.
y_pred_knn = knn.predict(X_test)
accuracy_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)