-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathkmeans_example
57 lines (41 loc) · 1.59 KB
/
kmeans_example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# -*- coding: utf-8 -*-
"""Untitled2.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1aK5P9sD9vLwrY_tyzrIUtz8IsmJOYbwF
"""
from sklearn.datasets import load_breast_cancer
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import scale
import pandas as pd
# Load the breast-cancer dataset and standardise each feature column.
# NOTE: scaling happens before the split, so the scaling statistics are
# computed over all rows, including those that end up in the test split.
bc = load_breast_cancer()
print(bc)
X = scale(bc.data)
print(X)
y = bc.target

# Hold out 20% of the rows for evaluating the cluster assignments.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Fit two clusters on the training rows only; class labels are not used here.
model = KMeans(n_clusters=2, random_state=0)
model.fit(X_train)
predictions = model.predict(X_test)  # raw cluster ids for the test rows
labels = model.labels_  # raw cluster ids for the training rows
print("labels: ", labels)

# KMeans numbers its clusters arbitrarily, so cluster 0 need not correspond to
# class 0.  Map every cluster id to the class that is most common among the
# training rows assigned to it, and score the test rows through that mapping;
# comparing raw cluster ids with y_test would report either p or 1 - p
# depending on how the clusters happened to be numbered.
cluster_to_class = pd.crosstab(labels, y_train).idxmax(axis=1)
predicted_classes = pd.Series(predictions).map(cluster_to_class).to_numpy()
print("Predictions: ", predicted_classes)
print("accuracy: ", accuracy_score(y_test, predicted_classes))
print("Actual: ", y_test)
from sklearn import metrics
# The notebook exporter had commented out the "% (name, ...)" continuation line
# of the print call below (it mistook the leading "%" for IPython magic), which
# left the function syntactically invalid.  The line is restored here.
def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one row of clustering scores.

    The printed row is tab-separated and contains, in order: ``name``, the
    fitted estimator's inertia (formatted as an integer), homogeneity,
    completeness, V-measure, adjusted Rand index, adjusted mutual information
    and the Euclidean silhouette score.

    The label-based scores compare ``estimator.labels_`` against the
    module-level ``y``, so ``y`` must hold one ground-truth label per row of
    ``data``.

    Args:
        estimator: Clustering estimator exposing ``fit``, ``labels_`` and
            ``inertia_``.  It is (re)fitted in place.
        name: Label printed in the first column.
        data: Feature matrix to cluster and to score the silhouette on.
    """
    estimator.fit(data)
    cluster_ids = estimator.labels_
    print('%-9s\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
          % (name, estimator.inertia_,
             metrics.homogeneity_score(y, cluster_ids),
             metrics.completeness_score(y, cluster_ids),
             metrics.v_measure_score(y, cluster_ids),
             metrics.adjusted_rand_score(y, cluster_ids),
             metrics.adjusted_mutual_info_score(y, cluster_ids),
             metrics.silhouette_score(data, cluster_ids,
                                      metric='euclidean')))
# Score the estimator on the full scaled dataset; this refits `model` in place.
bench_k_means(estimator=model, name="1", data=X)

# Contingency table of true training classes (rows) against the cluster ids
# captured from the earlier fit on the training split (columns).
train_contingency = pd.crosstab(y_train, labels)
print(train_contingency)