-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclustering.py
103 lines (85 loc) · 3.61 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import networkx
class Clustering:
EPSILON = 0.001 # Weight to use on matching graph instead of 0.
def __init__(self, alpha, clustering_lol):
self.alpha = alpha
self.clustering = clustering_lol
self.k = len(self.clustering)
self.labeling = [ i for i in range(0, self.k) ]
def set_labeling(self, label_list):
self.labeling = label_list
def get_labeling(self):
return self.labeling
def get_dict_id_labels(self):
id_labels = {}
for cluster, label in zip(self.clustering, self.labeling):
for point in cluster:
id_labels[point] = label
return id_labels
def node_label(cat, num):
return str(cat) + "." + str(num)
def get_index(node_label):
return int(node_label.split(".")[-1])
def get_intersection_weight(clustlist1, clustlist2):
intersection = set(clustlist1).intersection(clustlist2)
weight = len(intersection)
return weight
def get_percent_weight(clustlist1, clustlist2):
print(" clust1 size:" + str(len(clustlist1)) + " clust2 size:" + str(len(clustlist2)))
intersection = set(clustlist1).intersection(clustlist2)
weight = len(intersection)
print(" intersection size:" + str(weight))
if weight == 0:
return 0
percent = weight / len(clustlist1)
print(" percent:" + str(percent))
return percent
def make_bipartite_graph(clustering1, clustering2):
k = clustering1.k
assert(k == clustering2.k)
graph = networkx.Graph()
for i in range(0, k):
n1 = Clustering.node_label(1, i)
graph.add_node(n1, bipartite=0)
for i in range(0, k):
n2 = Clustering.node_label(2, i)
graph.add_node(n2, bipartite=1)
for i in range(0, k):
for j in range(0, k):
clust1 = clustering1.clustering[i]
clust2 = clustering2.clustering[j]
n1 = Clustering.node_label(1, i)
n2 = Clustering.node_label(2, j)
weight = Clustering.get_intersection_weight(clust1, clust2)
print("n1:" + n1 + " n2:" + n2 + " weight:" + str(weight))
if weight == 0:
weight = Clustering.EPSILON
graph.add_edge(n1, n2, weight = weight)
return graph
def matching_to_dict(matching_dict_tuples):
matching = {}
for start, end in matching_dict_tuples:
matching[start] = end
matching[end] = start
return matching
def set_labeling_maxmatching(clustering_start, clustering_end):
k = clustering_start.k
graph = Clustering.make_bipartite_graph(clustering_start, clustering_end)
# matching = networkx.bipartite.maximum_matching(graph)
matching_tuples = networkx.algorithms.max_weight_matching(
graph, maxcardinality=False, weight='weight')
matching = Clustering.matching_to_dict(matching_tuples)
print(matching)
startlabeling = clustering_start.get_labeling()
print("start labels:" + str(startlabeling))
endlabeling = []
for i in range(0, k):
startnode = Clustering.node_label(1, i)
nodematch = matching[startnode]
j = Clustering.get_index(nodematch)
startlabel = startlabeling[j]
endlabeling.append(startlabel)
clustering_end.set_labeling(endlabeling)
# c1 = Clustering(0.5, [[1,2],[2,3],[1,3]])
# c2 = Clustering(0.5, [[1,2],[1,3],[2,3]])
# Clustering.set_labeling_maxmatching(c1, c2)