-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsemantics.py
134 lines (103 loc) · 5.17 KB
/
semantics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import numpy as np
from scipy import spatial
class Semantics:
    """
    Vector based semantic Long Term Memory.

    Keeps a two-way association between predicate type ids and their
    semantic vectors. Both lookup tables are created per instance in
    ``__init__`` so that separate memories never share entries.
    """

    # Not referenced by any method of this class; kept as originally declared.
    __type_ids = []

    def __init__(self, sem_dim):
        """
        Constructs a new semantic Long Term Memory with the specified
        dimension of the semantic space.
        """
        self.__sem_dim = sem_dim
        # A dictionary mapping semantic representations (the string id of a
        # semantic vector) to predicate ids
        self.__semantics = {}
        # A dictionary mapping predicate ids to semantic vectors
        self.__semantics_categories = {}

    def _get_semantic_id(self, vector):
        """
        Builds the string used as dictionary key for a semantic vector.

        The components are joined with a separator: without one, different
        vectors could collapse onto the same key (e.g. [1, 23, 4] and
        [12, 3, 4] would both become "1234").
        """
        return ",".join(map(str, vector))

    def get_semantic_dimension(self):
        """
        Returns the dimension of the semantic space
        """
        return self.__sem_dim

    def get_semantic_category(self, semantic_vector):
        """
        Returns the predicate type id which corresponds to the given semantic
        vector, or None when the vector is not registered.
        Use to recover the predicate type from a vector representation.

        Raises AssertionError if the length of the vector differs from the
        dimension of the semantic space.
        """
        assert(semantic_vector.shape[0] == self.get_semantic_dimension())
        sem_v_id = self._get_semantic_id(semantic_vector)
        return self.__semantics.get(sem_v_id)

    def add_category(self, predicate_type_id, semantic_vector):
        """
        Creates a new semantic category or updates an existing one.

        Raises AssertionError if the length of the vector differs from the
        dimension of the semantic space.
        """
        assert(semantic_vector.shape[0] == self.get_semantic_dimension())
        sem_v_id = self._get_semantic_id(semantic_vector)
        # Keep both directions of the mapping in sync.
        self.__semantics[sem_v_id] = predicate_type_id
        self.__semantics_categories[predicate_type_id] = semantic_vector

    def _populate_semantic_vector(self, predicate_type_id):
        """
        Produces the vector for a category that is not stored yet.

        The base implementation ignores the id and draws every component
        uniformly from [-1, 1); subclasses override this hook.
        """
        return np.random.uniform(low=-1, high=1, size=self.get_semantic_dimension())

    def get_semantic_vector(self, predicate_type_id, populate=False):
        """
        Returns the semantic vector representation of a category identified
        by a predicate type id.
        If the category is not represented in current semantic LTM and the
        flag 'populate' is turned on, then the category is created and its
        representation is populated by '_populate_semantic_vector' (a random
        semantic vector in this base class). Otherwise None is returned.
        """
        if predicate_type_id in self.__semantics_categories:
            return self.__semantics_categories[predicate_type_id]
        if not populate:
            return None
        vector = self._populate_semantic_vector(predicate_type_id)
        self.add_category(predicate_type_id, vector)
        return vector
class LocalistSemantics(Semantics):
    """
    Semantic Long Term Memory with localist (one-hot) vectors.

    Every predicate type id is assigned its own unit (vector index) in
    order of first appearance; its vector is all zeros except for a 1 at
    that unit.
    """

    def __init__(self, semantic_dim):
        """
        Constructs a localist memory with 'semantic_dim' units.
        """
        Semantics.__init__(self, semantic_dim)
        # The unit table and the next-free-unit counter are per instance.
        # A table shared at class level next to a per-instance counter lets
        # a second memory hand out an already used unit to a new id.
        self.predicate_type_id_localist_units = {}
        self.max_unit = 0

    def _populate_semantic_vector(self, predicate_type_id):
        """
        Returns the one-hot vector of the given predicate type id, reserving
        the next free unit the first time the id is seen.

        Raises AssertionError when all units are already taken.
        """
        units = self.predicate_type_id_localist_units
        if predicate_type_id not in units:
            assert(self.max_unit < self.get_semantic_dimension())
            units[predicate_type_id] = self.max_unit
            self.max_unit += 1
        v = np.zeros(shape=(self.get_semantic_dimension()))
        v[units[predicate_type_id]] = 1
        return v
class WordEmbeddingsSemantics(Semantics):
    """
    Semantic Long Term Memory whose vectors are read from a word embeddings
    text file.

    Each line of the file is expected to hold a token followed by its vector
    components, all separated by single spaces.
    """

    def __init__(self, words_embeddings_file, semantic_dim):
        """
        Stores the path of the embeddings file and sets up the semantic space
        with dimension 'semantic_dim'.
        """
        self.__words_embeddings_file = words_embeddings_file
        Semantics.__init__(self, semantic_dim)

    def _populate_semantic_vector(self, predicate_type_id):
        """
        Returns the embedding stored in the file for the given predicate type
        id (compared case-insensitively against the first field of each line).

        When no line matches, a message is printed and the random vector of
        the base class is returned instead. The file is scanned from the
        start on every call.
        """
        predicate_type_id_str = str(predicate_type_id)
        # Loop invariant: lower-case the target once, not once per line.
        wanted = predicate_type_id_str.lower()
        # Explicit UTF-8 so reading does not depend on the platform's
        # default encoding when the vocabulary contains non-ASCII tokens.
        with open(self.__words_embeddings_file, 'r', encoding='utf-8') as embeddings_f:
            for line in embeddings_f:
                row = line.strip().split(' ')
                if row[0].lower() == wanted:
                    return np.array(list(map(float, row[1:])))
        print("Can't find embedding for {}".format(predicate_type_id_str))
        return Semantics._populate_semantic_vector(self, predicate_type_id)
class KazumaSemantics(WordEmbeddingsSemantics):
    """
    Word embeddings memory bound to the "kazuma.embeddings.txt" file with a
    100-dimensional semantic space.
    """

    def __init__(self):
        embeddings_file, dimension = "kazuma.embeddings.txt", 100
        WordEmbeddingsSemantics.__init__(self, embeddings_file, dimension)
class Glove50Semantics(WordEmbeddingsSemantics):
    """
    Word embeddings memory bound to the "glove.50d.embeddings.txt" file with
    a 50-dimensional semantic space.
    """

    def __init__(self):
        embeddings_file, dimension = "glove.50d.embeddings.txt", 50
        WordEmbeddingsSemantics.__init__(self, embeddings_file, dimension)
class Glove100Semantics(WordEmbeddingsSemantics):
    """
    Word embeddings memory bound to the "glove.100d.embeddings.txt" file with
    a 100-dimensional semantic space.
    """

    def __init__(self):
        embeddings_file, dimension = "glove.100d.embeddings.txt", 100
        WordEmbeddingsSemantics.__init__(self, embeddings_file, dimension)
class Glove200Semantics(WordEmbeddingsSemantics):
    """
    Word embeddings memory bound to the "glove.200d.embeddings.txt" file with
    a 200-dimensional semantic space.
    """

    def __init__(self):
        embeddings_file, dimension = "glove.200d.embeddings.txt", 200
        WordEmbeddingsSemantics.__init__(self, embeddings_file, dimension)
class Glove300Semantics(WordEmbeddingsSemantics):
    """
    Word embeddings memory bound to the "glove.300d.embeddings.txt" file with
    a 300-dimensional semantic space.
    """

    def __init__(self):
        embeddings_file, dimension = "glove.300d.embeddings.txt", 300
        WordEmbeddingsSemantics.__init__(self, embeddings_file, dimension)