added logreg model support
Kaleb committed Apr 7, 2020
1 parent f25c5d5 commit 237ce91
Showing 13 changed files with 441 additions and 19 deletions.
Binary file modified __pycache__/base_classes.cpython-36.pyc
Binary file modified __pycache__/re_generator.cpython-36.pyc
Binary file added __pycache__/re_generator_v2.cpython-36.pyc
Binary file modified __pycache__/speech_module.cpython-36.pyc
Binary file modified __pycache__/training.cpython-36.pyc
Binary file added __pycache__/training_v2.cpython-36.pyc
11 changes: 6 additions & 5 deletions base_classes.py
@@ -16,12 +16,13 @@ def __init__(self, msg=None):
"id" : msg.id, # object id
"type": msg.type, # object type (block, screwdriver, etc)
"rgb": (msg.color.r, msg.color.g, msg.color.b), # object color as RGBA
"dimensions": (msg.x_dim, msg.y_dim, msg.z_dim), # object dimentions (estimated)
"dim": (msg.x_dim, msg.y_dim, msg.z_dim), # object dimentions (estimated)
# "pose": msg.pose.position # object pose (estimated) as Position msg (xyz)
}

def from_dict(self, dict):
self.features = dict
print(self.features)

def get_feature_val(self, feature):
if feature == "color":
@@ -86,7 +87,7 @@ def _intialize_feature_distributions(self):
         obj_ratios = {}
         for o in self.env:
             type = o.get_feature_val("type")
-            dims = [float(d) for d in o.get_feature_val("dimensions")]
+            dims = [float(d) for d in o.get_feature_val("dim")]
             sz = 1.0
             for d in dims:
                 sz *= d
@@ -122,7 +123,7 @@ def _intialize_feature_distributions(self):

         for o in self.env:
             type = o.get_feature_val("type")
-            dims = [float(d) for d in o.get_feature_val("dimensions")]
+            dims = [float(d) for d in o.get_feature_val("dim")]

             sz = 1.0
             for d in dims:
@@ -140,11 +141,11 @@ def _intialize_feature_distributions(self):
o._set_feature_val("z_size", z_size)
o._set_feature_val("z_dim", z_dim)

# grab size and dimensions
# grab size and dim
# all_sizes = []
# all_ratios = []
# for o in self.env:
# dims = [float(d) for d in o.get_feature_val("dimensions")]
# dims = [float(d) for d in o.get_feature_val("dim")]
# sz = 1.0
# for d in dims:
# sz *= d
2 changes: 1 addition & 1 deletion gen_batch_predictions.py
@@ -1,5 +1,5 @@
 from training import CorpusTraining
-
+# from training_v2 import CorpusTraining

 def predict_batch(xml_workspace_filename):
     trainer = CorpusTraining()
Binary file added model/lgreg.pkl
174 changes: 174 additions & 0 deletions re_generator_v2.py
@@ -0,0 +1,174 @@
import numpy as np
import copy
import pickle
import os

from sklearn.linear_model import LogisticRegression

from speech_module import SpeechModule
from base_classes import Object, Context

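# Thin wrapper around scikit-learn's LogisticRegression: one interface for
# training, querying, and pickling the underlying classifier.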
class SpeechLearner:
    def __init__(self):
        self.clf = LogisticRegression(max_iter=1000)

    def train(self, X, y):
        self.clf.fit(X, y)
        print(self.clf.classes_)

    def predict(self, X):
        return self.clf.predict(X)

    def predict_probs(self, X):
        return self.clf.predict_proba(X)

    def plot_learned_function(self, data):
        # TODO implement
        raise NotImplementedError

    def save_model(self, filename):
        with open(filename, 'wb') as f:
            pickle.dump(self.clf, f)

    def load_model(self, filename):
        with open(filename, 'rb') as f:
            self.clf = pickle.load(f)

class REG:
    def __init__(self):
        w2c = "data/w2c_4096.txt"
        # w2c = "/ros/catkin_ws/src/hrc_discrim_learning/src/hrc_discrim_learning/data/w2c_4096.txt"
        self.sm = SpeechModule(w2c)
        self.theta = 0.5

        self.features = ["color", "size", "dim", "none"]

        self.model = SpeechLearner()

    def save_models(self, filename="model/lgreg.pkl"):
        self.model.save_model(filename)

    def load_models(self, filename="model/lgreg.pkl"):
        self.model.load_model(filename)
        self.mapping = self.model.clf.classes_

    def train_model(self, x, y):
        self.model.train(x, y)
        self.mapping = self.model.clf.classes_

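    # Pick the single most probable feature to mention next; "none" (stop
    # adding modifiers) is only a candidate when the type alone already
    # singles out the object.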
    def _generate_single_output(self, object, context, type_env, feature_set):
        """ UPDATED """
        features = copy.copy(feature_set)
        model_input, labels, results = self.get_model_input(object, context)
        # prediction = self.mapping[self.model.predict([model_input])]
        pdist = self.model.predict_probs([model_input])[0]
        # map prob distributions as a dict
        pdist_as_dict = {}
        for i in range(len(self.mapping)):
            cls = self.mapping[i]
            pdist_as_dict[cls] = pdist[i]

        if len(type_env) > 1 and "none" in features:
            features.remove("none")
        best = max(features, key=lambda x: pdist_as_dict[x])

        if best == "none":
            return None, None, None
        else:
            return best, labels[best], results[best]

    def generate_output(self, object, context):
        # context should include object
        """ UPDATED """

        output = ""
        type = object.get_feature_val('type')
        type_env = context.get_type_match(type)

        feature_set = copy.copy(self.features)
        while feature_set:
            feature, label, new_context = self._generate_single_output(object, context, type_env, feature_set)
            if not label:
                break
            output += (label + " ")
            context = Context(new_context)
            type_env = context.get_type_match(type)
            feature_set.remove(feature)

        output += type
        return output


# output = ""
#
# type = object.get_feature_val("type")
# type_env = context.get_type_match(type)
#
# # next: iterate through possible features
# feature_set = copy.copy(self.features)
# while feature_set:
# feature, label, new_context = self._generate_single_output(object, context, type_env, feature_set)
# if not label:
# output += type
# return output
#
# output += (label + " ")
# context = Context(new_context)
# type_env = context.get_type_match(type)
# feature_set.remove(feature)
#
# # return "ERR: check REGenerator"
# return output

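    # Feature vector layout fed to the classifier:
    # [color_score, color_data, size_score, size_data, dim_score, dim_data]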
    def get_model_input(self, object, context):
        v = []
        labels = {}
        results = {}
        for feature in self.features[:-1]:
            label, score, data, kept_objects = self._get_model_feature_input(feature, object, context)
            labels[feature] = label
            results[feature] = kept_objects
            v += [score, data]

        return v, labels, results

    def _get_model_feature_input(self, feature, object, context):
        # context should include object
        label, data = self.sm.label_feature(object, context, feature)

        if feature == "color" or feature == "type":
            score, kept_objects = self.elim_objects_discrete(context, label, feature)
        else:
            score, kept_objects = self.elim_objects_gradable(context, feature, label, data)

        return label, score, data, kept_objects

    def elim_objects_discrete(self, context, label, feature):
        # we want to eliminate everything that the term label can NOT apply to
        score = 0
        kept_objects = []
        for o in context.env:
            this_label, data = self.sm.label_feature(o, context, feature)
            if label == this_label:
                kept_objects.append(o)
            else:
                score += 1

        return score, kept_objects

    def elim_objects_gradable(self, context, feature, label, label_score):
        # we want to eliminate everything that the term can NOT apply to
        # that is, everything that the term fits LESS well than the target object
        score = 0
        kept_objects = []
        for o in context.env:
            this_label, data = self.sm.label_feature(o, context, feature)
            if this_label == label and data >= label_score:
                kept_objects.append(o)
            else:
                score += 1

        return score, kept_objects

    def update_context(self, kept_objects):
        return Context(kept_objects)
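
A minimal usage sketch for the new generator (not part of the commit): the object feature values below are invented, and it assumes a model already trained and saved at model/lgreg.pkl, the data/w2c_4096.txt color table that REG.__init__ loads, and that a Context can be built directly from a list of Objects, as update_context does above.

    from base_classes import Object, Context
    from re_generator_v2 import REG

    # Hypothetical objects; the keys mirror the feature dict in base_classes.py.
    target = Object()
    target.from_dict({"id": 0, "type": "block", "rgb": (220, 40, 40), "dim": (2.0, 2.0, 1.0)})
    distractor = Object()
    distractor.from_dict({"id": 1, "type": "block", "rgb": (40, 40, 220), "dim": (1.0, 1.0, 1.0)})

    reg = REG()
    reg.load_models()  # restores the pickled classifier; self.mapping <- clf.classes_
    context = Context([target, distractor])  # context should include the target itself
    print(reg.generate_output(target, context))  # e.g. "red block"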
2 changes: 1 addition & 1 deletion speech_module.py
@@ -69,7 +69,7 @@ def _label_color(self, obj):
         return l1, val1

     def _label_size(self, obj, context):
-        # # estimate volume based on dimensions
+        # # estimate volume based on dim
         z = obj.get_feature_val("z_size")
         if z > 0:
             label = "big"
23 changes: 11 additions & 12 deletions training.py
@@ -26,7 +26,7 @@ def get_train_x_y(self, xml_workspace_filename, csv_responses_filename):

         responses = self.parse_responses_from_csv(csv_responses_filename)
         tokenized_responses = self.process_all_outputs(responses)
-        print("training.py:28: len(tokenized_responses) = ", len(tokenized_responses))
+        # print("training.py:28: len(tokenized_responses) = ", len(tokenized_responses))

         feature_inputs = self.assemble_x(tokenized_responses)
         # TODO update to work with all responses (rather than one per qid)
@@ -44,8 +44,6 @@ def train(self, feature_inputs, feature_outputs, save=True):
         sz_y = sz_y[:len(sz_x)]
         dim_y = dim_y[:len(dim_x)]

-        print(len(clr_x), len(clr_y))
-        print(len(dim_x), len(dim_y))
         self.reg.train_model("color", clr_x, clr_y)
         self.reg.train_model("size", sz_x, sz_y)
         self.reg.train_model("dim", dim_x, dim_y)
@@ -78,10 +76,12 @@ def parse_workspace_data_from_xml(self, filename):
                 v /= 100.0
                 rgb = hsv_to_rgb(h, s, v)
                 feature_dict["rgb"] = [255 * d for d in rgb]
-            elif datum.tag == "location" or datum.tag == "dimensions":
+            elif datum.tag == "location":
                 x = int(datum[0].text)
                 y = int(datum[1].text)
                 feature_dict[datum.tag] = (x, y)
+            elif datum.tag == "dimensions":
+                feature_dict["dim"] = (int(datum[0].text), int(datum[1].text))

             o = Object()
             o.from_dict(feature_dict)
@@ -97,7 +97,7 @@ def assemble_x_for_q(self, obj, context, tokenized_response):
         labels, tokens = tokenized_response
         type = obj.get_feature_val("type")

-        features = ["color", "size", "dimensions"]
+        features = ["color", "size", "dim"]

         color_x = []
         size_x = []
@@ -110,8 +110,8 @@ def assemble_x_for_q(self, obj, context, tokenized_response):
if "size" in features:
_, sz_score, sz_data, sz_kept_objects = self.reg.get_model_input("size", obj, context)
size_x.append([sz_score, sz_data])
if "dimensions" in features:
_, dim_score, dim_data, dim_kept_objects = self.reg.get_model_input("dimensions", obj, context)
if "dim" in features:
_, dim_score, dim_data, dim_kept_objects = self.reg.get_model_input("dim", obj, context)
dim_x.append([dim_score, dim_data])

if t == self.sm.COLOR_I:
@@ -122,7 +122,7 @@ def assemble_x_for_q(self, obj, context, tokenized_response):
features.remove("size")
elif t == self.sm.DIM_I:
kept = dim_kept_objects
features.remove("dimensions")
features.remove("dim")

context = self.reg.update_context(kept)

@@ -133,8 +133,8 @@ def assemble_x_for_q(self, obj, context, tokenized_response):
if "size" in features:
_, sz_score, sz_data, sz_kept_objects = self.reg.get_model_input("size", obj, context)
size_x.append([sz_score, sz_data])
if "dimensions" in features:
_, dim_score, dim_data, dim_kept_objects = self.reg.get_model_input("dimensions", obj, context)
if "dim" in features:
_, dim_score, dim_data, dim_kept_objects = self.reg.get_model_input("dim", obj, context)
dim_x.append([dim_score, dim_data])

return color_x, size_x, dim_x
@@ -164,7 +164,7 @@ def test_labeling(self, key, c):
         self.sm = SpeechModule(w2c)
         clr = self.sm.label_feature(key, c, "color")
         sz = self.sm.label_feature(key, c, "size")
-        dim = self.sm.label_feature(key, c, "dimensions")
+        dim = self.sm.label_feature(key, c, "dim")

         # print(clr, sz, dm)
         return clr, sz, dim
@@ -198,7 +198,6 @@ def parse_responses_from_csv(self, filename):
             all_responses[count].append(response)
             count += 1

-        print(all_responses)
         return all_responses

     def assemble_Y(self, tokenized_responses):
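
To make the new dimensions handling concrete, here is a self-contained sketch of the kind of element parse_workspace_data_from_xml now accepts. The tag names and nesting are assumptions inferred from the indexing above (datum[0].text, datum[1].text), not the project's actual schema.

    import xml.etree.ElementTree as ET

    # Hypothetical workspace entry; only the location/dimensions branches are exercised.
    obj_xml = """
    <object>
      <type>block</type>
      <location><x>10</x><y>20</y></location>
      <dimensions><x>3</x><y>2</y></dimensions>
    </object>
    """

    feature_dict = {}
    for datum in ET.fromstring(obj_xml):
        if datum.tag == "type":
            feature_dict["type"] = datum.text
        elif datum.tag == "location":
            feature_dict[datum.tag] = (int(datum[0].text), int(datum[1].text))
        elif datum.tag == "dimensions":
            # stored under the renamed "dim" key, as in the diff above
            feature_dict["dim"] = (int(datum[0].text), int(datum[1].text))

    print(feature_dict)  # {'type': 'block', 'location': (10, 20), 'dim': (3, 2)}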
