Skip to content

Commit

Permalink
Ground truth analysis fixes (#259)
Browse files Browse the repository at this point in the history
* Add support for a new module on datetime

The module (`arrow`) is similar to moment.js in JavaScript. It supports parsing
ISO-format strings as well as getting the timestamp from a datetime object,
both of which require too much fiddling otherwise.

* Fix a bunch of issues with the initial data collection for ground truth

- In particular, the iOS battery level was never read, so we always got -100
- The Android key was always "sensor_config" (because when we updated the
  read timestamp on the document, it updated everything including the key).
  See e-mission/cordova-usercache#14

* Fix issue with tour model creation

Found this while running the intake pipeline locally. Does not appear to happen
on the real server, but fixing corner cases is important!

* Add support for displaying the formatted time from the data_df in geojson

Makes it easier to debug maps in the various evaluations
  • Loading branch information
shankari committed Apr 10, 2016
1 parent e9bc02a commit 70fefb5
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 5 deletions.
4 changes: 4 additions & 0 deletions bin/historical/fix_ios_broken_battery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import emission.core.get_database as edb

# One-off cleanup: drop every battery entry whose level is the -100
# placeholder value from the timeseries.
broken_battery_query = {
    'metadata.key': 'background/battery',
    'data.battery_level_pct': -100,
}
edb.get_timeseries_db().remove(broken_battery_query)
25 changes: 25 additions & 0 deletions bin/historical/fix_sensor_config_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import emission.core.get_database as edb


def fix_key(check_field, new_key):
    """Re-key mislabelled "config/sensor_config" entries and move them.

    Every timeseries entry whose key is "config/sensor_config" and which
    contains `check_field` gets its metadata key rewritten to `new_key`, is
    inserted into the usercache db, and is then removed from the timeseries
    db.

    :param check_field: dotted field path whose presence identifies the
        entries to fix (e.g. "data.latitude")
    :param new_key: the metadata key to store on the matching entries
    """
    udb = edb.get_usercache_db()
    tdb = edb.get_timeseries_db()
    query = {"metadata.key": "config/sensor_config",
             check_field: {"$exists": True}}

    # Nested fields are addressed with dot notation in MongoDB; sorting on
    # "metadata/write_ts" names a field that does not exist, so the result
    # was not actually the earliest entry.
    first_entry = list(tdb.find(query).sort("metadata.write_ts").limit(1))
    # Single-argument print(...) behaves identically on Python 2 and 3
    print("First entry for %s is %s" % (new_key, first_entry))

    for i, entry in enumerate(tdb.find(query)):
        entry["metadata"]["key"] = new_key
        insert_result = udb.insert(entry)
        remove_result = tdb.remove(entry["_id"])
        # Only report progress every 10000 entries to keep the output short
        if i % 10000 == 0:
            print(insert_result)
            print(remove_result)

# (field whose presence identifies the entry type, key to store instead),
# processed in this order
for check_field, new_key in (
        ("data.battery_status", "background/battery"),
        ("data.latitude", "background/location"),
        ("data.zzaEh", "background/motion_activity"),
        ("data.currState", "statemachine/transition"),
):
    fix_key(check_field, new_key)
1 change: 1 addition & 0 deletions emission/analysis/modelling/tour_model/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def bin_data(self):
#delete lower portion of bins
def delete_bins(self):
if len(self.bins) <= 1:
self.newdata = self.data
return
num = self.elbow_distance()
sum = 0
Expand Down
10 changes: 5 additions & 5 deletions emission/analysis/plotting/geojson/geojson_feature_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,18 +313,18 @@ def get_feature_list_for_point_array(points_array):

return feature_coll

def get_feature_list_from_df(loc_time_df, ts="ts", latitude="latitude", longitude="longitude", fmt_time="fmt_time"):
    """
    Convert a dataframe of location points into a feature list.

    Input DF should have columns called "ts", "latitude", "longitude" and
    "fmt_time", or the corresponding columns can be passed in using the ts,
    latitude, longitude and fmt_time parameters.

    The rows are first converted to location entries, and the result of
    get_feature_list_for_point_array on those entries is returned.
    """
    points_array = get_location_entry_list_from_df(loc_time_df, ts, latitude, longitude, fmt_time)
    return get_feature_list_for_point_array(points_array)

def get_location_entry_list_from_df(loc_time_df, ts="ts", latitude="latitude", longitude="longitude", fmt_time="fmt_time"):
    """
    Build one location entry per row of loc_time_df.

    The ts, latitude, longitude and fmt_time parameters name the dataframe
    columns to read. Each entry uses the row index (as a string) for "_id"
    and a geojson point with [longitude, latitude] coordinates for "loc".

    The fmt_time column is optional: callers written before the fmt_time
    parameter existed may pass a dataframe without it, in which case the
    entries are created without a "fmt_time" field instead of failing with
    a KeyError.

    Returns the list of location entries, in row order.
    """
    # Check once up front instead of per row
    has_fmt_time = fmt_time in loc_time_df.columns
    location_entry_list = []
    for idx, row in loc_time_df.iterrows():
        retVal = {"latitude": row[latitude], "longitude": row[longitude], "ts": row[ts],
                  "_id": str(idx), "loc": gj.Point(coordinates=[row[longitude], row[latitude]])}
        if has_fmt_time:
            retVal["fmt_time"] = row[fmt_time]
        location_entry_list.append(ecwl.Location(retVal))
    return location_entry_list
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ xmltodict
python-crontab
attrdict
enum34
arrow

0 comments on commit 70fefb5

Please sign in to comment.