-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconverter.py
72 lines (61 loc) · 2.14 KB
/
converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
'''
Converts my JSON log files to the XML format used by Michael Ogawa's
prototype implementation of the 'storylines' algorithm. See website
http://www.michaelogawa.com/research/storylines/ for more information.
'''
import json
import os
import xml.etree.ElementTree as et
import xml.dom.minidom as minidom
def loadnews(directory):
    '''Load every JSON log line found under *directory* into one dictionary.

    The directory tree is walked recursively and every file in it is read.
    Each non-blank line must be a JSON array whose first element becomes the
    dictionary key and whose second element becomes the value.  Per the
    original author's note the result has the form
    {unix time stamp : [(topic, avg sentiment score)]}.

    Later lines overwrite earlier ones that share the same key.  A missing
    directory yields an empty dictionary because os.walk produces nothing.
    '''
    graphdict = {}
    for dirpath, _dirnames, filenames in os.walk(directory):
        for name in filenames:
            # 'with' guarantees the handle is released even if parsing fails.
            with open(os.path.join(dirpath, name), 'r') as logfile:
                for line in logfile:
                    # Blank lines (e.g. a trailing empty line) carry no data
                    # and would otherwise make json.loads raise.
                    if not line.strip():
                        continue
                    entry = json.loads(line)
                    graphdict[entry[0]] = entry[1]
    return graphdict
def extractevents(logdata, dayinterval=86400000):
    '''Extract only the log entries whose content differs from the previous one.

    Keys of *logdata* are visited in sorted order.  An entry is kept when its
    value differs from the value of the most recently kept entry; the first
    entry is always kept.  Only the values are compared: the keys are unique,
    so including them in the comparison would make every entry look changed.

    Kept entries are re-keyed onto an evenly spaced axis: the n-th kept entry
    (counting from 0) is stored under n * dayinterval as the tuple
    (original key, value).

    dayinterval -- spacing between consecutive events.  The default mirrors
                   the spacing the original author took from the
                   processing-storylines demo (described there as 1000-day
                   intervals).
    '''
    events = {}
    # Unique marker meaning "nothing kept yet", so that a legitimately empty
    # first value is not mistaken for "unchanged".
    nothing = object()
    previous = nothing
    # sorted() over the mapping iterates its keys on Python 2 and Python 3.
    for key in sorted(logdata):
        payload = logdata[key]
        if previous is nothing or payload != previous:
            previous = payload
            events[dayinterval * len(events)] = (key, payload)
    return events
def buildxml(events):
    '''Build the 'file_events' XML tree from the extracted events.

    *events* maps a numeric time to a tuple whose second element is a list of
    (topic, score) pairs.  Times are visited in sorted order and one <event>
    element is appended to the root per pair, with these attributes:

    filename -- 'negative' when the score is below zero, else 'positive'
    date     -- the event time as an integer string
    author   -- the topic

    Returns the root xml.etree.ElementTree.Element.
    '''
    root = et.Element('file_events')
    # sorted() over the mapping iterates its keys on Python 2 and Python 3.
    for time in sorted(events):
        topics = events[time][1]
        date = str(int(time))
        for topic, score in topics:
            event = et.SubElement(root, 'event')
            event.set('filename', 'negative' if score < 0 else 'positive')
            event.set('date', date)
            event.set('author', topic)
    return root
def prettyPrint(element):
    '''Return *element* serialized as indented, human-readable XML text.

    ElementTree's own serialization is re-parsed with minidom, whose
    toprettyxml() adds the line breaks and indentation.
    '''
    document = minidom.parseString(et.tostring(element))
    return document.toprettyxml()
def convert(logdir, outdir):
    '''Convert the JSON logs under *logdir* into a storylines XML file.

    The logs are loaded, reduced to events, turned into an XML tree and
    written, pretty-printed, to 'storylinenews.xml' inside *outdir*.  An
    existing file of that name is overwritten.
    '''
    logdata = loadnews(logdir)
    events = extractevents(logdata)
    xmltree = buildxml(events)
    # 'with' closes the output file even if serialization or writing fails.
    with open(os.path.join(outdir, 'storylinenews.xml'), 'w') as xmlfile:
        xmlfile.write(prettyPrint(xmltree))
# Script entry: read the logs from 'data/logs/' and write the XML into 'data/'.
# These statements sit at module level, so they run whenever the file is
# executed or imported.
logdir, outdir = 'data/logs/', 'data/'
convert(logdir=logdir, outdir=outdir)