-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_sifts.py
307 lines (307 loc) · 12.5 KB
/
parse_sifts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# #!/home/exec/anaconda3/bin/python
# -*- coding: utf-8 -*-
# curl --silent ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/xml/3sn6.xml.gz | gunzip | python2 parse_sifts.py 1> 3sn6.tsv 2> /dev/null
# Generated Thu Nov 20 17:12:58 2014 by generateDS.py version 2.13a.
#
# Command line options:
# ('-f', '')
# ('-o', 'sifts.py')
#
# Command line arguments:
# eFamily.xsd
#
# Command line:
# /usr/local/bin/generateDS.py -f -o "sifts.py" eFamily.xsd
#
# Current working directory (os.getcwd()):
# src
#
import sys
import getopt
import re as re_
import base64
import datetime as datetime_
etree_ = None
Verbose_import_ = False
(
XMLParser_import_none, XMLParser_import_lxml,
XMLParser_import_elementtree
) = range(3)
XMLParser_import_library = None
try:
# lxml
from lxml import etree as etree_
XMLParser_import_library = XMLParser_import_lxml
if Verbose_import_:
print("running with lxml.etree")
except ImportError:
try:
# cElementTree from Python 2.5+
import xml.etree.cElementTree as etree_
XMLParser_import_library = XMLParser_import_elementtree
if Verbose_import_:
print("running with cElementTree on Python 2.5+")
except ImportError:
try:
# ElementTree from Python 2.5+
import xml.etree.ElementTree as etree_
XMLParser_import_library = XMLParser_import_elementtree
if Verbose_import_:
print("running with ElementTree on Python 2.5+")
except ImportError:
try:
# normal cElementTree install
import cElementTree as etree_
XMLParser_import_library = XMLParser_import_elementtree
if Verbose_import_:
print("running with cElementTree")
except ImportError:
try:
# normal ElementTree install
import elementtree.ElementTree as etree_
XMLParser_import_library = XMLParser_import_elementtree
if Verbose_import_:
print("running with ElementTree")
except ImportError:
raise ImportError(
"Failed to import ElementTree from any known place")
def parsexml_(*args, **kwargs):
if (XMLParser_import_library == XMLParser_import_lxml and
'parser' not in kwargs):
# Use the lxml ElementTree compatible parser so that, e.g.,
# we ignore comments.
kwargs['parser'] = etree_.ETCompatXMLParser()
doc = etree_.parse(*args, **kwargs)
return doc
#
# User methods
#
# Calls to the methods in these classes are generated by generateDS.py.
# You can replace these methods by re-implementing the following class
# in a module named generatedssuper.py.
try:
from generatedssuper import GeneratedsSuper
except ImportError as exp:
class GeneratedsSuper(object):
tzoff_pattern = re_.compile(r'(\+|-)((0\d|1[0-3]):[0-5]\d|14:00)$')
class _FixedOffsetTZ(datetime_.tzinfo):
def __init__(self, offset, name):
self.__offset = datetime_.timedelta(minutes=offset)
self.__name = name
def utcoffset(self, dt):
return self.__offset
def tzname(self, dt):
return self.__name
def dst(self, dt):
return None
def gds_format_string(self, input_data, input_name=''):
return input_data
def gds_validate_string(self, input_data, node, input_name=''):
if not input_data:
return ''
else:
return input_data
def gds_format_base64(self, input_data, input_name=''):
return base64.b64encode(input_data)
def gds_validate_base64(self, input_data, node, input_name=''):
return input_data
def gds_format_integer(self, input_data, input_name=''):
return '%d' % input_data
def gds_validate_integer(self, input_data, node, input_name=''):
return input_data
def gds_format_integer_list(self, input_data, input_name=''):
return '%s' % input_data
def gds_validate_integer_list(self, input_data, node, input_name=''):
values = input_data.split()
for value in values:
try:
float(value)
except (TypeError, ValueError):
raise_parse_error(node, 'Requires sequence of integers')
return input_data
def gds_format_float(self, input_data, input_name=''):
return ('%.15f' % input_data).rstrip('0')
def gds_validate_float(self, input_data, node, input_name=''):
return input_data
def gds_format_float_list(self, input_data, input_name=''):
return '%s' % input_data
def gds_validate_float_list(self, input_data, node, input_name=''):
values = input_data.split()
for value in values:
try:
float(value)
except (TypeError, ValueError):
raise_parse_error(node, 'Requires sequence of floats')
return input_data
def gds_format_double(self, input_data, input_name=''):
return '%e' % input_data
def gds_validate_double(self, input_data, node, input_name=''):
return input_data
def gds_format_double_list(self, input_data, input_name=''):
return '%s' % input_data
def gds_validate_double_list(self, input_data, node, input_name=''):
values = input_data.split()
for value in values:
try:
float(value)
except (TypeError, ValueError):
raise_parse_error(node, 'Requires sequence of doubles')
return input_data
def gds_format_boolean(self, input_data, input_name=''):
return ('%s' % input_data).lower()
def gds_validate_boolean(self, input_data, node, input_name=''):
return input_data
def gds_format_boolean_list(self, input_data, input_name=''):
return '%s' % input_data
def gds_validate_boolean_list(self, input_data, node, input_name=''):
values = input_data.split()
for value in values:
if value not in ('true', '1', 'false', '0', ):
raise_parse_error(
node,
'Requires sequence of booleans '
'("true", "1", "false", "0")')
return input_data
def gds_validate_datetime(self, input_data, node, input_name=''):
return input_data
def gds_format_datetime(self, input_data, input_name=''):
if input_data.microsecond == 0:
_svalue = '%04d-%02d-%02dT%02d:%02d:%02d' % (
input_data.year,
input_data.month,
input_data.day,
input_data.hour,
input_data.minute,
input_data.second,
)
else:
_svalue = '%04d-%02d-%02dT%02d:%02d:%02d.%s' % (
input_data.year,
input_data.month,
input_data.day,
input_data.hour,
input_data.minute,
input_data.second,
('%f' % (float(input_data.microsecond) / 1000000))[2:],
)
if input_data.tzinfo is not None:
tzoff = input_data.tzinfo.utcoffset(input_data)
if tzoff is not None:
total_seconds = tzoff.seconds + (86400 * tzoff.days)
if total_seconds == 0:
_svalue += 'Z'
else:
if total_seconds < 0:
_svalue += '-'
total_seconds *= -1
else:
_svalue += '+'
hours = total_seconds // 3600
minutes = (total_seconds - (hours * 3600)) // 60
_svalue += '{0:02d}:{1:02d}'.format(hours, minutes)
return _svalue
@classmethod
def gds_parse_datetime(cls, input_data):
tz = None
if input_data[-1] == 'Z':
tz = GeneratedsSuper._FixedOffsetTZ(0, 'UTC')
input_data = input_data[:-1]
else:
results = GeneratedsSuper.tzoff_pattern.search(input_data)
if results is not None:
tzoff_parts = results.group(2).split(':')
tzoff = int(tzoff_parts[0]) * 60 + int(tzoff_parts[1])
if results.group(1) == '-':
tzoff *= -1
tz = GeneratedsSuper._FixedOffsetTZ(
tzoff, results.group(0))
input_data = input_data[:-6]
time_parts = input_data.split('.')
if len(time_parts) > 1:
micro_seconds = int(float('0.' + time_parts[1]) * 1000000)
input_data = '%s.%s' % (time_parts[0], micro_seconds, )
dt = datetime_.datetime.strptime(
input_data, '%Y-%m-%dT%H:%M:%S.%f')
else:
dt = datetime_.datetime.strptime(
input_data, '%Y-%m-%dT%H:%M:%S')
dt = dt.replace(tzinfo=tz)
return dt
def gds_validate_date(self, input_data, node, input_name=''):
return input_data
def gds_format_date(self, input_data, input_name=''):
_svalue = '%04d-%02d-%02d' % (
input_data.year,
input_data.month,
input_data.day,
)
try:
if input_data.tzinfo is not None:
tzoff = input_data.tzinfo.utcoffset(input_data)
if tzoff is not None:
total_seconds = tzoff.seconds + (86400 * tzoff.days)
if total_seconds == 0:
_svalue += 'Z'
else:
if total_seconds < 0:
_svalue += '-'
total_seconds *= -1
else:
_svalue += '+'
hours = total_seconds // 3600
minutes = (total_seconds - (hours * 3600)) // 60
_svalue += '{0:02d}:{1:02d}'.format(hours, minutes)
except AttributeError:
pass
return _svalue
@classmethod
def gds_parse_date(cls, input_data):
tz = None
if input_data[-1] == 'Z':
tz = GeneratedsSuper._FixedOffsetTZ(0, 'UTC')
input_data = input_data[:-1]
else:
results = GeneratedsSuper.tzoff_pattern.search(input_data)
if results is not None:
tzoff_parts = results.group(2).split(':')
tzoff = int(tzoff_parts[0]) * 60 + int(tzoff_parts[1])
if results.group(1) == '-':
tzoff *= -1
tz = GeneratedsSuper._FixedOffsetTZ(
tzoff, results.group(0))
input_data = input_data[:-6]
dt = datetime_.datetime.strptime(input_data, '%Y-%m-%d')
dt = dt.replace(tzinfo=tz)
return dt.date()
def gds_validate_time(self, input_data, node, input_name=''):
return input_data
def gds_format_time(self, input_data, input_name=''):
if input_data.microsecond == 0:
_svalue = '%02d:%02d:%02d' % (
input_data.hour,
input_data.minute,
input_data.second,
)
else:
_svalue = '%02d:%02d:%02d.%s' % (
input_data.hour,
input_data.minute,
input_data.second,
('%f' % (float(input_data.microsecond) / 1000000))[2:],
)
if input_data.tzinfo is not None:
tzoff = input_data.tzinfo.utcoffset(input_data)
if tzoff is not None:
total_seconds = tzoff.seconds + (86400 * tzoff.days)
if total_seconds == 0:
_svalue += 'Z'
else:
...