This repository has been archived by the owner on Jan 22, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstructures.py
703 lines (570 loc) · 26.6 KB
/
structures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
from string import ascii_letters, digits
from collections import namedtuple, Counter
from sortedcontainers import SortedDict
identifier = 0
class SizeSelector(object):
def __init__(self, scale = 1, sizes = None):
if isinstance(scale, int) and scale: self.scale = scale
else: self.scale = 1
if isinstance(sizes, Counter): self.sizes = sizes
else: self.sizes = Counter()
@property
def no_estimate_possible(self):
if self.sizes: return False
return True
@property
def size_estimate(self):
most_common = self.sizes.most_common()
estimate, rate = most_common[0]
for size, count in most_common[1:]:
if count != rate: break
if size < estimate: estimate = size
return self.scale * estimate
def add_possible_size(self, size):
assert isinstance(size, int)
self.sizes.update([size // self.scale])
def remove_possible_sizes_above(self, limit):
assert isinstance(limit, int)
assert all( isinstance(size, int) for size in self.sizes.keys() )
self.sizes = Counter(filter(lambda size: size <= limit, self.sizes.elements()))
def remove_possible_sizes_below(self, limit):
assert isinstance(limit, int)
assert all( isinstance(size, int) for size in self.sizes.keys() )
self.sizes = Counter(filter(lambda size: size >= limit, self.sizes.elements()))
def scaled_selector(self, scale):
assert isinstance(scale, int)
assert all( isinstance(size, int) for size in self.sizes.keys() )
return SizeSelector(self.scale * scale, self.sizes)
class Structure(object):
def __init__(self, size = None):
if isinstance(size, int):
self.presize = size
self.size_known = True
else:
self.presize = SizeSelector()
self.size_known = False
self.known = False
def description(self, label = None, specifiers = None):
descriptors = []
if specifiers is not None:
assert isinstance(specifiers, str)
descriptors.append(specifiers)
try:
descriptors.append(self.full_name)
except AttributeError:
descriptors.append('void')
try:
assert isinstance(self.size, int)
size = self.size
if size: descriptors.append('<size = {:d} byte>'.format(size))
except AttributeError: pass
if label is not None:
assert isinstance(label, str)
descriptors.append(label)
return ' '.join(descriptors)
@property
def size(self):
if not self.size_known:
assert isinstance(self.presize, SizeSelector)
if self.presize.no_estimate_possible: raise AttributeError('size is unknown')
self.presize = self.presize.size_estimate
self.size_known = True
assert isinstance(self.presize, int)
return self.presize
@size.setter
def size(self, size):
assert isinstance(size, int)
if not self.size_known:
self.presize = size
self.size_known = True
else: assert self.presize == size
@property
def possible_size_known(self):
return self.size_known or not self.presize.no_estimate_possible
def add_possible_size(self, size):
assert isinstance(size, int)
if not self.size_known:
assert isinstance(self.presize, SizeSelector)
self.presize.add_possible_size(size)
else: assert self.size <= size
def remove_possible_sizes_above(self, limit):
assert isinstance(limit, int)
if not self.size_known:
assert isinstance(self.presize, SizeSelector)
self.presize.remove_possible_sizes_above(limit)
else: assert self.size <= limit
def remove_possible_sizes_below(self, limit):
assert isinstance(limit, int)
if not self.size_known:
assert isinstance(self.presize, SizeSelector)
self.presize.remove_possible_sizes_below(limit)
else: assert self.size >= limit
def same(self, other):
if self is other: return True
if type(self) is not type(other): return False
if self.size_known and other.size_known and self.size != other.size: return False
void = Structure(0)
class Data(Structure):
def __init__(self, name = '', substructures = None, size = None):
super().__init__(size)
real_name = name.partition('<')[0] + name.rpartition('>')[2]
try: invalid_name = name[0] not in ascii_letters or any(
character not in ascii_letters + digits + '_ ' for character in real_name[1:])
except (TypeError, IndexError): invalid_name = True
if invalid_name:
global identifier
assert isinstance(identifier, int)
self.name = '#{:d}'.format(identifier)
identifier += 1
else: self.name = name
try: self.substructures
except AttributeError: self.substructures = SortedDict()
if isinstance(substructures, list):
for substructure in substructures:
self.add_substructure(substructure)
@property
def full_name(self):
name_parts = []
try:
assert isinstance(self.extra_name, str)
name_parts.append(self.extra_name)
except AttributeError: pass
name_parts.append(self.name)
return ' '.join(name_parts)
def add_substructure(self, substructure):
assert isinstance(substructure, Substructure)
assert substructure.offset not in self.substructures
if substructure.size_known:
assert isinstance(substructure.presize, int)
try: assert self.substructures.keys()[self.substructures.bisect(substructure.offset)] >= substructure.offset + substructure.size
except IndexError: assert not self.size_known or self.size >= substructure.offset + substructure.size
self.substructures[substructure.offset] = substructure
def add_possible_size(self, size):
assert isinstance(size, int)
super().add_possible_size(size)
self.annotate_size()
def annotate_size(self):
if self.substructures:
substructures = reversed(self.substructures.values())
last_substructure = next(substructures)
last_done = self.annotate_size_of_last_substructure(last_substructure)
upper_bound = last_substructure.offset
for substructure in substructures:
possible_size = upper_bound - substructure.offset
substructure.remove_possible_sizes_above(possible_size)
substructure.add_possible_size(possible_size)
upper_bound = substructure.offset
self.annotate_size = lambda: self.annotate_size_of_last_substructure(last_substructure)
self.annotate_size = lambda: None
def annotate_size_of_last_substructure(self, substructure):
assert isinstance(substructure, Substructure)
if substructure.size_known:
assert isinstance(substructure.presize, int)
self.remove_possible_sizes_below(substructure.offset + substructure.size)
elif self.size_known:
assert isinstance(self.presize, int)
possible_size = self.size - substructure.offset
substructure.remove_possible_sizes_above(possible_size)
substructure.add_possible_size(possible_size)
else: return
self.annotate_size = self.annotate_size_of_last_substructure = lambda: None
def same(self, other):
if self is other: return True
if type(self) == Array or type(other) == Array and type(self) != type(other): return False
if not isinstance(other, type(self)) and not isinstance(self, type(other)): return False
if self.full_name == other.full_name: return True
if type(self) is not type(other) and self.name == other.name: return True
if '#' not in self.name and '#' not in other.name: return False
if self.size_known and other.size_known and self.size != other.size: return False
if len(self.substructures) != len(other.substructures): return False
if type(self) is not type(other) and len(self.substructures) == 0: return False
try: self_substructures = self.substructures.values()
except AttributeError: self_substructures = self.substructures
try: other_substructures = other.substructures.values()
except AttributeError: other_substructures = other.substructures
return all(self_substructure.same(other_substructure)
for self_substructure, other_substructure
in zip(self_substructures, other_substructures))
class DataEnumeration(Data):
def __init__(self, *arguments, **keyword_arguments):
super().__init__(*arguments, **keyword_arguments)
self.extra_name = 'enumeration'
class DataStructure(Data):
def __init__(self, *arguments, **keyword_arguments):
super().__init__(*arguments, **keyword_arguments)
self.extra_name = 'structure'
class DataClass(Data):
def __init__(self, *arguments, **keyword_arguments):
super().__init__(*arguments, **keyword_arguments)
self.extra_name = 'class'
class DataUnion(Data):
def __init__(self, *arguments, **keyword_arguments):
self.substructures = []
super().__init__(*arguments, **keyword_arguments)
self.extra_name = 'union'
def add_substructure(self, substructure):
assert isinstance(substructure, Substructure)
assert not substructure.offset
self.substructures.append(substructure)
def annotate_size(self):
if self.size_known:
assert isinstance(self.presize, int)
for substructure in self.substructures:
substructure.remove_possible_sizes_above(self.size)
substructure.add_possible_size(self.size)
self.annotate_size = lambda: None
else:
biggest_size = 0
for substructure in self.substurctures:
assert not substructure.offset
if substructure.size_known:
assert isinstance(substructure.presize, int)
biggest_size = max(substructure.size, biggest_size)
else:
biggest_size = None
break
if biggest_size is not None:
self.remove_possible_sizes_below(biggest_size)
self.add_possible_size(biggest_size)
self.annotate_size = lambda: None
def annotate_size_of_last_substructure(self, substructure): pass
class Array(Data):
def __init__(self, cell, count = 0, size = None):
assert isinstance(cell, SpecificStructure)
self.cell = cell
if isinstance(count, int): self.count = count
else: self.count = 0
if isinstance(size, int): self.cell.size = size // self.count
self.extra_name = '[{:d}]'.format(self.count)
self.name = self.cell.description() + self.extra_name
def description(self, label = None, specifiers = None):
descriptors = []
if specifiers is not None:
assert isinstance(specifiers, str)
descriptors.append(specifiers)
descriptors.append(self.cell.description(label))
return ' '.join(descriptors) + self.extra_name
@property
def size_known(self):
return self.cell.structure.size_known
@size_known.setter
def size_known(self, boolean):
assert isinstance(boolean, bool)
self.cell.structure.size_known = boolean
@property
def presize(self):
if isinstance(self.cell.structure.presize, SizeSelector):
return self.cell.structure.presize.scaled_selector(self.count)
assert isinstance(self.cell.structure.presize, int)
return self.cell.structure.presize * self.count
@presize.setter
def presize(self, size):
assert isinstance(size, int)
self.cell.structure.presize = size // self.count
@property
def substructures(self):
substructures = SortedDict()
for x in range(self.count):
cell_offset = x * self.cell.structure.size
substructures[cell_offset] = Substructure(self.cell, offset = cell_offset)
return substructures
def add_substructure(self, substructure): pass
def annotate_size(self): pass
def annotate_size_of_last_substructure(self, substructure): pass
def same(self, other):
same = Structure.same(self, other)
if same is not None: return same
return self.count == other.count and self.cell.same(other.cell)
class Pointer(Structure):
pointer_size = SizeSelector()
pointer_size_known = False
def __init__(self, destination = None, size = None):
if isinstance(size, int): self.size = size
if isinstance(destination, SpecificStructure): self.destination = destination
else: self.destination = SpecificStructure(void)
def description(self, label = None, specifiers = None):
descriptors = [self.destination.description(), '*']
if specifiers is not None: descriptors.append(specifiers)
if label is not None: descriptors.append(label)
return ' '.join(descriptors)
@property
def size_known(self):
return type(self).pointer_size_known
@size_known.setter
def size_known(self, boolean):
assert isinstance(boolean, bool)
type(self).pointer_size_known = boolean
@property
def presize(self):
return type(self).pointer_size
@presize.setter
def presize(self, size):
assert isinstance(size, int)
type(self).pointer_size = size
def same(self, other):
same = Structure.same(self, other)
if same is not None: return same
return self.destination.same(other.destination)
class Reference(Pointer):
def description(self): return self.destination.description() + ' &'
class Function(Structure):
def __init__(self, return_type = None, argument_types = None, size = None):
super().__init__(size)
global identifier
self.name = '#{:d}'.format(identifier)
identifier += 1
if isinstance(return_type, Structure): self.return_type = return_type
else: self.return_type = void
self.argument_types = []
if isinstance(argument_types, list):
for argument_type in argument_types:
if isinstance(argument_type, SpecificStructure):
self.argumet_types.append(argument_type)
def description(self, label = None, specifiers = None):
descriptors = [self.return_type.description(), self.name]
descriptors.append('(' + ', '.join(map(lambda argument_type: argument_type.description(), self.argument_types)) + ')')
if specifiers is not None:
assert isinstance(specifiers, str)
descriptors.append(specifiers)
if label is not None:
assert isinstance(label, str)
descriptors.append(label)
return ' '.join(descriptors)
def add_argument_type(self, argument_type):
assert isinstance(argument_type, SpecificStructure)
self.argument_types.append(argument_type)
def same(self, other):
same = Structure.same(self, other)
if same is not None: return same
if not self.return_type.same(other.return_type): return False
return all(self_argument_type.same(other_argument_type)
for self_argument_type, other_argument_type
in zip(self.argument_types, other.argument_types))
class SpecificStructure(namedtuple('SpecificStructure', ['structure', 'constant', 'volatile'])):
def __new__(self_class, structure, constant = False, volatile = False):
if not isinstance(structure, Structure): raise TypeError()
return super(SpecificStructure, self_class).__new__(self_class, structure, constant, volatile)
def description(self, label = None):
specifiers = []
if self.constant: specifiers.append('constant')
if self.volatile: specifiers.append('volatile')
if specifiers: return self.structure.description(label, ' '.join(specifiers))
return self.structure.description(label)
def same(self, other):
if type(self) is not type(other): return False
if self[1:] is not other[1:]: return False
return self.structure.same(other.strurcture)
class Substructure(namedtuple('Substructure', ['structure', 'label', 'offset'])):
def __new__(self_class, structure, label = '', offset = 0):
if not isinstance(label, str): label = ''
if not isinstance(offset, int): offset = 0
if not isinstance(structure, SpecificStructure): raise TypeError()
return super().__new__(self_class, structure, label, offset)
@property
def size_known(self):
return self.structure.structure.size_known
@property
def possible_size_known(self):
return self.structure.structure.possible_size_known
@property
def presize(self):
return self.structure.structure.presize
@property
def size(self):
return self.structure.structure.size
def description(self):
return self.structure.description(self.label)
def add_possible_size(self, size):
self.structure.structure.add_possible_size(size)
def remove_possible_sizes_above(self, limit):
self.structure.structure.remove_possible_sizes_above(limit)
def remove_possible_sizes_below(self, limit):
self.structure.structure.remove_possible_sizes_below(limit)
def same(self, other):
if type(self) is not type(other): return False
if self[1:] is not other[1:]: return False
return self.structure.same(other.sturcture)
def parse_structures_recursive(string):
if string is None: return {}
separators = ';&$%?#@'
data_structures = {}
functions = []
pointers = []
def lookup_data_structure(structure):
assert isinstance(structure, Data)
if structure.name in data_structures:
lookup_structure = data_structures[structure.name]
assert structure.same(lookup_structure)
return lookup_structure
def lookup_structure(structure, structure_list):
assert isinstance(structure, Structure)
for other_structure in structure_list:
if structure.same(other_structure):
return other_structure
def update_structure(structure, other_structure):
assert isinstance(structure, Data)
assert structure.same(other_structure)
if '#' in structure.name and '#' not in other_structure.name:
structure.name = other_structure.name
if not isinstance(structure, type(other_structure)):
assert isinstance(other_structure, type(structure))
new_structure = type(other_structure)(structure.name)
new_structure.substructures = structure.substructures
new_structure.presize = structure.presize
new_structure.size_known = structure.size_known
data_structures[new_structure.name] = new_structure
return new_structure
return structure
def parse_fields(fields, delimiter = ','):
imbalance = { '<>' : 0, '()' : 0 }
field = []
for char in fields:
if not any(imbalance.values()) and char == delimiter:
yield ''.join(field).strip()
field = []
continue
elif char == '<':
imbalance['<>'] += 1
elif char == '>':
imbalance['<>'] -= 1
elif char == '(':
imbalance['()'] += 1
elif char == ')':
imbalance['()'] -= 1
field.append(char)
if field and not any(imbalance.values()): yield ''.join(field).strip()
def parse_keyword(name, keyword):
if not name: return False, name
name = name.strip()
partitions = name.partition(keyword)
if not partitions[1]: return False, name
if partitions[0]:
preceding = partitions[0][-1]
if not preceding.isspace(): return False, name
if partitions[2]:
succeeding = partitions[-1][0]
if not succeeding.isspace(): return False, name
return True, (partitions[0].strip() + ' ' + partitions[2].strip()).strip()
def parse_specific_pointer(pointer_partition, structure, known):
while pointer_partition[1]:
pointer_partition = pointer_partition[2].partition('*')
constant, keywords = parse_keyword(pointer_partition[0], 'const')
volatile, keywords = parse_keyword(keywords, 'volatile')
assert not keywords
pointer = Pointer(structure)
lookup = lookup_structure(pointer, pointers)
if not lookup:
pointers.append(pointer)
known = False
else: pointer, known = lookup, True
structure = SpecificStructure(pointer, constant, volatile)
return structure, known
def parse_specific_structure(field):
if field == 'void': return SpecificStructure(void)
pointer_partition = field.partition('*')
constant, name = parse_keyword(pointer_partition[0], 'const')
volatile, name = parse_keyword(name, 'volatile')
is_enumeration, name = parse_keyword(name, 'enum')
is_structure, name = parse_keyword(name, 'struct')
is_union, name = parse_keyword(name, 'union')
is_class, name = parse_keyword(name, 'class')
if not (is_enumeration or is_structure or is_union or is_class):
structure = Data(name)
elif is_enumeration and not (is_structure or is_union):
structure = DataEnumeration(name)
elif is_structure and not (is_enumeration or is_union or is_class):
structure = DataStructure(name)
elif is_union and not (is_enumeration or is_structure or is_class):
structure = DataUnion(name)
elif is_class and not (is_enumeration or is_structure or is_union):
structure = DataClass(name)
else: assert False
lookup = lookup_data_structure(structure)
if not lookup:
data_structures[structure.name] = structure
known = False
else:
structure = update_structure(lookup, structure)
known = True
structure = SpecificStructure(structure, constant, volatile)
return parse_specific_pointer(pointer_partition, structure, known)
def parse_general_structure(field):
function_partition = field.partition('()')
structure, known = parse_specific_structure(function_partition[0])
if not function_partition[1]: return structure, known
return_type, constant, volatile = structure
assert constant == volatile == False
function_partition = function_partition[2].partition('(')
assert function_partition[1]
function_partition = function_partition[2].partition(')')
assert function_partition[1]
function = Function(return_type)
arguments = parse_fields(function_partition[0])
for argument in arguments:
if argument == 'void': break
reference_partition = argument.partition('&')
structure = parse_general_structure(reference_partition[0])[0]
if reference_partition[1]:
assert not reference_partition[2]
structure = SpecificStructure(Reference(structure))
function.add_argument_type(structure)
lookup = lookup_structure(function, functions)
if not lookup:
functions.append(function)
known = False
else: function, known = lookup, True
pointer_partition = function_partition[2].strip().partition('*')
assert not pointer_partition[0] and pointer_partition[1]
return parse_specific_pointer(pointer_partition, function, known)
def parse_structure(line, depth = 0, substructure = False):
line = line.strip()
try: segments = list(parse_fields(line, separators[depth]))
except IndexError: segments = [line]
fields = tuple(parse_fields(segments[0]))
if not substructure:
structure = Data(fields[0])
lookup = lookup_data_structure(structure)
if not lookup:
for segment in segments[1:]:
structure.add_substructure(parse_structure(segment, depth + 1, True))
lookup = lookup_structure(structure, data_structures.values())
if lookup: lookup = update_structure(lookup, structure)
if lookup: structure = lookup
else: data_structures[structure.name] = structure
try: structure.size = int(fields[1])
except (IndexError, ValueError): pass
return
structure, known = parse_general_structure(fields[0])
if not known:
try:
for segment in segments[1:]:
structure.structure.add_substructure(parse_structure(segment, depth + 1, True))
except AttributeError: assert False
if isinstance(structure.structure, Data):
lookup = lookup_structure(structure.structure, data_structures.values())
if lookup: lookup = update_structure(lookup, structure.structure)
if known or lookup: structure = SpecificStructure(lookup, *structure[1:])
else: data_structures[structure.structure.name] = structure.structure
try:
label = fields[1]
array_partition = label.partition('[')
if array_partition[1]:
label = array_partition[0].strip()
array_partition = array_partition[2].partition(']')
assert array_partition[1] and not array_partition[2]
try: count = int(array_partition[0])
except ValueError: count = 0
array = Array(structure, count)
lookup = lookup_data_structure(array)
if not lookup: data_structures[array.full_name] = array
structure = SpecificStructure(array)
except IndexError: label = None
try: structure.structure.size = int(fields[3])
except (IndexError, ValueError): pass
try: return Substructure(structure, label, int(fields[2]))
except (IndexError, ValueError):
return Substructure(structure, label)
lines = string.strip().split('\n')
for line in lines: parse_structure(line)
for structure in data_structures.values(): structure.annotate_size()
return data_structures