Skip to content

Commit

Permalink
fix bug with unions inside of arrays (fastavro#400)
Browse files Browse the repository at this point in the history
  • Loading branch information
scottbelden authored Mar 3, 2020
1 parent 5e957ec commit fffad9f
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 6 deletions.
29 changes: 23 additions & 6 deletions fastavro/io/json_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def read_object_end(self):
# Begin decoding a JSON array: advance the parser past the array-start
# symbol, push decoder state, and clear the current key.
def read_array_start(self):
# Tell the parser that an ArrayStart symbol has been reached.
self._parser.advance(ArrayStart())
# NOTE(review): _push() is defined outside this view; presumably it saves
# the current position and descends into the array value -- confirm.
self._push()
# Array items are not addressed by a field name, so clear the key.
# read_index relies on `self._key is None` to mean that self._current is
# an item in an array rather than a JSON object holding a union field.
self._key = None

def read_array_end(self):
self._parser.advance(ArrayEnd())
Expand All @@ -164,13 +165,29 @@ def read_index(self):
self._parser.advance(Union())
alternative_symbol = self._parser.pop_symbol()

if self._current[self._key] is None:
label = "null"
# TODO: Try to clean this up.
# A JSON union is encoded like this: {"union_field": {"int": 32}} and so
# what we are doing is trying to change that into {"union_field": 32}
# before eventually reading the value of "union_field"
if self._key is None:
# If self._key is None, self._current is an item in an array
if self._current is None:
label = "null"
else:
label, data = self._current.popitem()
self._current = data
# TODO: Do we need to do this?
self._parser.push_symbol(UnionEnd())
else:
label, data = self._current[self._key].popitem()
self._current[self._key] = data
# TODO: Do we need to do this?
self._parser.push_symbol(UnionEnd())
# self._current is a JSON object and self._key should be the name
# of the union field
if self._current[self._key] is None:
label = "null"
else:
label, data = self._current[self._key].popitem()
self._current[self._key] = data
# TODO: Do we need to do this?
self._parser.push_symbol(UnionEnd())

index = alternative_symbol.labels.index(label)
symbol = alternative_symbol.get_symbol(index)
Expand Down
107 changes: 107 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,3 +344,110 @@ def test_empty_arrays():

new_records = roundtrip(schema, records)
assert records == new_records


def test_union_in_array():
    """Round-trip an array whose items are a union of two records and null.

    Each record alternative carries its own nullable string field, so a
    union appears both as the array item type and inside the items.

    https://github.com/fastavro/fastavro/issues/399
    """
    first_alternative = {
        "type": "record",
        "name": "rec1",
        "fields": [{
            "name": "field1",
            "type": ["string", "null"],
        }],
    }
    second_alternative = {
        "type": "record",
        "name": "rec2",
        "fields": [{
            "name": "field2",
            "type": ["string", "null"],
        }],
    }
    array_schema = {
        "type": "array",
        "items": [first_alternative, second_alternative, "null"],
    }

    # One datum that exercises every branch of the item union.
    expected = [
        [{"field1": "foo"}, {"field2": None}, None],
    ]

    decoded = roundtrip(array_schema, expected)
    assert expected == decoded


def test_union_in_array2():
    """Round-trip a record whose array field holds a union of record types.

    The array items are either a ``LimitedTime`` or a ``Text`` record, and
    each of those has a nullable string field, so a union is nested inside
    a union that is itself inside an array.

    https://github.com/fastavro/fastavro/issues/399
    """
    schema = {
        'type': 'record',
        'name': 'Inbox',
        'fields': [
            {'type': 'string', 'name': 'id'},
            {'type': 'string', 'name': 'msg_title'},
            {
                'name': 'msg_content',
                'type': {
                    'type': 'array',
                    'items': [
                        {
                            'type': 'record',
                            'name': 'LimitedTime',
                            'fields': [
                                {
                                    'type': ['string', 'null'],
                                    'name': 'type',
                                    'default': 'now'
                                }
                            ]
                        },
                        {
                            'type': 'record',
                            'name': 'Text',
                            'fields': [
                                {
                                    'type': ['string', 'null'],
                                    'name': 'text'
                                }
                            ]
                        }
                    ]
                }
            }
        ]
    }

    records = [
        {
            # 'id' is declared as a string in the schema, so the datum must
            # be a string too; an int here would not conform to the schema
            # and would distract from what this test is checking.
            'id': '1234',
            'msg_title': 'Hi',
            'msg_content': [{'type': 'now'}, {'text': 'hi from here!'}]
        },
    ]

    new_records = roundtrip(schema, records)
    assert records == new_records


def test_union_in_map():
    """Round-trip a record with a map whose values are a string/null union.

    Covers both a string value and a null value in the same map.

    https://github.com/fastavro/fastavro/issues/399
    """
    nullable_string_map = {
        "type": "map",
        "values": ["string", "null"],
    }
    record_schema = {
        "type": "record",
        "name": "Test",
        "namespace": "test",
        "fields": [{"name": "map", "type": nullable_string_map}],
    }

    expected = [{'map': {'c': '1', 'd': None}}]

    decoded = roundtrip(record_schema, expected)
    assert expected == decoded

0 comments on commit fffad9f

Please sign in to comment.