Skip to content

Commit

Permalink
NiFi: updated Raw file ocr template.
Browse files Browse the repository at this point in the history
  • Loading branch information
vladd-bit committed Nov 24, 2023
1 parent 228daf0 commit 88b3acc
Show file tree
Hide file tree
Showing 2 changed files with 2,484 additions and 1,722 deletions.
53 changes: 53 additions & 0 deletions nifi/user-scripts/parse-json-to-avro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import traceback
from io import BytesIO
import sys
import os
import uuid
import json
import avro
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

from avro.schema import Schema

from pydoc import locate

records_stream = json.loads(sys.stdin.read())

schema = {
"type": "record",
"name": "inferAvro",
"namespace":"org.apache.nifi",
"fields": []
}

fields = list(records_stream[0].keys())

for field_name in fields:
schema["fields"].append({"name": field_name, "type": ["null", { "type" : "long", "logicalType" : "timestamp-millis"}, "string"]})

avro_schema = avro.schema.parse(json.dumps(schema))

file_id = str(uuid.uuid4().hex)

tmp_file_path = os.path.join("/opt/nifi/user-scripts/tmp/" + file_id + ".avro")

with open(tmp_file_path, mode="wb+") as tmp_file:
writer = DataFileWriter(tmp_file, DatumWriter(), avro_schema)

for _record in records_stream:
writer.append(_record)

writer.close()

tmp_file = open(tmp_file_path, "rb")

tmp_file_data = tmp_file.read()

tmp_file.close()

# delete file temporarly created above
if os.path.isfile(tmp_file_path):
os.remove(tmp_file_path)

sys.stdout.buffer.write(tmp_file_data)
Loading

0 comments on commit 88b3acc

Please sign in to comment.