diff --git a/docs/source/ebu_tt_live.scripts.rst b/docs/source/ebu_tt_live.scripts.rst index 941482cee..b85b30ee8 100644 --- a/docs/source/ebu_tt_live.scripts.rst +++ b/docs/source/ebu_tt_live.scripts.rst @@ -58,3 +58,12 @@ scripts Package :undoc-members: :show-inheritance: + +:mod:`imsc_hrm_validator` Module +------------------------------------- + +.. autoclass:: ebu_tt_live.scripts.imsc_hrm_validator.imscHrmValidator + :members: + +.. automodule:: ebu_tt_live.scripts.imsc_hrm_validator + :show-inheritance: diff --git a/docs/source/scripts_and_their_functions.rst b/docs/source/scripts_and_their_functions.rst index f9c70e834..cdbc6028c 100644 --- a/docs/source/scripts_and_their_functions.rst +++ b/docs/source/scripts_and_their_functions.rst @@ -226,6 +226,9 @@ This script loads a file from the file system and attempts to validate it as the specified format, either EBU-TT Part 1, EBU-TT Part 3 or EBU-TT-D. By default the expected format is EBU-TT-D. +Additionally, EBU-TT-D documents can be validated against the +`IMSC-HRM `_ by adding the ``--hrm`` flag. + Example command lines: ``validator -i path/to/ebu-tt-1-file-to-test.xml -f 1`` @@ -233,3 +236,5 @@ Example command lines: ``validator -i path/to/ebu-tt-3-file-to-test.xml -f 3`` ``validator -i path/to/ebu-tt-d-file-to-test.xml -f D`` + +``validator -i path/to/ebu-tt-d-file-to-test.xml -f D --hrm`` diff --git a/docs/source/validation_framework.rst b/docs/source/validation_framework.rst index 677f6b1b4..0b73d842f 100644 --- a/docs/source/validation_framework.rst +++ b/docs/source/validation_framework.rst @@ -146,3 +146,20 @@ by using the context manager class and instead of the context being passed around as a parameter among functions the binding classes call the :py:func:`ebu_tt_live.bindings.pyxb_utils.get_xml_parsing_context` function to gain access to the parsing context object. + + +Validation outside document objects +=================================== + +When constraints beyond the document specification need to be validated, +validation code can be written outside the document and bindings objects themselves. + +IMSC-HRM validation +------------------- + +The :py:class:`ebu_tt_live.scripts.imscHrmValidator` class is an example +of such out-of-document validation. It provides a single +:py:func:`ebu_tt_live.scripts.imscHrmValidator.validate` method that +processes the provided validated EBU-TT-D document, according to the +`IMSC-HRM `_ algorithm, +and returns true or false as appropriate. diff --git a/ebu_tt_live/adapters/document_data.py b/ebu_tt_live/adapters/document_data.py index 34e7caed0..05543b8f5 100644 --- a/ebu_tt_live/adapters/document_data.py +++ b/ebu_tt_live/adapters/document_data.py @@ -73,7 +73,11 @@ class XMLtoEBUTTDAdapter(IDocumentDataAdapter): _provides = EBUTTDDocument def convert_data(self, data, **kwargs): - return EBUTTDDocument.create_from_xml(data), kwargs + doc = EBUTTDDocument.create_from_xml(data) + kwargs.update(dict( + raw_xml=data + )) + return doc, kwargs class EBUTTDtoXMLAdapter(IDocumentDataAdapter): diff --git a/ebu_tt_live/adapters/test/test_data/testEbuttd.xml b/ebu_tt_live/adapters/test/test_data/testEbuttd.xml new file mode 100644 index 000000000..8061a303a --- /dev/null +++ b/ebu_tt_live/adapters/test/test_data/testEbuttd.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + It only took me six days. + + + + diff --git a/ebu_tt_live/adapters/test/test_document_data_adapters.py b/ebu_tt_live/adapters/test/test_document_data_adapters.py index d3b593bb8..c06a214f6 100644 --- a/ebu_tt_live/adapters/test/test_document_data_adapters.py +++ b/ebu_tt_live/adapters/test/test_document_data_adapters.py @@ -136,11 +136,49 @@ def test_sequence_id_mismatch(self): class TestXMLtoEBUTTDAdapter(TestCase): - _output_type = documents.EBUTTDDocument _adapter_class = document_data.XMLtoEBUTTDAdapter - _expected_keys = [] + _test_xml_file = 'testEbuttd.xml' + _test_data_dir_path = os.path.join(os.path.dirname(__file__), 'test_data') + _test_xml_path = os.path.join(_test_data_dir_path, _test_xml_file) + _output_type = documents.EBUTTDDocument + _expected_keys = [ + 'raw_xml' + ] + instance = None + + def setUp(self): + self.instance = self._adapter_class() + self.assertIsInstance(self.instance, IDocumentDataAdapter) - # TODO: Finish this once we have EBUTT-D parsing + def _assert_output_type(self, result): + self.assertIsInstance(result, self._output_type) + + def _assert_kwargs_passtrough(self, result_kwargs, expected_keys): + self.assertEqual(set(result_kwargs.keys()), set(expected_keys)) + + def _get_xml(self): + with open(self._test_xml_path, 'r') as xml_file: + xml_data = xml_file.read() + return xml_data + + def _get_input(self): + return self._get_xml() + + def test_success(self): + expected_keys = [] + expected_keys.extend(self._expected_keys) + result, res_kwargs = self.instance.convert_data(self._get_input()) + self._assert_output_type(result) + self._assert_kwargs_passtrough(res_kwargs, expected_keys) + + def test_kwargs_passthrough(self): + in_kwargs = { + 'foo': 'bar' + } + expected_keys = ['foo'] + expected_keys.extend(self._expected_keys) + result, res_kwargs = self.instance.convert_data(self._get_input(), **in_kwargs) + self._assert_kwargs_passtrough(res_kwargs, expected_keys) class TestEBUTT3toXMLAdapter(TestXMLtoEBUTT3Adapter): @@ -164,20 +202,18 @@ def test_sequence_id_match(self): pass -class TestEBUTTDtoXMLAdapter(TestEBUTT3toXMLAdapter): +class TestEBUTTDtoXMLAdapter(TestXMLtoEBUTTDAdapter): + _output_type = six.text_type _adapter_class = document_data.EBUTTDtoXMLAdapter _expected_keys = [] + def _get_input(self): + return documents.EBUTTDDocument.create_from_xml(self._get_xml()) + def _get_input(self): input_doc = documents.EBUTTDDocument(lang='en-GB') return input_doc - def test_sequence_id_mismatch(self): - pass - - def test_sequence_id_match(self): - pass - class TestEBUTT3toEBUTTDAdapter(TestXMLtoEBUTT3Adapter): _adapter_class = document_data.EBUTT3toEBUTTDAdapter diff --git a/ebu_tt_live/bindings/__init__.py b/ebu_tt_live/bindings/__init__.py index 9be639faf..de844c398 100644 --- a/ebu_tt_live/bindings/__init__.py +++ b/ebu_tt_live/bindings/__init__.py @@ -1539,6 +1539,11 @@ def _validateBinding_vx(self): super(d_tt_type, self)._validateBinding_vx() + def get_timing_type(self, timedelta_in): + if self.timeBase == 'media': + return ebuttdt.FullClockTimingType(timedelta_in) + else: + log.error('d_tt_type.get_timing_type() where self.timeBase == {}'.format(self.timeBase)) raw.d_tt_type._SetSupersedingClass(d_tt_type) @@ -1942,6 +1947,10 @@ def _semantic_before_traversal( parent_binding=None): self._semantic_preprocess_timing( dataset=dataset, element_content=element_content) + self._semantic_collect_applicable_styles( + dataset=dataset, + style_type=style_type, + parent_binding=parent_binding) def _semantic_after_traversal( self, @@ -2041,6 +2050,9 @@ def _validateBinding_vx(self): raw.layout: layout, raw.body_type: body_type, }, + 'ebuttd': { + raw.d_tt_type: d_tt_type, + }, } diff --git a/ebu_tt_live/bindings/_ebuttdt.py b/ebu_tt_live/bindings/_ebuttdt.py index 000f61fd8..474d7cf2c 100644 --- a/ebu_tt_live/bindings/_ebuttdt.py +++ b/ebu_tt_live/bindings/_ebuttdt.py @@ -68,8 +68,11 @@ def _ConvertArguments_vx(cls, args, kw): context = get_xml_parsing_context() if context is not None: # This means we are in XML parsing context. There should be a timeBase and a timing_attribute_name in the - # context object. - time_base = context['timeBase'] + # context object. But if there's no timeBase, in the context + # of EBU-TT-D, we will assume media. Some files in the wild + # trigger this behaviour, for reasons not yet identified, i.e. + # we somehow get here without having a timeBase context set. + time_base = context.get('timeBase', 'media') # It is possible for a timing type to exist as the value of an element not an attribute, # in which case no timing_attribute_name is in the context; in that case don't attempt # to validate the data against a timebase. At the moment this only affects the @@ -611,6 +614,9 @@ def _do_eq(self, other): def __eq__(self, other): return self._do_eq(other) + + def __hash__(self): + return hash((self.horizontal, self.vertical)) ebuttdt_raw.cellFontSizeType._SetSupersedingClass(CellFontSizeType) diff --git a/ebu_tt_live/bindings/pyxb_utils.py b/ebu_tt_live/bindings/pyxb_utils.py index 2fcfba8cb..8bcfa627d 100644 --- a/ebu_tt_live/bindings/pyxb_utils.py +++ b/ebu_tt_live/bindings/pyxb_utils.py @@ -23,7 +23,7 @@ def get_xml_parsing_context(): into account the timeBase attribute on the tt element. In that case when the timeBase element is encountered by the parser is is added to the parsing context object to help PyXB make the right type in the timingType union. - :return: dict that is te parsing context for the currently running parser + :return: dict that is the parsing context for the currently running parser :return: None if not in parsing mode """ log.debug('Accessing xml_parsing_context: {}'.format(__xml_parsing_context)) diff --git a/ebu_tt_live/documents/ebuttd.py b/ebu_tt_live/documents/ebuttd.py index ae2ebb84b..5cd3889cc 100644 --- a/ebu_tt_live/documents/ebuttd.py +++ b/ebu_tt_live/documents/ebuttd.py @@ -18,6 +18,7 @@ class EBUTTDDocument(SubtitleDocument, TimelineUtilMixin): _encoding = 'UTF-8' def __init__(self, lang): + self.load_types_for_document() self._ebuttd_content = bindings.ttd( timeBase='media', head=bindings.d_head_type( @@ -46,13 +47,23 @@ def validate(self): document=self ) + @classmethod + def load_types_for_document(cls): + bindings.load_types_for_document('ebuttd') + @classmethod def create_from_xml(cls, xml): # NOTE: This is a workaround to make the bindings accept separate root element identities # for the same name. tt comes in but we rename it to ttd to make the xsd validate. + cls.load_types_for_document() xml_dom = minidom.parseString(xml) - if xml_dom.documentElement.tagName == 'tt': - xml_dom.documentElement.tagName = 'ttd' + if xml_dom.documentElement.namespaceURI == 'http://www.w3.org/ns/ttml': + if xml_dom.documentElement.prefix is not None and \ + xml_dom.documentElement.prefix != '' and \ + xml_dom.documentElement.tagName == xml_dom.documentElement.prefix + ':tt': + xml_dom.documentElement.tagName = xml_dom.documentElement.prefix + ':ttd' + elif xml_dom.documentElement.tagName == 'tt': + xml_dom.documentElement.tagName = 'ttd' instance = cls.create_from_raw_binding( binding=bindings.CreateFromDOM( xml_dom @@ -62,6 +73,7 @@ def create_from_xml(cls, xml): @classmethod def create_from_raw_binding(cls, binding): + cls.load_types_for_document() instance = cls.__new__(cls) instance._ebuttd_content = binding return instance diff --git a/ebu_tt_live/gen_uax24.py b/ebu_tt_live/gen_uax24.py new file mode 100644 index 000000000..dccd71ec4 --- /dev/null +++ b/ebu_tt_live/gen_uax24.py @@ -0,0 +1,116 @@ +"""Process the UAX24 scripts at https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt +to generate a Python equivalent. + +For example a command like: +python ebu_tt_live/gen_uax24.py -scriptFile uax24scripts.txt -outFile ebu_tt_live/uax24.py + +will generate a Python file that specifies script lists that can be queried. +""" + +import argparse +import sys +from csv import reader + +LIST_SUFFIX='_list' +TRIPLE_QUOTE='"""' +SCRIPTS_TO_LIST={ + 'Common': [], + 'Latin': [], + 'Greek': [], + 'Cyrillic': [], + 'Hebrew': [], + 'Han': [], + 'Katakana': [], + 'Hiragana': [], + 'Bopomofo': [], + 'Hangul': [], +} + +# https://stackoverflow.com/questions/14158868/python-skip-comment-lines-marked-with-in-csv-dictreader +def decomment(csvfile): + for row in csvfile: + raw = row.split('#')[0].strip() + if raw: yield raw + +def writeComments(outFile): + outFile.write(TRIPLE_QUOTE) + outFile.write( + 'Utility for discovering which UAX24 script a given character code is in,\n' + 'useful for example in computing the copy or render times in the IMSC-HRM.\n' + '\n' + 'Auto-generated from UAX24 Scripts.txt using gen_uax24.py\n') + outFile.write(TRIPLE_QUOTE) + outFile.write('\n') + return + +def writeFuncs(outFile): + outFile.write( + 'def lr(a, b):\n' + ' return list(range(a, b + 1))\n' + '\n') + return + +def genLists(csv_reader): + for row in csv_reader: + scr = row[1].strip().split(' ', maxsplit=1)[0] + if scr in SCRIPTS_TO_LIST: + SCRIPTS_TO_LIST[scr].append(row[0].strip()) + return + +def charOrRange(char_code: str) -> str: + range_indicator = char_code.find('..') + if range_indicator != -1: + return '*lr(0x{}, 0x{})'.format( + char_code[0:range_indicator], + char_code[range_indicator+2:] # assume already stripped of trailing spaces + ) + else: + return '0x{}'.format(char_code) + +def writeLists(outFile): + for script, char_codes in SCRIPTS_TO_LIST.items(): + outFile.write('\n{}{} = [\n'.format(script, LIST_SUFFIX)) + for char_code in char_codes: + outFile.write(' {},\n'.format( + charOrRange(char_code) + )) + outFile.write(']\n') + return + +def generateUax24(args) -> int: + csv_reader = reader(decomment(args.scriptFile), delimiter=';', skipinitialspace=True) + outFile = args.outFile + writeComments(outFile) + writeFuncs(outFile) + genLists(csv_reader) + writeLists(outFile) + + return 1 + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '-scriptFile', + type=argparse.FileType('rt'), + required=True, + help='UAX24 Scripts file', + action='store') + + parser.add_argument( + '-outFile', + type=argparse.FileType('wt'), + default=sys.stdout, + nargs='?', + help='Location to write the python file representing the scripts', + action='store') + + parser.set_defaults(func=generateUax24) + + args = parser.parse_args() + return args.func(args) + + +if __name__ == "__main__": + # execute only if run as a script + main() diff --git a/ebu_tt_live/scripts/imsc_hrm_validator.py b/ebu_tt_live/scripts/imsc_hrm_validator.py new file mode 100644 index 000000000..7892ace18 --- /dev/null +++ b/ebu_tt_live/scripts/imsc_hrm_validator.py @@ -0,0 +1,423 @@ +from collections import namedtuple +from datetime import timedelta +from ebu_tt_live.documents import EBUTTDDocument +from ebu_tt_live.bindings import d_p_type, d_span_type +from ebu_tt_live.bindings._ebuttdt import CellFontSizeType, PercentageOriginType, PercentageExtentType, rgbHexColorType, rgbaHexColorType, namedColorType, named_color_to_rgba +from pyxb.binding.basis import NonElementContent, ElementContent +import ebu_tt_live.uax24 as uax24 +import logging + + +log = logging.getLogger('imsc_hrm_validator') +log.setLevel(logging.DEBUG) + +glyphStyles = [ + 'color', + 'fontFamily', + 'fontSize', + 'fontStyle', + 'fontWeight', + 'textDecoration', + # 'textOutline', # textOutline is not permitted in EBU-TT-D + # 'textShadow', # textShadow is not permitted in EBU-TT_D +] + +glyph_tuple_fieldnames = glyphStyles[:] # copy not reference +glyph_tuple_fieldnames.append('characterCode') +glyph = namedtuple('glyph', glyph_tuple_fieldnames) # Tuple to represent a Glyph + + +GCpy12_chars = set( + uax24.Latin_list + + uax24.Greek_list + + uax24.Cyrillic_list + + uax24.Hebrew_list + + uax24.Common_list) + +Ren0_6_chars = set( + uax24.Han_list + + uax24.Katakana_list + + uax24.Hiragana_list + + uax24.Bopomofo_list + + uax24.Hangul_list +) + +class imscHrmValidator: + """Class for validating an EBU-TT-D document against the IMSC-HRM. + + IMSC-HRM is specified at https://www.w3.org/TR/imsc-hrm/ . + """ + + # IMSC HRM constants + _ipd = 1 + _BDraw = 12 + + # Things we need + _glyphCache = set() # Array of glyphs + _doc = None + _p_to_parent_div_with_background_color = {} + # _any_region_has_background_color_and_show_background_always = False + _region_ids_with_always_background = None + _cell_height = float(1/15) + + def _getIsdTimes(self) -> list: + """Get the set of ISD times.""" + include_extra_times = [] + if len(self._region_ids_with_always_background) > 0: + # in EBU-TT-D region elements cannot have begin or end times, + # so if they have an opaque background colour and + # showBackground="always" (the default) then there must + # be an ISD beginning at time zero, but the document's + # timeline may exclude it: force it to be present. + include_extra_times.append(timedelta(seconds=0)) + isd_times = sorted(set([*include_extra_times, *[t.when for t in self._doc.timeline]])) + return isd_times + + def _getIsd(self, isd_begin_time, isd_end_time): + """Get the elements in the ISD for a particular time""" + return self._doc.lookup_range_on_timeline(isd_begin_time, isd_end_time if isd_end_time is not None else None) + + def _isEmptyISD(self, isd) -> bool: + """Determine if the ISD is empty or renders some text.""" + if len(self._region_ids_with_always_background) > 0: + return False + + empty = True + for e in isd: + for c in e.orderedContent(): + v = c.value + if isinstance(c, NonElementContent): + region = e._validated_region + if region is None: + region = e._inherited_region + if region is None: + continue # prune content with no associated region + empty = (len(v) == 0) + elif isinstance(v, d_span_type): + region = e._validated_region + if region is None: + region = e._inherited_region + if region is None: + continue # prune content with no associated region + + for sc in v.orderedContent(): + if isinstance(sc, NonElementContent): + empty = (len(sc.value) == 0) + if not empty: + break + + if not empty: + break + + return empty + + def _hasBackgroundColor(self, element) -> bool: + rv = False + + backgroundColor = element.computed_style.backgroundColor + if backgroundColor is not None: + if isinstance(backgroundColor, namedColorType): + backgroundColor = rgbaHexColorType(named_color_to_rgba(backgroundColor)) + if isinstance(backgroundColor, rgbaHexColorType) and len(backgroundColor) == 9: + opacity = backgroundColor[-2:] + rv = (opacity != '00') + elif isinstance(backgroundColor, rgbHexColorType): + rv = True + + log.debug('backgroundColor {} has opacity {}'.format(backgroundColor, rv)) + return rv + + def _preprocess_regions(self): + """Check if any region has an opaque backgroundColor and showBackground set to always. + + If this is the case, then no ISD is ever empty, because every ISD has to paint that + region's background. + """ + # self._any_region_has_background_color_and_show_background_always = False + self._region_ids_with_always_background = set() + for region in self._doc.binding.head.layout.region: + if self._hasBackgroundColor(region) and \ + (region.showBackground is None or region.showBackground=="always"): + self._any_region_has_background_color_and_show_background_always = True + log.debug( + 'Found region {} with opaque backgroundColor and showBackground="always"'.format( + region.id)) + self._region_ids_with_always_background.add(region.id) + + def _preprocess_divs(self): + """For all divs with an opaque backgroundColor, record their p children + + This is so we can count divs in NGP: the timeline only gives us p elements.""" + + self._p_to_parent_div_with_background_color.clear() + for div in self._doc.binding.body.div: + if (self._hasBackgroundColor(div)): + for p in div.p: + # div elements might not have an id, so we need to map to the object + self._p_to_parent_div_with_background_color[p.id] = div + + def _drawingAreaS(self, isd) -> float: + """Calculate the drawing area of the active regions in the ISD. + + We need to sum the product of region area and total number of elements + in the tree rooted at each region including body, div, p and span that + have a tts:backgroundColor whose opacity is not zero. + + A feature of EBU-TT-D is that divs cannot contain divs and spans cannot + contain spans, so we can check the body's style once, for each region, + and need to traverse up to the div parent of each p to count the unique + divs too. We can ignore character content children because by definition + they cannot set the backgroundColor attribute, + whose default has opacity 0.""" + + region_set = set() + region_to_element_count = {} + region_to_div_map = {} # will map all the divs selected into each region + body_has_background_color = 0 + body = self._doc.binding.body + + if self._hasBackgroundColor(body): + body_has_background_color = 1 + log.debug('body has background color') + + for region_id in self._region_ids_with_always_background: + region_set.add(self._doc.get_element_by_id(region_id)) + region_to_element_count[region_id] = 1 + body_has_background_color + region_to_div_map[region_id] = set() + + for e in isd: + if isinstance(e, d_p_type): + log.debug('processing p id {}'.format(e.id)) + region = e._validated_region + if region is None: + region = e._inherited_region + if region is None: + # It's possible to have a p with no associated region. + # If this happens, then skip it as though it were pruned. + continue + region_set.add(region) + if region.id not in region_to_element_count: + log.debug('adding new region id {}'.format(region.id)) + region_to_element_count[region.id] = body_has_background_color + region_to_div_map[region.id] = set() + if self._hasBackgroundColor(region): + region_to_element_count[region.id] += 1 + log.debug('region has a background color') + if self._hasBackgroundColor(e): + region_to_element_count[region.id] += 1 + log.debug('p has a background color') + # Make sure we count the div for this region if it has a backgroundColor + if e.id in self._p_to_parent_div_with_background_color: + log.debug('this p is in a div with a background color') + region_to_div_map[region.id].add(self._p_to_parent_div_with_background_color[e.id]) + log.debug('processing {} span children of p id {}'.format(len(e.span), e.id)) + for span_child in e.span: + if self._hasBackgroundColor(span_child): + log.debug('span has background color') + region_to_element_count[region.id] += 1 + else: + log.debug('span with no background color') + + for rid in region_to_element_count.keys(): + region_to_element_count[rid] += len(region_to_div_map[rid]) + log.debug(region_to_element_count) + + PAINT = 0.0 + for region in region_set: + extent = PercentageExtentType(region.extent) + + NSIZE = extent.horizontal/100 * extent.vertical/100 + PAINT += NSIZE * region_to_element_count[region.id] + log.debug('Region {} has NSIZE = {} and NBG = {}'.format( + region.id, + NSIZE, + region_to_element_count[region.id] + )) + + S = 1 + PAINT # CLEAR = 1 + log.debug('S = {}, PAINT = {}'.format(S, PAINT)) + + return S + + def _getGlyphStyles(self, e) -> dict: + """ Get the set of glyph styles from the provided element.""" + + rv = dict.fromkeys(glyphStyles) + cs = e.computed_style + for style_attr in glyphStyles: + rv[style_attr] = getattr(cs, style_attr) + return rv + + def _calc_NRGA(self, fontSize) -> float: + if isinstance(fontSize, CellFontSizeType): + return (fontSize.vertical * self._cell_height)**2 + else: + log.error('unexpected fontSize {} of type {}'.format( + fontSize, + type(fontSize).__name__)) + breakpoint() + + return 100 # silly big number + + def _GCpy(self, char) -> float: + log.debug('char {} is {}in GCpy12_chars'.format( + char, + '' if char in GCpy12_chars else 'not ' + )) + return 12 if char in GCpy12_chars else 3 + + def _Ren(self, char) -> float: + log.debug('char {} is {}in Ren0_6_chars'.format( + char, + '' if char in Ren0_6_chars else 'not ' + )) + return 0.6 if char in Ren0_6_chars else 1.2 + + def _checkGlyphCacheSize(self) -> bool: + """Compute sum of NRGA over all glyphs and check it is not largher than NGBS""" + NGBS = 1 + NRGA_SUM = 0 + for g in self._glyphCache: + NRGA_SUM += self._calc_NRGA(g.fontSize) + + log.debug('Glyph cache size check: NRGA_SUM = {}'.format(NRGA_SUM)) + + return (NRGA_SUM <= NGBS) + + def _textDuration(self, isd) -> float: + """Compute the painting duration for the text in the ISD. + + We will assume that the glyph cache is in a good state to start.""" + this_text = '' + this_style = dict.fromkeys(glyphStyles) + DURT = 0 + + next_glyph_cache = set() + + log.debug('Calculating textDuration for isd') + + # breakpoint() + for p in isd: + # should be a p + if isinstance(p, d_p_type): + log.debug('Processing p id={}'.format(p.id)) + for poci in p.orderedContent(): + if isinstance(poci, NonElementContent): + log.debug('processing character content child of p') + this_text = poci.value + this_style = self._getGlyphStyles(p) + elif isinstance(poci.value, d_span_type): + log.debug('processing a span') + this_style = self._getGlyphStyles(poci.value) + this_text = '' + for soci in poci.value.orderedContent(): + if isinstance(soci, NonElementContent): + this_text += soci.value + else: + continue + + log.debug('this_text: {}'.format(this_text)) + log.debug('this_style: {} '.format(this_style)) + this_NRGA = self._calc_NRGA(this_style['fontSize']) + log.debug('this_NRGA = {}'.format(this_NRGA)) + # iterate through text and style processing glyphs + for char in this_text: + charCode = ord(char) + # there must be a better way to make our glyph tuple + # than the next 3 lines, but I haven't found it. + tsc = this_style.copy() + tsc.update({'characterCode': charCode}) + this_glyph = glyph(**tsc) + if this_glyph in self._glyphCache: + log.debug('glyph for {} ({}) is in the glyph cache'.format(char, charCode)) + DURT += this_NRGA / self._GCpy(charCode) + next_glyph_cache.add(this_glyph) + elif this_glyph in next_glyph_cache: + log.debug('glyph for {} ({}) already rendered in this ISD'.format(char, charCode)) + DURT += this_NRGA / self._GCpy(charCode) + else: + log.debug('rendering glyph for {} ({})'.format(char, charCode)) + DURT += this_NRGA / self._Ren(charCode) + next_glyph_cache.add(this_glyph) + else: # not a p + log.warning('Found non p element type {}'.format(type(p).__name__)) + + self._glyphCache = next_glyph_cache + + log.debug('Returning DURT = {}'.format(DURT)) + return DURT + + def _paintingDuration(self, isd) -> float: + """Compute the total painting duration for the ISD.""" + return self._drawingAreaS(isd)/self._BDraw + self._textDuration(isd) + + def _setup(self, doc: EBUTTDDocument) -> None: + self._glyphCache = set() + self._doc = doc + self._preprocess_divs() + self._preprocess_regions() + if doc.binding.cellResolution is not None: + self._cell_height = 1/doc.binding.cellResolution.vertical + else: + self._cell_height = 1/15 + log.debug('Cell height = {}'.format(self._cell_height)) + + def validate(self, doc: EBUTTDDocument) -> bool: + """Validate the EBU-TT-D document against the IMSC-HRM. + + :param doc EBUTTDDocument: a validated EBUTTDDocument object + + :return: True if the document is valid, False otherwise. + """ + self._setup(doc) + + last_nonzero_presentation_time = timedelta(seconds=0 - self._ipd) + + rv = True + + timeline = self._getIsdTimes(); + log.debug('timeline: {}'.format(timeline)) + + timeline_entries = len(timeline) + + for timeline_idx in range(0, timeline_entries): + isd = self._getIsd( + timeline[timeline_idx], + timeline[timeline_idx + 1] if (timeline_idx + 1) < timeline_entries else None) + + if self._isEmptyISD(isd): + log.debug('ISD beginning at {} is empty'.format(timeline[timeline_idx])) + continue + else: + log.debug('ISD beginning at {} is not empty'.format(timeline[timeline_idx])) + + # Work out how long we have to draw this + available_draw_time = \ + (timeline[timeline_idx] - last_nonzero_presentation_time).total_seconds() + if available_draw_time > self._ipd: + available_draw_time = self._ipd + + # remember for next time round the loop + last_nonzero_presentation_time = timeline[timeline_idx] + + painting_dur = self._paintingDuration(isd) + log.debug( + 'ISD beginning at {} has painting duration {} and available time {}'.format( + timeline[timeline_idx], + painting_dur, + available_draw_time + )) + if painting_dur > available_draw_time: + rv = False + log.error('ISD at {} fails validation'.format(timeline[timeline_idx])) + + if not self._checkGlyphCacheSize(): + rv = False + log.error('Glyph cache total NRGA is larger than NGBS') + + if rv: + log.info('Document is valid') + else: + log.error('Document is not valid') + + return rv diff --git a/ebu_tt_live/scripts/test/data/document_ebuttd.xml b/ebu_tt_live/scripts/test/data/document_ebuttd.xml new file mode 100644 index 000000000..a407865b4 --- /dev/null +++ b/ebu_tt_live/scripts/test/data/document_ebuttd.xml @@ -0,0 +1,31 @@ + + + + + +