From 1712253957803d5d4976ec73c99b18d50d0d600a Mon Sep 17 00:00:00 2001
From: Mark Gillard
Date: Sat, 11 Sep 2021 10:54:13 +0300
Subject: [PATCH] fixed a crash during html post-processing

also:
- fixed `implementation_headers` not working when paths use backslashes
- added warnings when `implementation_headers` doesn't match anything
- added `sources.ignore`

---
 poxy/data/version.txt |  2 +-
 poxy/fixers.py        |  1 +
 poxy/main.py          |  2 +-
 poxy/project.py       | 20 ++++++++---
 poxy/run.py           | 80 ++++++++++++++++++++++++++++++-------------
 poxy/soup.py          |  6 ++--
 6 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/poxy/data/version.txt b/poxy/data/version.txt
index 0bfccb0..8f0916f 100644
--- a/poxy/data/version.txt
+++ b/poxy/data/version.txt
@@ -1 +1 @@
-0.4.5
+0.5.0
diff --git a/poxy/fixers.py b/poxy/fixers.py
index bbda1cd..ff70bc7 100644
--- a/poxy/fixers.py
+++ b/poxy/fixers.py
@@ -680,6 +680,7 @@ def __call__(self, doc, context):
 		strings = []
 		for tag in tags:
 			strings = strings + soup.string_descendants(tag, lambda t: soup.find_parent(t, 'a', tag) is None)
+		strings = [s for s in strings if s.parent is not None]
 		for expr, uri in context.autolinks:
 			if uri == doc.path.name: # don't create unnecessary self-links
 				continue
diff --git a/poxy/main.py b/poxy/main.py
index 7f53b70..dfb3191 100644
--- a/poxy/main.py
+++ b/poxy/main.py
@@ -57,7 +57,7 @@ def _run(invoker=True):
 		help=r'path to poxy.toml or a directory containing it (default: %(default)s)'
 	)
 	args.add_argument(
-		r'-v', r'--verbose', 
+		r'-v', r'--verbose',
 		action=r'store_true',
 		help=r"enable very noisy diagnostic output"
 	)
diff --git a/poxy/project.py b/poxy/project.py
index ffdaf95..03b0ddf 100644
--- a/poxy/project.py
+++ b/poxy/project.py
@@ -740,6 +740,7 @@ class _Inputs(object):
 	schema = {
 		Optional(r'paths')           : ValueOrArray(str, name=r'paths'),
 		Optional(r'recursive_paths') : ValueOrArray(str, name=r'recursive_paths'),
+		Optional(r'ignore')          : ValueOrArray(str, name=r'ignore'),
 	}
 
 	def __init__(self, config, key, input_dir, additional_inputs=None, additional_recursive_inputs=None):
@@ -761,7 +762,7 @@ def __init__(self, config, key, input_dir, additional_inputs=None, additional_re
 		if config is not None and key in config:
 			paths = paths + [p for p in coerce_collection(config[key])]
 		paths = [p for p in paths if p]
-		paths = [str(p).strip() for p in paths]
+		paths = [str(p).strip().replace('\\', '/') for p in paths]
 		paths = [Path(p) for p in paths if p]
 		paths = [Path(input_dir, p) if not p.is_absolute() else p for p in paths]
 		paths = [p.resolve() for p in paths]
@@ -774,6 +775,15 @@ def __init__(self, config, key, input_dir, additional_inputs=None, additional_re
 			if recursive and path.is_dir():
 				for subdir in enum_subdirs(path, filter=lambda p: not p.name.startswith(r'.'), recursive=True):
 					all_paths.add(subdir)
+
+		ignores = set()
+		if config is not None and r'ignore' in config:
+			for s in coerce_collection(config[r'ignore']):
+				ignores.add(s.strip())
+		ignores = [re.compile(i) for i in ignores if i]
+		for ignore in ignores:
+			all_paths = [p for p in all_paths if not ignore.search(str(p))]
+
 		self.paths = list(all_paths)
 		self.paths.sort()
 
@@ -811,7 +821,7 @@ class _Sources(_FilteredInputs):
 	schema = combine_dicts(_FilteredInputs.schema, {
 		Optional(r'strip_paths')    : ValueOrArray(str, name=r'strip_paths'),
 		Optional(r'strip_includes') : ValueOrArray(str, name=r'strip_includes'),
-		Optional(r'extract_all')    : bool,
+		Optional(r'extract_all')    : bool
 	})
 
 	def __init__(self, config, key, input_dir, additional_inputs=None, additional_recursive_inputs=None):
@@ -1447,12 +1457,12 @@ def __init__(self, config_path, output_dir, threads, cleanup, verbose, mcss_dir,
 			self.implementation_headers = []
 			if 'implementation_headers' in config:
 				for k, v in config['implementation_headers'].items():
-					header = k.strip()
+					header = k.strip().replace('\\', '/')
 					impls = coerce_collection(v)
-					impls = [i.strip() for i in impls]
+					impls = [i.strip().replace('\\', '/') for i in impls]
 					impls = [i for i in impls if i]
 					if header and impls:
-						self.implementation_headers .append((header, impls))
+						self.implementation_headers.append((header, impls))
 			self.implementation_headers = tuple(self.implementation_headers)
 			self.verbose_value(r'Context.implementation_headers', self.implementation_headers)
 
diff --git a/poxy/run.py b/poxy/run.py
index 58780ab..c5559cc 100644
--- a/poxy/run.py
+++ b/poxy/run.py
@@ -421,6 +421,8 @@ def _postprocess_xml(context):
 	implementation_header_mappings = None
 	implementation_header_innernamespaces = None
 	implementation_header_sectiondefs = None
+	implementation_header_unused_keys = None
+	implementation_header_unused_values = None
 	if context.implementation_headers:
 		implementation_header_data = [
 			(
@@ -431,6 +433,13 @@ def _postprocess_xml(context):
 			)
 			for hp, impl in context.implementation_headers
 		]
+		implementation_header_unused_keys = set()
+		for hp, impl in context.implementation_headers:
+			implementation_header_unused_keys.add(hp)
+		implementation_header_unused_values = dict()
+		for hdata in implementation_header_data:
+			for (ip, ifn, iid) in hdata[3]:
+				implementation_header_unused_values[iid] = (ip, hdata[0])
 		implementation_header_mappings = dict()
 		implementation_header_innernamespaces = dict()
 		implementation_header_sectiondefs = dict()
@@ -440,23 +449,23 @@ def _postprocess_xml(context):
 			for (ip, ifn, iid) in hdata[3]:
 				implementation_header_mappings[iid] = hdata
 
+	# process xml files
 	if 1:
 
 		# pre-pass to delete junk files
 		if 1:
-			# delete the new Doxyfile.xml (https://github.com/doxygen/doxygen/pull/8463)
+			# delete Doxyfile.xml (https://github.com/doxygen/doxygen/pull/8463)
 			# (it breaks m.css otherwise)
 			if not context.xml_only:
 				delete_file(Path(context.xml_dir, r'Doxyfile.xml'), logger=context.verbose_logger)
 
 			# 'file' entries for markdown and dox files
-			dox_files = (r'.dox', r'.md')
-			dox_files = [rf'*{doxygen.mangle_name(ext)}.xml' for ext in dox_files]
+			dox_files = [rf'*{doxygen.mangle_name(ext)}.xml' for ext in (r'.dox', r'.md')]
 			dox_files.append(r'md_home.xml')
 			for xml_file in get_all_files(context.xml_dir, any=dox_files):
 				delete_file(xml_file, logger=context.verbose_logger)
 
-			# 'dir' entries which contain nothing
+			# 'dir' entries for empty directories
 			deleted = True
 			while deleted:
 				deleted = False
@@ -697,12 +706,16 @@ def _postprocess_xml(context):
 
 				# rip the good bits out of implementation headers
 				if context.implementation_headers:
-					if compounddef.get(r'id') in implementation_header_mappings:
-						hid = implementation_header_mappings[compounddef.get("id")][2]
+					iid = compounddef.get(r'id')
+					if iid in implementation_header_mappings:
+						hid = implementation_header_mappings[iid][2]
 						innernamespaces = compounddef.findall(r'innernamespace')
 						if innernamespaces:
 							implementation_header_innernamespaces[hid] = implementation_header_innernamespaces[hid] + innernamespaces
 							extracted_implementation = True
+							implementation_header_unused_keys.discard(implementation_header_mappings[iid][0])
+							if iid in implementation_header_unused_values:
+								del implementation_header_unused_values[iid]
 							for tag in innernamespaces:
 								compounddef.remove(tag)
 								changed = True
@@ -710,6 +723,9 @@ def _postprocess_xml(context):
 						if sectiondefs:
 							implementation_header_sectiondefs[hid] = implementation_header_sectiondefs[hid] + sectiondefs
 							extracted_implementation = True
+							implementation_header_unused_keys.discard(implementation_header_mappings[iid][0])
+							if iid in implementation_header_unused_values:
+								del implementation_header_unused_values[iid]
 							for tag in sectiondefs:
 								compounddef.remove(tag)
 								changed = True
@@ -787,8 +803,17 @@ def _postprocess_xml(context):
 							changed = True
 
 			if changed:
 				write_xml_to_file(xml, xml_file)
 
+		# sanity-check implementation header state
+		if implementation_header_unused_keys:
+			for key in implementation_header_unused_keys:
+				context.warning(rf"implementation_header: nothing extracted for '{key}'")
+		if implementation_header_unused_values:
+			for iid, idata in implementation_header_unused_values.items():
+				context.warning(rf"implementation_header: nothing extracted from '{idata[0]}' for '{idata[1]}'")
+
+		# delete the impl header xml files
 		if 1 and context.implementation_headers:
 			for hdata in implementation_header_data:
@@ -836,23 +861,31 @@ def _postprocess_html_file(path, context=None):
 	context.verbose(rf'Post-processing {path}')
 	html_changed = False
-	if html_fixers:
-		doc = soup.HTMLDocument(path, logger=context.verbose_logger)
-		for fix in html_fixers:
-			if fix(doc, context):
-				doc.smooth()
-				html_changed = True
-	if html_changed:
-		doc.flush()
-
 	plain_text_changed = False
-	if plain_text_fixers:
-		doc = [ read_all_text_from_file(path, logger=context.verbose_logger) ]
-		for fix in plain_text_fixers:
-			if fix(doc, context):
-				plain_text_changed = True
-	if plain_text_changed:
-		context.verbose(rf'Writing {path}')
-		with open(path, 'w', encoding='utf-8', newline='\n') as f:
-			f.write(doc[0])
+
+	try:
+		if html_fixers:
+			doc = soup.HTMLDocument(path, logger=context.verbose_logger)
+			for fix in html_fixers:
+				if fix(doc, context):
+					doc.smooth()
+					html_changed = True
+			if html_changed:
+				doc.flush()
+
+		if plain_text_fixers:
+			doc = [ read_all_text_from_file(path, logger=context.verbose_logger) ]
+			for fix in plain_text_fixers:
+				if fix(doc, context):
+					plain_text_changed = True
+			if plain_text_changed:
+				context.verbose(rf'Writing {path}')
+				with open(path, 'w', encoding='utf-8', newline='\n') as f:
+					f.write(doc[0])
+	except Exception as e:
+		context.info(rf'{type(e).__name__} raised while post-processing {path}')
+		raise
+	except:
+		context.info(rf'Error occurred while post-processing {path}')
+		raise
 
 	return html_changed or plain_text_changed
diff --git a/poxy/soup.py b/poxy/soup.py
index 433a317..72ba446 100644
--- a/poxy/soup.py
+++ b/poxy/soup.py
@@ -42,12 +42,12 @@ def destroy_node(node):
 
 
 def replace_tag(tag, new_tag_str):
+	assert tag.parent is not None
 	newTags = []
 	if new_tag_str:
 		doc = bs4.BeautifulSoup(new_tag_str, 'html5lib')
-		if (len(doc.body.contents) > 0):
-			newTags = [f for f in doc.body.contents]
-			newTags = [f.extract() for f in newTags]
+		if len(doc.body.contents) > 0:
+			newTags = [f.extract() for f in list(doc.body.contents)]
 	prev = tag
 	for newTag in newTags:
 		prev.insert_after(newTag)