fixed a crash during html post-processing
also:
- fixed `implementation_headers` not working when paths use backslashes
- added warnings when `implementation_headers` doesn't match anything
- added `sources.ignore`
marzer committed Sep 11, 2021
1 parent 292cf78 commit 1712253
Showing 6 changed files with 77 additions and 34 deletions.
2 changes: 1 addition & 1 deletion poxy/data/version.txt
@@ -1 +1 @@
-0.4.5
+0.5.0
1 change: 1 addition & 0 deletions poxy/fixers.py
@@ -680,6 +680,7 @@ def __call__(self, doc, context):
         strings = []
         for tag in tags:
             strings = strings + soup.string_descendants(tag, lambda t: soup.find_parent(t, 'a', tag) is None)
+        strings = [s for s in strings if s.parent is not None]
         for expr, uri in context.autolinks:
             if uri == doc.path.name: # don't create unnecessary self-links
                 continue
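The one-line guard in `fixers.py` is the heart of the crash fix: a fixer that replaces a tag can detach previously collected text nodes, leaving them with `parent == None`, and a later pass that tries to re-parent such a node dies. A minimal sketch of the failure mode (hypothetical markup; assumes `beautifulsoup4` is installed):

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup('<p>foo <b>bar</b></p>', 'html.parser')
strings = list(soup.strings)   # snapshot of text nodes, as the fixer does
strings[0].extract()           # an earlier replacement rips this node out
print(strings[0].parent)       # None -- re-parenting it now has no anchor
strings = [s for s in strings if s.parent is not None]   # the new guard
```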
2 changes: 1 addition & 1 deletion poxy/main.py
@@ -57,7 +57,7 @@ def _run(invoker=True):
     help=r'path to poxy.toml or a directory containing it (default: %(default)s)'
 )
 args.add_argument(
-    r'-v', r'--verbose', 
+    r'-v', r'--verbose',
     action=r'store_true',
     help=r"enable very noisy diagnostic output"
 )
20 changes: 15 additions & 5 deletions poxy/project.py
@@ -740,6 +740,7 @@ class _Inputs(object):
     schema = {
         Optional(r'paths') : ValueOrArray(str, name=r'paths'),
         Optional(r'recursive_paths') : ValueOrArray(str, name=r'recursive_paths'),
+        Optional(r'ignore') : ValueOrArray(str, name=r'ignore'),
     }

def __init__(self, config, key, input_dir, additional_inputs=None, additional_recursive_inputs=None):
@@ -761,7 +762,7 @@ def __init__(self, config, key, input_dir, additional_inputs=None, additional_recursive_inputs=None):
         if config is not None and key in config:
             paths = paths + [p for p in coerce_collection(config[key])]
         paths = [p for p in paths if p]
-        paths = [str(p).strip() for p in paths]
+        paths = [str(p).strip().replace('\\', '/') for p in paths]
         paths = [Path(p) for p in paths if p]
         paths = [Path(input_dir, p) if not p.is_absolute() else p for p in paths]
         paths = [p.resolve() for p in paths]
@@ -774,6 +775,15 @@ def __init__(self, config, key, input_dir, additional_inputs=None, additional_recursive_inputs=None):
             if recursive and path.is_dir():
                 for subdir in enum_subdirs(path, filter=lambda p: not p.name.startswith(r'.'), recursive=True):
                     all_paths.add(subdir)
+
+        ignores = set()
+        if config is not None and r'ignore' in config:
+            for s in coerce_collection(config[r'ignore']):
+                ignores.add(s.strip())
+            ignores = [re.compile(i) for i in ignores if i]
+            for ignore in ignores:
+                all_paths = [p for p in all_paths if not ignore.search(str(p))]
+
         self.paths = list(all_paths)
         self.paths.sort()
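Each `sources.ignore` entry is compiled as a regular expression and matched against the string form of every resolved input path; anything matching any pattern is dropped. A rough sketch with made-up patterns and paths (not from the poxy sources):

```python
import re
from pathlib import Path

# hypothetical stand-ins for config[r'ignore'] and the collected inputs
ignore_patterns = [r'\.test\.h$', r'detail']
all_paths = {Path('include/lib/a.h'), Path('include/lib/detail/b.h')}

ignores = [re.compile(i) for i in ignore_patterns]
for ignore in ignores:
    all_paths = [p for p in all_paths if not ignore.search(str(p))]
print(all_paths)   # only include/lib/a.h survives
```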

@@ -811,7 +821,7 @@ class _Sources(_FilteredInputs):
     schema = combine_dicts(_FilteredInputs.schema, {
         Optional(r'strip_paths') : ValueOrArray(str, name=r'strip_paths'),
         Optional(r'strip_includes') : ValueOrArray(str, name=r'strip_includes'),
-        Optional(r'extract_all') : bool,
+        Optional(r'extract_all') : bool
     })

def __init__(self, config, key, input_dir, additional_inputs=None, additional_recursive_inputs=None):
@@ -1447,12 +1457,12 @@ def __init__(self, config_path, output_dir, threads, cleanup, verbose, mcss_dir,
         self.implementation_headers = []
         if 'implementation_headers' in config:
             for k, v in config['implementation_headers'].items():
-                header = k.strip()
+                header = k.strip().replace('\\', '/')
                 impls = coerce_collection(v)
-                impls = [i.strip() for i in impls]
+                impls = [i.strip().replace('\\', '/') for i in impls]
                 impls = [i for i in impls if i]
                 if header and impls:
-                    self.implementation_headers .append((header, impls))
+                    self.implementation_headers.append((header, impls))
         self.implementation_headers = tuple(self.implementation_headers)
         self.verbose_value(r'Context.implementation_headers', self.implementation_headers)
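The backslash fix is plain eager normalization: keys and values from the `implementation_headers` table are rewritten with forward slashes before ever being compared against Doxygen's output, so Windows-style paths in poxy.toml no longer silently fail to match. Distilled to a sketch (the config dict here is hypothetical):

```python
# hypothetical implementation_headers table as parsed from poxy.toml
config = {'src\\lib.h': ['src\\lib_impl.h', '']}

implementation_headers = []
for k, v in config.items():
    header = k.strip().replace('\\', '/')
    impls = [i.strip().replace('\\', '/') for i in v]
    impls = [i for i in impls if i]   # drop empty entries
    if header and impls:
        implementation_headers.append((header, impls))
print(implementation_headers)   # [('src/lib.h', ['src/lib_impl.h'])]
```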

80 changes: 56 additions & 24 deletions poxy/run.py
@@ -421,6 +421,8 @@ def _postprocess_xml(context):
     implementation_header_mappings = None
     implementation_header_innernamespaces = None
     implementation_header_sectiondefs = None
+    implementation_header_unused_keys = None
+    implementation_header_unused_values = None
     if context.implementation_headers:
         implementation_header_data = [
             (
@@ -431,6 +433,13 @@
             )
             for hp, impl in context.implementation_headers
         ]
+        implementation_header_unused_keys = set()
+        for hp, impl in context.implementation_headers:
+            implementation_header_unused_keys.add(hp)
+        implementation_header_unused_values = dict()
+        for hdata in implementation_header_data:
+            for (ip, ifn, iid) in hdata[3]:
+                implementation_header_unused_values[iid] = (ip, hdata[0])
         implementation_header_mappings = dict()
         implementation_header_innernamespaces = dict()
         implementation_header_sectiondefs = dict()
@@ -440,23 +449,23 @@
             for (ip, ifn, iid) in hdata[3]:
                 implementation_header_mappings[iid] = hdata

     # process xml files
     if 1:

         # pre-pass to delete junk files
         if 1:
-            # delete the new Doxyfile.xml (https://github.com/doxygen/doxygen/pull/8463)
+            # delete Doxyfile.xml (https://github.com/doxygen/doxygen/pull/8463)
             # (it breaks m.css otherwise)
             if not context.xml_only:
                 delete_file(Path(context.xml_dir, r'Doxyfile.xml'), logger=context.verbose_logger)

             # 'file' entries for markdown and dox files
-            dox_files = (r'.dox', r'.md')
-            dox_files = [rf'*{doxygen.mangle_name(ext)}.xml' for ext in dox_files]
+            dox_files = [rf'*{doxygen.mangle_name(ext)}.xml' for ext in (r'.dox', r'.md')]
             dox_files.append(r'md_home.xml')
             for xml_file in get_all_files(context.xml_dir, any=dox_files):
                 delete_file(xml_file, logger=context.verbose_logger)

-            # 'dir' entries which contain nothing
+            # 'dir' entries for empty directories
             deleted = True
             while deleted:
                 deleted = False
@@ -697,19 +706,24 @@ def _postprocess_xml(context):

                 # rip the good bits out of implementation headers
                 if context.implementation_headers:
-                    if compounddef.get(r'id') in implementation_header_mappings:
-                        hid = implementation_header_mappings[compounddef.get("id")][2]
+                    iid = compounddef.get(r'id')
+                    if iid in implementation_header_mappings:
+                        hid = implementation_header_mappings[iid][2]
                         innernamespaces = compounddef.findall(r'innernamespace')
                         if innernamespaces:
                             implementation_header_innernamespaces[hid] = implementation_header_innernamespaces[hid] + innernamespaces
                             extracted_implementation = True
+                            if iid in implementation_header_unused_values:
+                                del implementation_header_unused_values[iid]
                             for tag in innernamespaces:
                                 compounddef.remove(tag)
                             changed = True
                         sectiondefs = compounddef.findall(r'sectiondef')
                         if sectiondefs:
                             implementation_header_sectiondefs[hid] = implementation_header_sectiondefs[hid] + sectiondefs
                             extracted_implementation = True
+                            if iid in implementation_header_unused_values:
+                                del implementation_header_unused_values[iid]
                             for tag in sectiondefs:
                                 compounddef.remove(tag)
                             changed = True
@@ -787,8 +801,18 @@
                             changed = True

         if changed:
+            implementation_header_unused_keys.remove(hp)
             write_xml_to_file(xml, xml_file)

+    # sanity-check implementation header state
+    if implementation_header_unused_keys:
+        for key in implementation_header_unused_keys:
+            context.warning(rf"implementation_header: nothing extracted for '{key}'")
+    if implementation_header_unused_values:
+        for iid, idata in implementation_header_unused_values.items():
+            context.warning(rf"implementation_header: nothing extracted from '{idata[0]}' for '{idata[1]}'")

     # delete the impl header xml files
     if 1 and context.implementation_headers:
         for hdata in implementation_header_data:
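The new warnings follow a consume-and-report pattern: seed "unused" collections with every configured header and implementation file up front, discard entries as extraction touches them, and warn about whatever is left at the end. Reduced to a sketch (names simplified, data hypothetical):

```python
# hypothetical mapping: implementation header -> implementation files
mapping = {'lib.h': ['lib_impl.h', 'lib_impl2.h']}

unused_keys = set(mapping)
unused_values = {impl: header for header, impls in mapping.items() for impl in impls}

def mark_extracted(header, impl):
    unused_keys.discard(header)
    unused_values.pop(impl, None)

mark_extracted('lib.h', 'lib_impl.h')   # extraction only touched this pair
for key in unused_keys:
    print(f"warning: nothing extracted for '{key}'")
for impl, header in unused_values.items():
    print(f"warning: nothing extracted from '{impl}' for '{header}'")   # lib_impl2.h
```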
@@ -836,25 +860,33 @@ def _postprocess_html_file(path, context=None):

     context.verbose(rf'Post-processing {path}')
     html_changed = False
-    if html_fixers:
-        doc = soup.HTMLDocument(path, logger=context.verbose_logger)
-        for fix in html_fixers:
-            if fix(doc, context):
-                doc.smooth()
-                html_changed = True
-        if html_changed:
-            doc.flush()
-
     plain_text_changed = False
-    if plain_text_fixers:
-        doc = [ read_all_text_from_file(path, logger=context.verbose_logger) ]
-        for fix in plain_text_fixers:
-            if fix(doc, context):
-                plain_text_changed = True
-        if plain_text_changed:
-            context.verbose(rf'Writing {path}')
-            with open(path, 'w', encoding='utf-8', newline='\n') as f:
-                f.write(doc[0])

+    try:
+        if html_fixers:
+            doc = soup.HTMLDocument(path, logger=context.verbose_logger)
+            for fix in html_fixers:
+                if fix(doc, context):
+                    doc.smooth()
+                    html_changed = True
+            if html_changed:
+                doc.flush()
+
+        if plain_text_fixers:
+            doc = [ read_all_text_from_file(path, logger=context.verbose_logger) ]
+            for fix in plain_text_fixers:
+                if fix(doc, context):
+                    plain_text_changed = True
+            if plain_text_changed:
+                context.verbose(rf'Writing {path}')
+                with open(path, 'w', encoding='utf-8', newline='\n') as f:
+                    f.write(doc[0])
+    except Exception as e:
+        context.info(rf'{type(e).__name__} raised while post-processing {path}')
+        raise
+    except:
+        context.info(rf'Error occurred while post-processing {path}')
+        raise

     return html_changed or plain_text_changed
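The restructuring above doesn't change what the fixers do; it only wraps them so any exception gets logged with the offending file's path before propagating, making a crash in a large batch attributable. The shape of it, with `run_fixers` as a hypothetical stand-in for the two fixer loops:

```python
def postprocess_file(path, context):
    try:
        run_fixers(path, context)   # hypothetical: the html + plain-text passes
    except Exception as e:
        # ordinary errors: name the exception and the file, then re-raise
        context.info(f'{type(e).__name__} raised while post-processing {path}')
        raise
    except:
        # bare except still sees BaseExceptions (e.g. KeyboardInterrupt)
        context.info(f'Error occurred while post-processing {path}')
        raise
```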

6 changes: 3 additions & 3 deletions poxy/soup.py
@@ -42,12 +42,12 @@ def destroy_node(node):


 def replace_tag(tag, new_tag_str):
+    assert tag.parent is not None
     newTags = []
     if new_tag_str:
         doc = bs4.BeautifulSoup(new_tag_str, 'html5lib')
-        if (len(doc.body.contents) > 0):
-            newTags = [f for f in doc.body.contents]
-            newTags = [f.extract() for f in newTags]
+        if len(doc.body.contents) > 0:
+            newTags = [f.extract() for f in doc.body.contents]
     prev = tag
     for newTag in newTags:
         prev.insert_after(newTag)
