Merge pull request #39 from ferrocene/jh/sync-with-monorepo
Synchronize with monorepo
Showing 16 changed files with 893 additions and 261 deletions.
@@ -3,3 +3,4 @@
__pycache__
*.py[co]
.env
@@ -0,0 +1,186 @@
# SPDX-License-Identifier: MIT OR Apache-2.0
# SPDX-FileCopyrightText: The Ferrocene Developers

# This extension automatically inserts links across the whole document pointing
# to the relevant terms in the glossary, without having to manually annotate
# words with :term:`Glossary Entry`.
#
# The extension requires no configuration, and just adding a glossary directive
# is enough for the extension to work.
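#
# Illustrative example (added commentary, not part of this commit): terms are
# collected from any standard Sphinx glossary directive, e.g.:
#
#   .. glossary::
#
#      ABI
#         The binary interface between two program modules.
#
# Occurrences of "ABI" elsewhere in the text would then be linked here
# automatically. The term and definition above are hypothetical.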

from . import debug, lexer
from .lexer import Term, MatchedTerm
from dataclasses import dataclass
from docutils import nodes
from sphinx import addnodes
from sphinx.environment.collectors import EnvironmentCollector
from sphinx.transforms import SphinxTransform
import sphinx
import string


class GlossaryCollector(EnvironmentCollector):
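    # Added commentary: clear_doc, merge_other and process_doc are the
    # standard EnvironmentCollector hooks Sphinx invokes to keep per-document
    # state consistent, including across parallel reads.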
    def clear_doc(self, app, env, docname):
        state = State.get(env)
        state.terms = [item for item in state.terms if item.document != docname]

    def merge_other(self, app, env, docnames, other):
        state = State.get(env)
        other_state = State.get(other)
        for item in other_state.terms:
            if item.document in docnames:
                state.terms.append(item)

    def process_doc(self, app, document):
        state = State.get(app.env)
        for glossary in document.findall(addnodes.glossary):
            for term in glossary.findall(nodes.term):
                name = term.astext()
                state.terms.append(
                    Term(
                        name=name,
                        document=app.env.docname,
                        anchor=term["ids"][0],
                        # If the term is only made of uppercase letters we
                        # treat it as an abbreviation rather than a term.
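                        # (Added commentary: "ABI" would count as an
                        # abbreviation, "Borrow checker" would not.)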
                        abbreviation=all(c in string.ascii_uppercase for c in name),
                    )
                )


class LinkToTermsTransform(SphinxTransform):
    default_priority = 100

    def apply(self):
        state = State.get(self.env)
        for node in lexer.find_lexable_nodes(self.document):
            self.apply_to_node(state, node.node)

    def apply_to_node(self, state, node):
        lexed = list(lexer.lexer(node.astext(), state.terms))
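        # Added commentary: the lexer yields plain strings interleaved with
        # MatchedTerm objects; e.g. "See the ABI docs" might lex to
        # ["See the ", MatchedTerm(...), " docs"] (hypothetical output).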
        if len(lexed) == 1 and type(lexed[0]) is str:
            # Do nothing if the lexed version returned the same string.
            pass
        else:
            container = nodes.inline()
            for part in lexed:
                if type(part) is str:
                    container.append(nodes.Text(part))
                elif type(part) is MatchedTerm:
                    container.append(self.make_link(part))
                else:
                    raise RuntimeError("unexpected result of lexer")
            node.parent.replace(node, container)

    def make_link(self, matched):
        node = sphinx.util.nodes.make_refnode(
            self.app.builder,
            self.env.docname,
            matched.term.document,
            matched.term.anchor,
            nodes.Text(matched.text),
        )
        node["classes"].append("ferrocene-autoglossary")
        return node


class PruneGlossaryTransform(SphinxTransform):
    default_priority = 500

    def apply(self):
        glossaries = list(self.document.findall(addnodes.glossary))
        if glossaries:
            used_terms = self.discover_used_terms()
            for glossary in glossaries:
                self.prune(glossary, used_terms)

    # We have to re-scan all the documents to see which terms are used,
    # duplicating the scanning effort. Ideally we would first run the
    # LinkToTermsTransform transform, collect the terms that were linked from
    # there, and then pass that information to this transform.
    #
    # Unfortunately that's not possible, because each "post transform" is
    # executed in the process handling that file, and there is no way for
    # those processes to synchronize and communicate.
    def discover_used_terms(self):
        # The key of this dict is the name of the term, while the value is the
        # "dependencies" of the term. When the term is referred to by any page
        # other than the glossary the dependencies will be None, while if it's
        # only referenced by other glossary entries the value will be the names
        # of those entries.
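        #
        # Added commentary, hypothetical shape: {"ABI": None, "Lexer": {"Parser"}}
        # means "ABI" is used by regular text, while "Lexer" is so far only
        # referenced inside the glossary definition of "Parser".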
        used_terms = dict()

        state = State.get(self.env)
        for docname in self.env.all_docs.keys():
            doctree = self.env.get_doctree(docname)
            for node in lexer.find_lexable_nodes(doctree):
                for part in lexer.lexer(node.node.astext(), state.terms):
                    if type(part) is not MatchedTerm:
                        continue
                    name = part.term.name
                    # Join the list of dependencies, setting None when either
                    # side is None.
                    if name not in used_terms:
                        used_terms[name] = node.inside_definition_of
                    elif used_terms[name] is not None:
                        if node.inside_definition_of is None:
                            used_terms[name] = None
                        else:
                            used_terms[name].update(node.inside_definition_of)

        # Keep resolving dependencies until an iteration makes no change. This
        # makes sure that term "A", referred to by term "B", referred to by
        # term "C", referred to by the rest of the text, is kept.
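        #
        # Added commentary, hypothetical walk-through: starting from
        # {"A": {"B"}, "B": {"C"}, "C": None}, one pass resolves "B" to None
        # (its dependency "C" is None), the next resolves "A" to None, and a
        # final pass makes no change, so all three terms are kept.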
        changed = True
        while changed:
            changed = False
            for term, depends_on in dict(used_terms.items()).items():
                if depends_on is None:
                    continue
                for dependency in depends_on:
                    if dependency in used_terms and used_terms[dependency] is None:
                        used_terms[term] = None
                        changed = True
                        break

        return {term for term, deps in used_terms.items() if deps is None}

    def prune(self, glossary, used_terms):
        for item in list(glossary.findall(nodes.definition_list_item)):
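            # Added commentary: this is a for/else, so the else branch runs
            # only when the loop finishes without a break, i.e. when none of
            # the item's terms is actually used anywhere.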
            for term in item.findall(nodes.term):
                if term.astext() in used_terms:
                    break
            else:
                item.parent.remove(item)


@dataclass
class State:
    terms: list[Term]

    def get(env):
        key = "ferrocene_autoglossary"
        if not hasattr(env, key):
            setattr(env, key, State(terms=list()))
        return getattr(env, key)


def setup(app):
    app.add_env_collector(GlossaryCollector)
    app.add_post_transform(LinkToTermsTransform)
    app.add_post_transform(PruneGlossaryTransform)

    debug.setup(app)

    return {
        "version": "0",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
        # The version needs to be updated whenever there is a breaking change
        # in the data stored in the environment. Bumping the version number
        # ensures Sphinx does a clean build.
        #
        # Version history:
        # - 0: initial implementation
        "env_version": "0",
    }
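As context for reviewers (added commentary, not part of this commit): enabling an extension like this is typically a one-line change in the project's conf.py. A minimal sketch, assuming the package is importable as ferrocene_autoglossary (the module name is an assumption based on the environment key used by State.get above):

    # Hypothetical conf.py excerpt; the extension name is assumed, not
    # confirmed by this diff.
    extensions = [
        "ferrocene_autoglossary",
    ]

No further configuration is needed; per the module comment, any glossary directive is picked up automatically.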
@@ -0,0 +1,74 @@
# SPDX-License-Identifier: MIT OR Apache-2.0
# SPDX-FileCopyrightText: The Ferrocene Developers

# This module appends to a file all lexed nodes that have at least one match
# within them, if the AUTOGLOSSARY_DEBUG_FILE environment variable is set when
# building the docs (and contains the path to that file). It is helpful when
# making changes to the lexer.
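#
# Added commentary, hypothetical output line (matches are wrapped in square
# brackets by render_lexed_node, and the location comes from node_location):
#
#   docs/glossary.rst:42: The [ABI] is part of the [calling convention].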

from . import lexer
from .lexer import MatchedTerm
import os


def write_lexer_outcome(app, env):
    debug_file = os.environ.get("AUTOGLOSSARY_DEBUG_FILE")
    if debug_file is None or app.builder.name == "ferrocene-intersphinx":
        return

    # The problem this tries to prevent is that the build system changes the
    # current directory before invoking Sphinx, so with a relative path the
    # file would not be placed where the user expects.
    #
    # This would be fixable by having the build system resolve the path before
    # providing it to the extension, but it doesn't make sense to add that
    # complexity for a rarely-used debug feature.
    if not os.path.isabs(debug_file):
        raise RuntimeError("AUTOGLOSSARY_DEBUG_FILE must be an absolute path")
    output = open(debug_file, "a")

    from . import State  # Imported here to avoid circular dependencies

    terms = State.get(env).terms

    for docname in sorted(env.found_docs):
        doctree = env.get_doctree(docname)
        for node in lexer.find_lexable_nodes(doctree):
            rendered = render_lexed_node(terms, node.node)
            if rendered is None:
                continue
            rendered = rendered.replace("\n", " ")
            output.write(f"{node_location(node.node)}: {rendered}\n")


def render_lexed_node(terms, node):
    result = ""
    has_matches = False

    for token in lexer.lexer(node.astext(), terms):
        if type(token) is MatchedTerm:
            result += f"[{token.text}]"
            has_matches = True
        elif type(token) is str:
            result += token
        else:
            raise RuntimeError("invalid token type")

    if has_matches:
        return result


def node_location(node):
    if (
        getattr(node, "source", None) is not None
        and getattr(node, "line", None) is not None
    ):
        return f"{node.source}:{node.line}"
    elif node.parent is not None:
        return node_location(node.parent)
    else:
        return "<unknown>"


def setup(app):
    app.connect("env-check-consistency", write_lexer_outcome)