diff --git a/README.md b/README.md
index 0712cb8..348e70b 100644
--- a/README.md
+++ b/README.md
@@ -1,65 +1,102 @@
 # spec-parser
 
 Automagically process the model of the SPDXv3 specification to validate input
-or to generate stuff.
+and/or generate stuff.
 
-## Usage
+## Functionality
 
-```shell
-python3 ./main.py -h
-usage: main.py [-h] [-d] [-f] [-n] [-q] [-v] [-V] input_dir [output_dir]
+The software always reads and validates the complete model (given as input).
+
+It then optionally generates one or more of the following outputs:
+
+1. JSON dump of the parsed model, to load all data without parsing
+2. MkDocs files, to be used by mkdocs to generate a website
+3. PlantUML file, to be used by plantuml to generate a diagram
+4. RDF files (ontology and context), for general use
+5. TeX files, to be used by LaTeX to generate a printable version
+6. Web page files, to provide information on the RDF URIs
+
+If no generation is specified on the command line,
+the default is to generate everything.
+
+## Usage
 
-Generate documentation from an SPDX 3.0 model
+```
+usage: main.py [-h] [-V] [-d] [-v] [-f] [-n]
+               [-o OUTPUT]
+               [-j] [-J dir]
+               [-m] [-M dir]
+               [-p] [-P dir]
+               [-r] [-R dir]
+               [-t] [-T dir]
+               [-w] [-W dir]
+               input_dir
+
+Generate documentation from an SPDXv3 model.
 
 positional arguments:
-  input_dir      Directory containing the input specification files
-  output_dir     Directory to write the output files to
+  input_dir                                Path to the input 'model' directory.
 
 options:
-  -h, --help     show this help message and exit
-  -d, --debug    Print debug output
-  -f, --force    Overwrite existing generated files
-  -n, --nooutput Do not generate anything, only check input
-  -q, --quiet    Print no output
-  -v, --verbose  Print verbose output
-  -V, --version  show program's version number and exit
-```
+  -h, --help                               Show this help message and exit
+  -d, --debug                              Print debug output
+  -f, --force                              Force overwrite of existing output directories.
+  -j, --generate-jsondump                  Generate a dump of the model in JSON format.
+  -J, --output-jsondump OUTPUT_JSONDUMP    Output directory for JSON dump file.
+  -m, --generate-mkdocs                    Generate mkdocs output.
+  -M, --output-mkdocs OUTPUT_MKDOCS        Output directory for mkdocs files.
+  -n, --no-output                          Perform no output generation, only input validation.
+  -o, --output OUTPUT                      Single output directory for all output types.
+  -p, --generate-plantuml                  Generate PlantUML output.
+  -P, --output-plantuml OUTPUT_PLANTUML    Output directory for PlantUML files.
+  -r, --generate-rdf                       Generate RDF output.
+  -R, --output-rdf OUTPUT_RDF              Output directory for RDF files.
+  -t, --generate-tex                       Generate TeX output.
+  -T, --output-tex OUTPUT_TEX              Output directory for TeX files.
+  -v, --verbose                            Print verbose output
+  -V, --version                            Show program version number and exit
+  -w, --generate-webpages                  Generate web pages output.
+  -W, --output-webpages OUTPUT_WEBPAGES    Output directory for web pages.
 
-Note that not all flags are functional yet.
+```
 
 ### Checking input
 
+If no generation is needed and only input validation is required:
+
 ```shell
 python3 main.py -n some/where/.../model
 ```
 
 Note that no dependencies are needed.
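+
+### Generating output
+
+Each output type can be requested individually with its own flag.
+For example, to generate only the RDF files
+into a directory of your choice (the paths here are placeholders):
+
+```shell
+python3 main.py -r -R some/where/.../rdf-output some/where/.../model
+```
+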
-### Generate output
-
-```shell
-python3 -m pip install -r requirements.txt
-python3 main.py some/where/.../model some/where/else/.../output_dir
-```
+## Prerequisites
 
-## Current status (mostly complete / in progress)
+| **Action** | *Prerequisites* |
+|---|---|
+| input validation (`-n`/`--no-output`) | None |
+| JSON dump generation | [jsonpickle](https://pypi.org/project/jsonpickle/) Python module |
+| MkDocs generation | [Jinja2](https://pypi.org/project/Jinja2/) Python module |
+| PlantUML generation | None |
+| RDF generation | [RDFlib](https://pypi.org/project/rdflib/) Python module |
+| TeX generation | [Jinja2](https://pypi.org/project/Jinja2/) Python module and [pandoc](https://pandoc.org/) software |
+| Web pages generation | [Jinja2](https://pypi.org/project/Jinja2/) Python module |
 
-- [x] parse everything in model
-- [x] generate mkdocs input
-- [x] generate JSON dump
-- [x] generate diagrams
-- [x] generate RDF ontology
-- [x] generate JSON-LD context
+The software checks for the presence of these prerequisites,
+depending on the command-line arguments,
+and exits if any are missing.
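+
+For example, to install all the Python module prerequisites at once
+(package names as listed in the table above):
+
+```shell
+python3 -m pip install jsonpickle Jinja2 rdflib
+```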
 
 ## Contributing
 
 Contributions are always welcome!
 
-Feel free to open issues for any behavior that is (or even simply does not
-seem) correct.
+Feel free to open issues for any behavior that is not
+(or even simply does not seem) correct.
 
-However, due to the pressure for releasing SPDXv3, development is happening in
-fast mode, and not always refelcted in this repository. To save everyone
-valuable time, if you want to contribute code: clearly indicate in the
-corresponding issue your willingness to work on it, and _wait_ for the
-assignment of the issue to you.
+However, due to the pressure for releasing SPDXv3,
+development is happening at a fast pace,
+and is not always reflected in this repository.
+To save everyone valuable time, if you want to contribute code:
+clearly indicate in the corresponding issue
+your willingness to work on it,
+and _wait_ for the assignment of the issue to you.
diff --git a/main.py b/main.py
index 3420006..0f2798c 100644
--- a/main.py
+++ b/main.py
@@ -6,8 +6,8 @@
 from spec_parser import Model
 
 if __name__ == "__main__":
-    cfg = RunParams()
+    cfg = RunParams("spec-parser")
 
-    m = Model(cfg.input_dir)
-    if not cfg.opt_nooutput:
-        m.gen_all(cfg.output_dir, cfg)
+    m = Model(cfg.input_path)
+    if not cfg.no_output:
+        m.generate(cfg)
diff --git a/ruff.toml b/ruff.toml
index 09b006a..4ac019e 100644
--- a/ruff.toml
+++ b/ruff.toml
@@ -1,30 +1,34 @@
 fix = false
 indent-width = 4
-line-length = 140
+line-length = 145
 output-format = "full"
 # output-format = "grouped"
 preview = false
-required-version = ">=0.4"
+required-version = ">=0.9"
 show-fixes = true
 
 [format]
 docstring-code-format = false
 indent-style = "space"
 line-ending = "native"
+preview = false
 quote-style = "double"
 skip-magic-trailing-comma = false
 
 [lint]
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 fixable = ["ALL"]
-ignore = [ "C408", "C901", "COM812", "G004", "ISC001", "RET505", "S101", ]
-ignore-init-module-imports = true
+ignore = [ "C408", "C901", "COM812", "EM102", "G004", "ISC001", "RET505", "S101", ]
+logger-objects = ["cfg.log"]
+preview = false
+# anything but "AIR", "ANN", "D", "NPY", "PD",
 select = [
-"A", "AIR", "ARG", "ASYNC", "B", "C", "BLE", "C4", "COM", "DJ", "DTZ",
-"EM", "ERA", "E", "W", "EXE", "F", "FA", "FBT", "FIX", "FLY", "G", "I",
-"ICN", "INP", "INT", "ISC", "LOG", "N", "NPY", "PD", "PERF", "PGH", "PIE",
-"PL", "PT", "PTH", "PYI", "Q", "R", "RET", "RSE", "RUF", "S", "SIM",
-"SLF", "SLOT", "T10", "T20", "TCH", "TD", "TID", "TRIO", "TRY", "UP", "YTT",
+"A", "ARG", "ASYNC", "ASYNC1", "B", "BLE", "C", "C4", "C90", "COM", "CPY",
+"DJ", "DOC", "DTZ", "E", "EM", "ERA", "EXE", "F", "FA", "FAST", "FBT", "FIX",
+"FLY", "FURB", "G", "I", "ICN", "INP", "INT", "ISC", "LOG", "N", "PERF",
+"PGH", "PIE", "PL", "PT", "PTH", "PYI", "Q", "R", "RET", "RSE", "RUF", "S",
+"SIM", "SLF", "SLOT", "T10", "T20", "TC", "TD", "TID", "TRY", "UP", "W", "YTT",
+
 ]
 unfixable = []
@@ -42,7 +46,14 @@
     "PLR0915",  # too many statements, for gen_rdf_ontology
 ]
 
+[lint.isort]
+case-sensitive = true
+force-sort-within-sections = false
+from-first = false
+lines-between-types = 0
+split-on-trailing-comma = false
+
 [lint.pylint]
 max-branches = 35
-max-statements = 55
+max-statements = 65
diff --git a/runparams.py b/runparams.py
index aff64b1..23ef043 100644
--- a/runparams.py
+++ b/runparams.py
@@ -3,87 +3,148 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import argparse
+import importlib.util
 import logging
+import shutil
 import sys
 from datetime import datetime, timezone
+from pathlib import Path
+from types import SimpleNamespace
 
-class RunParams:
-    def __init__(self):
+class RunParams(SimpleNamespace):
+    def __init__(self, name):
         self._ts = datetime.now(timezone.utc)
-        self.process_args()
+        self.log = logging.getLogger(name)
+        opt_force = self.process_args()
+        self.check_requirements()
+        # Logger._cache is populated by every logging call, so an ERROR entry
+        # means at least one error was reported (CPython implementation detail).
+        if logging.ERROR in self.log._cache:
+            sys.exit(1)
+        self.create_output_dirs(opt_force)
 
     @property
     def autogen_header(self):
         return f"Automatically generated by spec-parser v{self.parser_version} on {self._ts.isoformat()}"
 
-    @property
-    def input_dir(self):
-        return self.args.input_dir
-
-    @property
-    def output_dir(self):
-        return self.args.output_dir
-
-    @property
-    def opt_debug(self):
-        return self.args.debug
-
-    @property
-    def opt_force(self):
-        return self.args.force
-
-    @property
-    def opt_nooutput(self):
-        return self.args.nooutput
-
-    @property
-    def opt_quiet(self):
-        return self.args.quiet
-
-    @property
-    def opt_verbose(self):
-        return self.args.verbose
-
     @property
     def parser_version(self):
         return sys.modules["spec_parser"].__version__
 
     @property
     def all_as_dict(self):
-        return {
-            k: getattr(self, k)
-            for k in (
-                "autogen_header",
-                "input_dir",
-                "output_dir",
-                "opt_debug",
-                "opt_force",
-                "opt_quiet",
-                "opt_verbose",
-                "parser_version",
-            )
-        }
-
-    # Additional future improvements:
-    # - add more parameters, specified as command-line arguments
-    # - separate output dirs for mkdocs / RDF /JSON-LD / ...
-    # - maybe flags whether something might not be generated?
-    # - etc.
-
-    def process_args(self, args=sys.argv[1:]):
-        parser = argparse.ArgumentParser(description="Generate documentation from an SPDX 3.0 model")
-        parser.add_argument("input_dir", help="Directory containing the input specification files")
-        parser.add_argument("output_dir", nargs="?", help="Directory to write the output files to")
+        return {k: getattr(self, k) for k in ("autogen_header",)}
+
+    def check_requirements(self):
+        def check_import_module(module_name, condition):
+            if importlib.util.find_spec(module_name) is None:
+                self.log.error(f"Python module '{module_name}' is required when {condition} is specified. Make sure it's installed.")
+
+        def check_external_program(program_name, condition):
+            if shutil.which(program_name) is None:
+                self.log.error(
+                    f"Program '{program_name}' is required when {condition} is specified. Make sure it's installed and present in your PATH."
+                )
+
+        if self.generate_jsondump:
+            check_import_module("jsonpickle", "JSON dump generation")
+        if self.generate_mkdocs:
+            check_import_module("jinja2", "MkDocs generation")
+        if self.generate_rdf:
+            check_import_module("rdflib", "RDF generation")
+        if self.generate_tex:
+            check_external_program("pandoc", "TeX generation")
+            check_import_module("jinja2", "TeX generation")
+        if self.generate_webpages:
+            # Jinja2 is listed as the web pages prerequisite in the README
+            check_import_module("jinja2", "Web pages generation")
+
+    def process_args(self, opts=sys.argv[1:]):
+        def check_input_path(p):
+            if not p.exists():
+                raise argparse.ArgumentTypeError(f"Input directory '{p}' does not exist.")
+            if not p.is_dir():
+                raise argparse.ArgumentTypeError(f"Input path '{p}' is not a directory.")
+            if p.name != "model":
+                raise argparse.ArgumentTypeError(f"Input directory '{p}' must be named 'model'.")
+
+        parser = argparse.ArgumentParser(description="Generate documentation from an SPDXv3 model.")
+
+        parser.add_argument("input_dir", type=str, help="Path to the input 'model' directory.")
 
         parser.add_argument("-d", "--debug", action="store_true", help="Print debug output")
-        parser.add_argument("-f", "--force", action="store_true", help="Overwrite existing generated files")
-        parser.add_argument("-n", "--nooutput", action="store_true", help="Do not generate anything, only check input")
-        parser.add_argument("-q", "--quiet", action="store_true", help="Print no output")
+        parser.add_argument("-f", "--force", action="store_true", help="Force overwrite of existing output directories.")
+        parser.add_argument("-j", "--generate-jsondump", action="store_true", help="Generate a dump of the model in JSON format.")
+        parser.add_argument("-J", "--output-jsondump", type=str, help="Output directory for JSON dump file.")
+        parser.add_argument("-m", "--generate-mkdocs", action="store_true", help="Generate mkdocs output.")
+        parser.add_argument("-M", "--output-mkdocs", type=str, help="Output directory for mkdocs files.")
+        parser.add_argument("-n", "--no-output", action="store_true", help="Perform no output generation, only input validation.")
+        parser.add_argument("-o", "--output", type=str, help="Single output directory for all output types.")
+        parser.add_argument("-p", "--generate-plantuml", action="store_true", help="Generate PlantUML output.")
+        parser.add_argument("-P", "--output-plantuml", type=str, help="Output directory for PlantUML files.")
+        parser.add_argument("-r", "--generate-rdf", action="store_true", help="Generate RDF output.")
+        parser.add_argument("-R", "--output-rdf", type=str, help="Output directory for RDF files.")
+        parser.add_argument("-t", "--generate-tex", action="store_true", help="Generate TeX output.")
+        parser.add_argument("-T", "--output-tex", type=str, help="Output directory for TeX files.")
         parser.add_argument("-v", "--verbose", action="store_true", help="Print verbose output")
-        parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {RunParams.parser_version}")
-        self.args = parser.parse_args(args)
+        parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {self.parser_version}")
+        parser.add_argument("-w", "--generate-webpages", action="store_true", help="Generate web pages output.")
+        parser.add_argument("-W", "--output-webpages", type=str, help="Output directory for web pages.")
+
+        opts = parser.parse_args(opts)
+        gen_list = ["jsondump", "mkdocs", "plantuml", "rdf", "tex", "webpages"]
+        desc_list = ["JSON dump", "MkDocs", "PlantUML", "RDF", "TeX", "Web pages"]
+
+        # basicConfig is a module-level function (not a Logger method) and is a
+        # no-op on subsequent calls, so check the more specific flag first
+        if opts.debug:
+            logging.basicConfig(level=logging.DEBUG)
+        elif opts.verbose:
+            logging.basicConfig(level=logging.INFO)
+
+        self.input_path = Path(opts.input_dir)
+        try:
+            check_input_path(self.input_path)
+        except argparse.ArgumentTypeError as err:
+            # report as a clean argparse error instead of a traceback
+            parser.error(str(err))
+
+        if opts.no_output:
+            self.no_output = True
+            if any(getattr(opts, "generate_" + g) for g in gen_list):
+                self.log.warning("Incompatible flag combination: -n/--no-output overrides all generation flags")
+            for g in gen_list:
+                setattr(self, "generate_" + g, False)
+        else:
+            self.no_output = False
+            if not any(getattr(opts, "generate_" + g) for g in gen_list):
+                for g in gen_list:
+                    setattr(self, "generate_" + g, True)
+            else:
+                for g in gen_list:
+                    setattr(self, "generate_" + g, getattr(opts, "generate_" + g))
+
+        if opts.output:
+            self.output_path = Path(opts.output)
+            if self.output_path.exists() and not opts.force:
+                self.log.error(f"Output directory '{self.output_path}' already exists (use -f/--force to overwrite).")
+
+        for desc, g in zip(desc_list, gen_list):
+            genflag = "generate_" + g
+            if getattr(self, genflag, False):
+                outdir = "output_" + g
+                outpath = outdir + "_path"
+                if d := getattr(opts, outdir, None):
+                    setattr(self, outpath, Path(d))
+                elif p := getattr(self, "output_path", None):
+                    setattr(self, outpath, p / g)
+                else:
+                    self.log.error(f"{desc} generation was requested, but no output directory was specified.")
+                if p := getattr(self, outpath, None):
+                    if p.exists() and not opts.force:
+                        self.log.error(f"Output directory '{p}' already exists (use -f/--force to overwrite).")
+
+        return opts.force
+
+    def create_output_dirs(self, force):
+        gen_list = ["jsondump", "mkdocs", "plantuml", "rdf", "tex", "webpages"]
+        for g in gen_list:
+            genflag = "generate_" + g
+            if getattr(self, genflag, False):
+                outpath = "output_" + g + "_path"
+                p = getattr(self, outpath)
+                if force and p.exists():
+                    shutil.rmtree(p)
+                p.mkdir(parents=True)
 
-        if self.opt_nooutput and self.output_dir:
-            logging.warning(f"Ignoring output directory {self.output_dir} specified with --nooutput")
-        if not self.opt_nooutput and not self.output_dir:
-            logging.critical("No output directory specified!")
diff --git a/spec_parser/__init__.py b/spec_parser/__init__.py
index 21b4575..207e212 100644
--- a/spec_parser/__init__.py
+++ b/spec_parser/__init__.py
@@ -2,6 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
-__version__ = "2.5.0"
+__version__ = "3.0.1"
 
 from .model import Model
diff --git a/spec_parser/jsondump.py b/spec_parser/jsondump.py
index a56157c..34199dd 100644
--- a/spec_parser/jsondump.py
+++ b/spec_parser/jsondump.py
@@ -2,14 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from pathlib import Path
-
 import jsonpickle
 
-def gen_jsondump(model, outdir, cfg):
-    p = Path(outdir) / "jsondump"
-    p.mkdir()
-
-    f = p / "model.json"
+def gen_jsondump(model, outpath, cfg):
+    f = outpath / "model.json"
     f.write_text(jsonpickle.encode(model, indent=2, warn=True))
diff --git a/spec_parser/mdparsing.py b/spec_parser/mdparsing.py
index 4b41615..bfa1927 100644
--- a/spec_parser/mdparsing.py
+++ b/spec_parser/mdparsing.py
@@ -5,6 +5,7 @@
 import logging
 import re
 
+logger = logging.getLogger(__name__)
 
 class SpecFile:
     RE_SPLIT_TO_SECTIONS = re.compile(r"\n(?=(?:\Z|# |## ))")
@@ -19,20 +20,20 @@
         self.load(fpath)
 
     def load(self, fpath):
-        logging.debug(f"### loading {fpath.parent}/{fpath.name}")
+        logger.debug(f"### loading {fpath.parent}/{fpath.name}")
 
         filecontent = fpath.read_text(encoding="utf-8")
         parts = re.split(self.RE_SPLIT_TO_SECTIONS, filecontent)
 
         m = re.fullmatch(self.RE_EXTRACT_LICENSE, parts[0])
         if m is None:
-            logging.error(f"File {fpath!s} does not start with license.")
+            logger.error(f"File {fpath!s} does not start with license.")
         else:
             self.license = m.group(1)
 
         m = re.fullmatch(self.RE_EXTRACT_NAME, parts[1])
         if m is None:
-            logging.error(f"File {fpath!s} does not have name after license.")
+            logger.error(f"File {fpath!s} does not have name after license.")
         else:
             self.name = m.group(1)
@@ -65,7 +66,7 @@
         for l in content.splitlines():
             m = re.fullmatch(self.RE_EXTRACT_KEY_VALUE, l)
             if m is None:
-                logging.error(f"Single list parsing error in line `{l}'")
+                logger.error(f"Single list parsing error in line `{l}'")
             else:
                 key = m.group(1)
                 val = m.group(2).strip()
@@ -83,19 +84,15 @@
             if l.startswith("-"):
                 m = re.fullmatch(self.RE_EXTRACT_TOP_LEVEL, l)
                 if m is None:
-                    logging.error(f"Top-level nested list parsing error in line `{l}'")
+                    logger.error(f"Top-level nested list parsing error in line `{l}'")
                 else:
                     item = m.group(1)
                     self.ikv[item] = dict()
             else:
                 m = re.fullmatch(self.RE_EXTRACT_KEY_VALUE, l)
                 if m is None:
-                    logging.error(f"Nested list parsing error in line `{l}'")
+                    logger.error(f"Nested list parsing error in line `{l}'")
                 else:
                     key = m.group(1)
                     val = m.group(2).strip()
                     self.ikv[item][key] = val
-
-
-if __name__ == "__main__":
-    fn = "/home/zvr/github/spdx/spdx-3-model/model/Core/Classes/Element.md"
diff --git a/spec_parser/mkdocs.py b/spec_parser/mkdocs.py
index dd09238..c7c2516 100644
--- a/spec_parser/mkdocs.py
+++ b/spec_parser/mkdocs.py
@@ -2,12 +2,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from pathlib import Path
-
 from jinja2 import Environment, PackageLoader, select_autoescape
 
-def gen_mkdocs(model, outdir, cfg):
+def gen_mkdocs(model, outpath, cfg):
     jinja = Environment(
         loader=PackageLoader("spec_parser", package_path="templates/mkdocs"),
         autoescape=select_autoescape(),
@@ -18,12 +16,10 @@
     jinja.globals["class_link"] = class_link
     jinja.globals["property_link"] = property_link
     jinja.globals["ext_property_link"] = ext_property_link
-    jinja.globals["type_link"] = lambda x, showshort=False: type_link(x, model, showshort)
+    jinja.globals["type_link"] = lambda x, showshort=False: type_link(x, model, showshort=showshort)
     jinja.globals["not_none"] = lambda x: str(x) if x is not None else ""
 
-    op = Path(outdir)
-    p = op / "mkdocs"
-    p.mkdir()
+    p = outpath
 
     for ns in model.namespaces:
         d = p / ns.name
@@ -56,8 +52,7 @@
         nameslist = [c.name for c in itemslist.values()]
         if nameslist:
             ret.append(f"  - {heading}:")
-            for n in sorted(nameslist):
-                ret.append(f"    - '{n}': model/{nsname}/{heading}/{n}.md")
+            ret.extend(f"    - '{n}': model/{nsname}/{heading}/{n}.md" for n in sorted(nameslist))
         return ret
 
     files = dict()
@@ -90,7 +85,7 @@
     ]:
         filelines.extend(files[nsname])
 
-    fn = op / "model-files.yml"
+    fn = outpath / "model-files.yml"
     fn.write_text("\n".join(filelines))
@@ -102,7 +97,7 @@
     return f"[{name}](../Classes/{name}.md)"
 
-def property_link(name, showshort=False):
+def property_link(name, *, showshort=False):
     if name.startswith("/"):
         _, other_ns, name = name.split("/")
         showname = name if showshort else f"/{other_ns}/{name}"
@@ -119,7 +114,7 @@
     return ret
 
-def type_link(name, model, showshort=False):
+def type_link(name, model, *, showshort=False):
     if name.startswith("/"):
         dirname = "Classes"
         if name in model.vocabularies:
diff --git a/spec_parser/model.py b/spec_parser/model.py
index 885796c..43436e4 100644
--- a/spec_parser/model.py
+++ b/spec_parser/model.py
@@ -4,7 +4,6 @@
 import logging
 from copy import deepcopy
-from pathlib import Path
 
 from .mdparsing import (
     ContentSection,
@@ -13,9 +12,10 @@
     SpecFile,
 )
 
+logger = logging.getLogger(__name__)
 
 class Model:
-    def __init__(self, indir=None):
+    def __init__(self, inpath=None):
         self.name = None
         self.namespaces = []
         self.classes = dict()
@@ -24,21 +24,16 @@
         self.individuals = dict()
         self.datatypes = dict()
 
-        if indir is not None:
-            self.load(indir)
+        if inpath is not None:
+            self.load(inpath)
 
-    def load(self, indir):
-        self.toplevel = p = Path(indir)
-        if not p.is_dir():
-            logging.error(f"{indir}: not a directory")
-            return
-        if p.name != "model":
-            logging.warning(f'{indir}: input not named "model"')
+    def load(self, inpath):
+        p = inpath
 
         for d in [d for d in p.iterdir() if d.is_dir() and d.name[0].isupper()]:
             nsp = p / d.name / f"{d.name}.md"
             if not nsp.is_file():
-                logging.error(f"Missing top-level namespace file {nsp.name}")
+                logger.error(f"Missing top-level namespace file {nsp.name}")
                 continue
 
             ns = Namespace(nsp)
@@ -84,7 +79,7 @@
                 self.datatypes[k] = n
                 ns.datatypes[k] = n
 
-        logging.info(
+        logger.info(
             f"Loaded {len(self.namespaces)} namespaces, {len(self.classes)} classes, "
             f"{len(self.properties)} properties, {len(self.vocabularies)} vocabularies, "
             f"{len(self.individuals)} individuals, {len(self.datatypes)} datatypes",
@@ -93,7 +88,7 @@
 
     def process_after_load(self):
         self.types = self.classes | self.vocabularies | self.datatypes
-        logging.info(f"Total {len(self.types)} types")
+        logger.info(f"Total {len(self.types)} types")
 
         # add used_in information to properties
         for c in self.classes.values():
@@ -103,7 +98,7 @@
                 proptype = self.properties[pname].metadata["Range"]
                 ptype = pkv["type"]
                 if proptype != ptype and (not p.startswith("/") or proptype.rpartition("/")[-1] != ptype.rpartition("/")[-1]):
-                    logging.error(f"In class {c.fqname}, property {p} has type {ptype} but the range of {pname} is {proptype}")
+                    logger.error(f"In class {c.fqname}, property {p} has type {ptype} but the range of {pname} is {proptype}")
                 self.properties[pname].used_in.append(c.fqname)
 
         # add class inheritance stack
@@ -159,27 +154,28 @@ def _tsort_recursive(inh, cn, visited, stack):
                     assert c.all_properties[shortname]["fullname"] == f"/{pns}/{shortname}"
                     for k, v in pkv.items():
                         if c.all_properties[shortname][k] == v:
-                            logging.warning(f"In class {c.fqname} property {p} has same {k} as the parent class")
+                            logger.warning(f"In class {c.fqname} property {p} has same {k} as the parent class")
                         c.all_properties[shortname][k] = v
 
-    def gen_all(self, outdir, cfg):
-        from .jsondump import gen_jsondump
-        from .mkdocs import gen_mkdocs
-        from .plantuml import gen_plantuml
-        from .rdf import gen_rdf
-        from .tex import gen_tex
-
-        p = Path(outdir)
-        if p.exists() and not cfg.opt_force:
-            logging.error(f"Destination for mkdocs {outdir} already exists, will not overwrite")
-            return
-        p.mkdir(parents=True)
-
-        gen_mkdocs(self, outdir, cfg)
-        gen_rdf(self, outdir, cfg)
-        gen_tex(self, outdir, cfg)
-        gen_plantuml(self, outdir, cfg)
-        gen_jsondump(self, outdir, cfg)
+    def generate(self, cfg):
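+        # NOTE: generator modules are imported lazily, so a generator's
+        # third-party dependencies are only needed when that output is
+        # actually requested (RunParams.check_requirements verifies them).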
+        if cfg.generate_jsondump:
+            from .jsondump import gen_jsondump
+            gen_jsondump(self, cfg.output_jsondump_path, cfg)
+        if cfg.generate_mkdocs:
+            from .mkdocs import gen_mkdocs
+            gen_mkdocs(self, cfg.output_mkdocs_path, cfg)
+        if cfg.generate_plantuml:
+            from .plantuml import gen_plantuml
+            gen_plantuml(self, cfg.output_plantuml_path, cfg)
+        if cfg.generate_rdf:
+            from .rdf import gen_rdf
+            gen_rdf(self, cfg.output_rdf_path, cfg)
+        if cfg.generate_tex:
+            from .tex import gen_tex
+            gen_tex(self, cfg.output_tex_path, cfg)
+        if cfg.generate_webpages:
+            from .webpages import gen_webpages
+            gen_webpages(self, cfg.output_webpages_path, cfg)
 
 
 class Namespace:
diff --git a/spec_parser/plantuml.py b/spec_parser/plantuml.py
index 79bfb3c..5ed33cc 100644
--- a/spec_parser/plantuml.py
+++ b/spec_parser/plantuml.py
@@ -2,14 +2,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from pathlib import Path
 
-def gen_plantuml(model, outdir, cfg):
-    p = Path(outdir) / "diagram"
-    p.mkdir(exist_ok=True)
-
-    f = p / "model.plantuml"
+def gen_plantuml(model, outpath, cfg):
+    f = outpath / "model.plantuml"
 
     s = f"""
 @startuml
diff --git a/spec_parser/rdf.py b/spec_parser/rdf.py
index 794be5c..56ee673 100644
--- a/spec_parser/rdf.py
+++ b/spec_parser/rdf.py
@@ -4,7 +4,6 @@
 import json
 import logging
-from pathlib import Path
 
 from rdflib import (
     BNode,
@@ -19,10 +18,10 @@
 URI_BASE = "https://spdx.org/rdf/3.0.1/terms/"
 
+logger = logging.getLogger(__name__)
 
-def gen_rdf(model, outdir, cfg):
-    p = Path(outdir) / "rdf"
-    p.mkdir()
+def gen_rdf(model, outpath, cfg):
+    p = outpath
 
     ret = gen_rdf_ontology(model)
     for ext in ["hext", "json-ld", "longturtle", "n3", "nt", "pretty-xml", "trig", "ttl", "xml"]:
@@ -34,8 +33,6 @@
     with fn.open("w") as f:
         json.dump(ctx, f, sort_keys=True, indent=2)
 
-    p = Path(outdir) / "diagram"
-    p.mkdir(exist_ok=True)
     fn = p / "spdx-model.dot"
     with fn.open("w") as f:
         rdf2dot(ret, f)
@@ -45,7 +42,7 @@
     if rng.startswith("xsd:"):
         return URIRef("http://www.w3.org/2001/XMLSchema#" + rng[4:])
 
-    logging.warning(f"Uknown namespace in range <{rng}> of property {propname}")
+    logger.warning(f"Unknown namespace in range <{rng}> of property {propname}")
     return None
@@ -102,7 +99,6 @@
             g.add((bnode, SH.path, RDF.type))
             notNode = BNode()
             g.add((bnode, SH["not"], notNode))
-            hasValueNode = BNode()
             g.add((notNode, SH["hasValue"], node))
             msg = Literal(
                 f"{node} is an abstract class and should not be instantiated directly. Instantiate a subclass instead.",
@@ -292,7 +288,7 @@ def get_subject_term(subject):
         if key in terms:
             current = terms[key]["@id"] if isinstance(terms[key], dict) else terms[key]
-            logging.error(f"ERROR: Duplicate context key '{key}' for '{subject}'. Already mapped to '{current}'")
+            logger.error(f"Duplicate context key '{key}' for '{subject}'. Already mapped to '{current}'")
             continue
         terms[key] = get_subject_term(subject)
diff --git a/spec_parser/templates/tex/vocabulary.tex.j2 b/spec_parser/templates/tex/vocabulary.tex.j2
index d7e6f92..c3b42a4 100644
--- a/spec_parser/templates/tex/vocabulary.tex.j2
+++ b/spec_parser/templates/tex/vocabulary.tex.j2
@@ -11,7 +11,7 @@
 \spdxpagepart{Entries}
 \begin{description}
 {% for name, val in entries | dictsort %}
-\item[ {{-to_tex(name)-}} ]
+\item[ {{-tex_escape(name)-}} ]
 {{-markdown_to_tex(val)-}}
 {% endfor %}
 \end{description}
diff --git a/spec_parser/tex.py b/spec_parser/tex.py
index e0772a7..b2cb207 100644
--- a/spec_parser/tex.py
+++ b/spec_parser/tex.py
@@ -2,13 +2,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import re
-from pathlib import Path
+import subprocess
 
 from jinja2 import Environment, PackageLoader, select_autoescape
 
-def gen_tex(model, outdir, cfg):
+def gen_tex(model, outpath, cfg):
     jinja = Environment(
         loader=PackageLoader("spec_parser", package_path="templates/tex"),
         autoescape=select_autoescape(),
@@ -17,12 +16,10 @@
     )
     jinja.globals = cfg.all_as_dict
     jinja.globals["not_none"] = lambda x: str(x) if x is not None else ""
-    jinja.globals["to_tex"] = to_tex
+    jinja.globals["tex_escape"] = tex_escape
    jinja.globals["markdown_to_tex"] = markdown_to_tex
 
-    op = Path(outdir)
-    p = op / "tex"
-    p.mkdir()
+    p = outpath
 
     for ns in model.namespaces:
         d = p / ns.name
@@ -55,8 +52,7 @@
         nameslist = [c.name for c in itemslist.values()]
         if nameslist:
             ret.append(f"\\spdxcategory{{{heading}}}")
-            for n in sorted(nameslist):
-                ret.append(f"\\input{{model/{nsname}/{heading}/{n}}}")
+            ret.extend(f"\\input{{model/{nsname}/{heading}/{n}}}" for n in sorted(nameslist))
         return ret
 
     files = dict()
@@ -88,46 +84,23 @@
     ]:
         filelines.extend(files[nsname])
 
-    fn = op / "model-files.tex"
+    fn = p / "model-files.tex"
     fn.write_text("\n".join(filelines))
 
-def to_tex(s):
+def tex_escape(s):
     s = s.replace("\\", "\\textbackslash{}")
     s = s.replace("_", "\\_")
     s = s.replace("&", "\\&")
     s = s.replace("#", "\\#")
     s = s.replace("^", "\\^")
     s = s.replace("$", "\\$")
-    # s = s.replace("\\", "\\textbackslash{}")
-    # s = s.replace("<", "$<$")
-    # s = s.replace(">", "$>$")
-    # s = s.replace("{", "\\{")
-    # s = s.replace("}", "\\}")
-    # s = s.replace("~", "\\textasciitilde{}")
     return s
 
-LINK_REGEXP = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
-BOLD_REGEXP = re.compile(r"\*\*([^\*]+)\*\*")
-ITALIC_REGEXP = re.compile(r"\*([^\*]+)\*")
-PREFORMATTED_REGEXP = re.compile(r"\`([^\`]+)\`")
-PREFORMATTED_LINES_REGEXP = re.compile(r"\`\`\`([^\`]+)\`\`\`")
-
-
-def foo(description):
-    description = re.sub(r"\*\*(.+?)\*\*", r"\\textbf{\1}", description)
-    description = re.sub(r"\*(.+?)\*", r"\\textit{\1}", description)
-    description = re.sub(r"`(.+?)`", r"\\texttt{\1}", description)
-    description = re.sub(r"\[(.*?)\]\((.*?)\)", r"\\href{\2}{\1}", description)
-
-
-import subprocess
-
-
 def markdown_to_tex(s):
     # Call pandoc to convert from Markdown to TeX
     process = subprocess.run(
-        ["pandoc", "-f", "markdown", "-t", "latex"], input=s.encode("utf-8"), stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False
+        ["pandoc", "-f", "markdown", "-t", "latex"], input=s.encode("utf-8"), capture_output=True, check=False
     )
     return process.stdout.decode("utf-8")
diff --git a/spec_parser/webpages.py b/spec_parser/webpages.py
new file mode 100644
index 0000000..1015acd
--- /dev/null
+++ b/spec_parser/webpages.py
@@ -0,0 +1,9 @@
+# saving the model as web pages
+
+# SPDX-License-Identifier: Apache-2.0
+
+
+def gen_webpages(model, outpath, cfg):
+    pass
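+    # Placeholder: web page generation is not implemented yet.
+    # outpath already exists; RunParams.create_output_dirs() creates it.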