diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ab56651..217c7fc 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -25,7 +25,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9] + python-version: [3.7, 3.8, 3.9] os: [ubuntu-latest, windows-latest] steps: @@ -48,6 +48,7 @@ jobs: pytest --cov=rst_to_myst --cov-report=xml --cov-report=term-missing - name: Upload to Codecov + if: matrix.os == 'ubuntu-latest' uses: codecov/codecov-action@v1 with: name: pytests diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5527938..09cbae5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: end-of-file-fixer - id: mixed-line-ending @@ -12,19 +12,19 @@ repos: - id: check-yaml - id: check-toml - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.7.0 + rev: v1.9.0 hooks: - id: python-check-blanket-noqa - repo: https://github.com/timothycrosley/isort - rev: 5.6.4 + rev: 5.8.0 hooks: - id: isort - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 21.6b0 hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.9.2 hooks: - id: flake8 additional_dependencies: diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 0000000..1863741 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,13 @@ +version: 2 + +python: + version: 3 + install: + - method: pip + path: . 
+ extra_requirements: + - docs + +sphinx: + builder: html + fail_on_warning: true diff --git a/README.md b/README.md index c6c4af2..255651a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# rst-to-myst [UNDER-DEVELOPMENT] +# rst-to-myst [![Build Status][ci-badge]][ci-link] [![codecov.io][cov-badge]][cov-link] @@ -21,179 +21,34 @@ or with sphinx: pip install rst-to-myst[sphinx] ``` -## Basic Usage - -### Command-Line Interface (CLI) - -For all commands see: - -```bash -rst2myst --help -``` - -Parse *via* stdin: - -```console -$ echo ":role:`content`" | rst2myst parse -{role}`content` -``` - -Parse *via* file: - -```console -$ rst2myst parse -f path/to/file.rst -... -``` - -Warnings are written to `stderr` and converted text to `stdout`. - -List available directives/roles: - -```console -$ rst2myst directives list -acks admonition ... - -$ rst2myst roles list -abbr abbreviation ... -``` - -Show details of a specific directive/role: - -```console -$ rst2myst directives show admonition -class: docutils.parsers.rst.directives.admonitions.Admonition -description: '' -has_content: true -name: admonition -optional_arguments: 0 -options: - class: class_option - name: unchanged -required_arguments: 1 - -$ rst2myst roles show abbreviation -description: |- - Generic interpreted text role, where the interpreted text is simply - wrapped with the provided node class. -module: docutils.parsers.rst.roles -name: abbreviation -``` - -### Python Interface (API) - -```python -from rst_to_myst import convert - -text, stderr_stream = convert(""" -Some RST -======== - -To **convert** -""") -``` - -## Advanced Usage - -You can select a language to translate directive/role names: +To then run a basic conversion of a whole project: ```console -$ rst2myst parse -l fr -f path/to/file.rst -... 
+$ rst2myst convert docs/**/*.rst ``` -You can select whether sphinx directives/roles are loaded: +For greater control, you can pass configuration with CLI options, or via a YAML configuration file: ```console -$ rst2myst parse --no-sphinx -f path/to/file.rst -... +$ rst2myst convert --config config.yaml docs/**/*.rst ``` -You can load directives/roles from extensions: - -```console -$ rst2myst parse -e sphinx.ext.autodoc -e sphinx_panels -f path/to/file.rst -... +`config.yaml`: + +```yaml +language: en +sphinx: true +extensions: +- sphinx_panels +default_domain: py +consecutive_numbering: true +colon_fences: true +dollar_math: true +conversions: + sphinx_panels.dropdown.DropdownDirective: parse_all ``` -Directives are converted according to [rst_to_myst/data/directives.yml](rst_to_myst/data/directives.yml), which can also be updated with an external YAML file, using the `-c/--conversions` option. -This is a mapping of directive import paths to a conversion type: - -- "eval_rst" (the default): no conversion, wrap in MyST eval_rst directive - ```` - ```{eval_rst} - .. name:: argument `link`_ - :option: value - - content `link`_ - ``` - ```` -- "direct": convert directly to MyST directive, keeping original argument/content - ```` - ```{name} argument `link`_ - :option: value - - content `link`_ - ``` - ```` -- "argument_only": convert to MyST directive and convert the argument to Markdown - ```` - ```{name} argument [link](link) - :option: value - - content `link`_ - ``` - ```` -- "content_only": convert to MyST directive and convert the content to Markdown - ```` - ```{name} argument `link`_ - :option: value - - content [link](link) - ``` - ```` -- "argument_content": convert to MyST directive and convert the content to Markdown - ```` - ```{name} argument [link](link) - :option: value - - content [link](link) - ``` - ```` - -If a conversion type is prepended by "_colon", use `:::` delimiters instad of ```` ``` ````, -e.g. 
"argument_content_colon" - -```` -:::{name} argument [link](link) -:option: value - -content [link](link) -::: -```` - -## Conversion Notes - -The conversion is designed to be fault tolerant, -i.e. it will not check if referenced targets, roles, directives, etc exist nor fail if they do not. - -The only syntax where some checks are required is matching anonymous references and auto-number/symbol footnotes with their definitions; these definitions must be available. - -- enumerated lists with roman numerals or alphabetic prefixes will be converted to numbers -- only one kind of footnote (i.e. no symbol prefixes) -- citation are turned into footnotes, with label prepended by `cite_prefix` -- inline targets are not convertible (and so ignored) -- If tables are not compatible with Markdown (single header row, no merged cells, etc), then they will be wrapped in an `eval_rst` -- Markdown blockquotes do not have an attribution syntax, so it is converted instead to `<p class="attribution">—text</p>` (the standard HTML render) - -## TODO - -The conversion covers almost all syntaxes (see <https://docutils.sourceforge.io/docs/user/rst/quickref.html>) except: - -- line blocks -- field lists (except at top of document, which are converted to front matter) -- option lists - -Also custom functions for directive parsing would be desirable. +See the documentation for more information. ## Development @@ -230,6 +85,15 @@ or trigger the GitHub Action job, by creating a release with a tag equal to the Note, this requires generating an API key on PyPi and adding it to the repository `Settings/Secrets`, under the name `PYPI_KEY`. +## TODO + +The conversion covers almost all syntaxes (see <https://docutils.sourceforge.io/docs/user/rst/quickref.html>) except: + +- line blocks +- option lists + +Also custom functions for directive parsing would be desirable. + [ci-badge]: https://github.com/executablebooks/rst-to-myst/workflows/CI/badge.svg?branch=main [ci-link]: https://github.com/executablebooks/rst-to-myst/actions?query=workflow%3ACI+branch%3Amain+event%3Apush [cov-badge]: https://codecov.io/gh/executablebooks/rst-to-myst/branch/main/graph/badge.svg diff --git a/docs/source/api.rst b/docs/source/api.rst new file mode 100644 index 0000000..c4a68dd --- /dev/null +++ b/docs/source/api.rst @@ -0,0 +1,29 @@ +Python API +========== + +Text to docutils AST +-------------------- + +.. autofunction:: rst_to_myst.parser.to_docutils_ast + +docutils AST to Markdown-It Tokens +----------------------------------- + +.. autoclass:: rst_to_myst.markdownit.RenderOutput + :members: + +.. autoclass:: rst_to_myst.markdownit.MarkdownItRenderer + :members: + +Markdown-It Tokens to Text +-------------------------- + +.. autofunction:: rst_to_myst.mdformat_render.from_tokens + +Full Conversion +--------------- + +.. autoclass:: rst_to_myst.mdformat_render.ConvertedOutput + :members: + +.. 
autofunction:: rst_to_myst.mdformat_render.rst_to_myst diff --git a/docs/source/cli.rst b/docs/source/cli.rst new file mode 100644 index 0000000..e831b41 --- /dev/null +++ b/docs/source/cli.rst @@ -0,0 +1,6 @@ +CLI Commands +============ + +.. click:: rst_to_myst.cli:main + :prog: rst2myst + :nested: full diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..6cf8488 --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,46 @@ +"""Configuration for Sphinx documentation build. + +It is recommended to use tox to run the build (see tox.ini): +`tox -e docs-clean` and `tox -e docs-update`, +or directly: `sphinx-build -n -W --keep-going docs/source docs/_build` +""" +from rst_to_myst import __version__ + +project = "RST-to-MyST" +copyright = "2021, Executable Book Project" # noqa: A001 +author = "Executable Book Project" +version = __version__ + +extensions = [ + # read Markdown files + "myst_parser", + "sphinx_panels", + # document CLI + "sphinx_click", + # document API + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.viewcode", +] + +html_theme = "sphinx_book_theme" +html_title = f"RST-to-MyST: v{__version__}" +html_theme_options = { + "home_page_in_toc": True, + "github_url": "https://github.com/executablebooks/rst-to-myst", + "repository_url": "https://github.com/executablebooks/rst-to-myst", + "use_issues_button": True, + "use_repository_button": True, + "repository_branch": "main", + "path_to_docs": "docs", +} + +intersphinx_mapping = { + "python": ("https://docs.python.org/3.8", None), + "sphinx": ("https://www.sphinx-doc.org/en/master", None), + "markdown_it": ("https://markdown-it-py.readthedocs.io/en/latest", None), +} + +nitpick_ignore = [ + ("py:class", name) for name in ["IO", "_io.StringIO", "docutils.nodes.document"] +] diff --git a/docs/source/index.md b/docs/source/index.md new file mode 100644 index 0000000..389a813 --- /dev/null +++ b/docs/source/index.md @@ -0,0 +1,58 @@ +# RST-to-MyST + +A tool for 
converting [ReStructuredText](https://docutils.sourceforge.io/) to [MyST Markdown](https://myst-parser.readthedocs.io/). + +## Getting Started + +To install from PyPI: + +```shell +pip install "rst-to-myst[sphinx]" +``` + +It is recommended to install into an isolated environment. +One way to do this is using [pipx](https://pypa.github.io/pipx/): + +```console +$ pipx install "rst-to-myst[sphinx]" +$ pipx list +venvs are in /Users/username/.local/pipx/venvs +apps are exposed on your $PATH at /Users/username/.local/bin + package rst-to-myst 0.1.2, Python 3.7.3 + - rst2myst +``` + +To then run a basic conversion of a whole project: + +```console +$ rst2myst convert docs/**/*.rst +``` + +For greater control, you can pass configuration with CLI options, or via a YAML configuration file: + +```console +$ rst2myst convert --config config.yaml docs/**/*.rst +``` + +`config.yaml`: + +```yaml +language: en +sphinx: true +extensions: +- sphinx_panels +default_domain: py +consecutive_numbering: true +colon_fences: true +dollar_math: true +conversions: + sphinx_panels.dropdown.DropdownDirective: parse_all +``` + +```{toctree} +:hidden: + +usage +cli +api +``` diff --git a/docs/source/usage.md b/docs/source/usage.md new file mode 100644 index 0000000..5044ecc --- /dev/null +++ b/docs/source/usage.md @@ -0,0 +1,181 @@ +# Extended Guide + +## How it works + +1. The RST text is converted to a modified version of docutils AST, which preserves lossless information about the source text. +2. The docutils AST is converted to [Markdown-It](https://markdown-it-py.readthedocs.io) syntax tokens. +3. The tokens are converted to Markdown text with [mdformat](https://mdformat.readthedocs.io). + +The conversion is designed to be fault tolerant, i.e. it will not check if referenced targets, roles, directives, etc exist nor fail if they do not. 
+ +The only syntax where some checks are required is matching anonymous references and auto-number/symbol footnotes with their definitions; these definitions must be available. + +Conversion notes: + +- enumerated lists with roman numerals or alphabetic prefixes will be converted to numbers +- only one kind of footnote (i.e. no symbol prefixes) +- citations are turned into footnotes, with label prepended by `cite_prefix` +- inline targets are not convertible (and so ignored) +- If tables are not compatible with Markdown (single header row, no merged cells, etc), then they will be wrapped in an `eval_rst` +- Markdown blockquotes do not have an attribution syntax, so it is converted instead to `<p class="attribution">—text</p>` (the standard HTML render) + +## Converting text snippets + +Either use the `stream` CLI command, passing in `stdin`: + +```console +$ echo ":role:`content`" | rst2myst stream - +{role}`content` +``` + +or use the API: + +```python +from rst_to_myst import rst_to_myst +output = rst_to_myst(":role:`content`") +print(output.text) +``` + +## Converting multiple files + +Use the `convert` CLI command, with standard file globbing. +The `--dry-run` option will run without actually writing any files: + +```console +$ rst2myst convert --dry-run docs/**/*.rst +docs/source/api.rst -> docs/source/api.md +CONVERTED (extensions: []) +docs/source/cli.rst -> docs/source/cli.md +CONVERTED (extensions: ['deflist']) + +FINISHED ALL! (extensions: ['deflist']) +``` + +Extensions specify which MyST optional extensions are required to reparse the Markdown text. + +## Configuring the conversion + +The [CLI](./cli.rst) and [API](./api.rst) documentation list all the available configurations. + +For the CLI, you can directly use the option flags, or you can provide all the options in a YAML configuration file, with the `--config` option: + +```console +$ rst2myst convert --config config.yaml docs/**/*.rst +``` + +YAML config options mirror the CLI options, except using `_` instead of `-`, e.g. + +```yaml +language: en +sphinx: true +extensions: + - sphinx_panels +default_domain: py +consecutive_numbering: true +colon_fences: true +dollar_math: true +conversions: + sphinx_panels.dropdown.DropdownDirective: parse_all +``` + +### Directive conversion + +Directives are converted according to a mapping of the directive module path to a conversion type: + +- "eval_rst" (the default): no conversion, wrap in MyST `eval_rst` directive + + ```` + ```{eval_rst} + .. 
name:: argument `link`_ + :option: value + + content `link`_ + ``` + ```` + +- "direct": convert directly to MyST directive, keeping original argument/content + + ```` + ```{name} argument `link`_ + :option: value + + content `link`_ + ``` + ```` + +- "parse_argument": convert to MyST directive and convert the argument to Markdown + + ```` + ```{name} argument [link](link) + :option: value + + content `link`_ + ``` + ```` + +- "parse_content": convert to MyST directive and convert the content to Markdown + + ```` + ```{name} argument `link`_ + :option: value + + content [link](link) + ``` + ```` + +- "parse_all": convert to MyST directive and convert both the argument and content to Markdown + + ```` + ```{name} argument [link](link) + :option: value + + content [link](link) + ``` + ```` + +The default conversions are listed below, or you can use the `conversions` option to update these conversions. +Also use the `colon_fences` option to control whether directives with Markdown content are delimited by `:::`. + +````{dropdown} **Directive conversion defaults** + +```{literalinclude} ../../rst_to_myst/data/directives.yml +:language: yaml +``` + +```` + +## Additional Functionality + +### Listing available directives/roles + +List available directives/roles: + +```console +$ rst2myst directives list +acks admonition ... + +$ rst2myst roles list +abbr abbreviation ... +``` + +Show details of a specific directive/role: + +```console +$ rst2myst directives show admonition +class: docutils.parsers.rst.directives.admonitions.Admonition +description: '' +has_content: true +name: admonition +optional_arguments: 0 +options: + class: class_option + name: unchanged +required_arguments: 1 + +$ rst2myst roles show abbreviation +description: |- + Generic interpreted text role, where the interpreted text is simply + wrapped with the provided node class. 
+module: docutils.parsers.rst.roles +name: abbreviation +``` diff --git a/pyproject.toml b/pyproject.toml index 2bb0a3f..3753d48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,18 +17,34 @@ classifiers = [ description-file = "README.md" keywords = "restructuredtext,markdown,myst" -requires-python=">=3.6" -requires=["docutils==0.15", "importlib_resources~=3.1", "pyyaml", "click~=7.1"] +requires-python=">=3.7" +requires=[ + "docutils>=0.15,<0.18", + "importlib_resources~=3.1;python_version<'3.9'", + "pyyaml", + "markdown-it-py~=1.0", + "mdformat~=0.7.6", + "mdformat-myst~=0.1.4", + "mdformat-deflist~=0.1.0", + "click~=7.1" +] [tool.flit.entrypoints."console_scripts"] rst2myst = "rst_to_myst.cli:main" [tool.flit.metadata.requires-extra] -sphinx = ["sphinx~=3.2"] +sphinx = ["sphinx>=3.2,<5"] test = [ "pytest~=6.0", "coverage", "pytest-cov", + "pytest-regressions" +] +docs = [ + "myst-parser~=0.15.0", + "sphinx-book-theme", + "sphinx-click~=2.6", + "sphinx-panels", ] [tool.flit.sdist] diff --git a/rst_to_myst/__init__.py b/rst_to_myst/__init__.py index 0fca28e..f81ffa4 100644 --- a/rst_to_myst/__init__.py +++ b/rst_to_myst/__init__.py @@ -1,6 +1,6 @@ """Convert RST to MyST-Markdown.""" +from .mdformat_render import rst_to_myst # noqa: F401 from .namespace import compile_namespace # noqa: F401 -from .parser import to_ast # noqa: F401 -from .renderer import convert, render # noqa: F401 +from .parser import to_docutils_ast # noqa: F401 -__version__ = "0.1.2" +__version__ = "0.2.0" diff --git a/rst_to_myst/cli.py b/rst_to_myst/cli.py index 22ed405..52f9d0b 100644 --- a/rst_to_myst/cli.py +++ b/rst_to_myst/cli.py @@ -1,17 +1,45 @@ from io import TextIOWrapper from pathlib import Path +from typing import List, Mapping, Optional import click import yaml -from . import compile_namespace, convert, to_ast +from . 
import compile_namespace, rst_to_myst, to_docutils_ast from .utils import yaml_dump @click.group(context_settings={"help_option_names": ["-h", "--help"]}) @click.version_option() def main(): - """CLI for rst-to-myst""" + """CLI for converting ReStructuredText to MyST Markdown.""" + + +def read_config(ctx, param, value): + if not value: + return + try: + with open(value, encoding="utf8") as handle: + data = yaml.safe_load(handle) + except Exception as exc: + raise click.BadOptionUsage( + "--config", f"Error reading configuration file: {exc}", ctx + ) + + ctx.default_map = ctx.default_map or {} + ctx.default_map.update(data or {}) + + return value + + +OPT_CONFIG = click.option( + "--config", + help="YAML file to read default configuration from", + is_eager=True, + expose_value=False, + type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True), + callback=read_config, +) OPT_LANGUAGE = click.option( @@ -23,24 +51,42 @@ def main(): help="Language code for directive names", ) -# TODO don't hang when when no stdin provided (test file.isatty()?) 
-OPT_READ = click.option( - "--file", - "-f", - type=click.File("r"), - default="-", - help="Input file [default: stdin]", + +ARG_STREAM = click.argument("stream", type=click.File("r"), metavar="PATH_OR_STDIN") + + +ARG_PATHS = click.argument( + "paths", + type=click.Path(exists=True, file_okay=True, dir_okay=True), + nargs=-1, +) + +OPT_ENCODING = click.option( + "--encoding", default="utf8", show_default=True, help="Encoding for read/write" ) def read_conversions(ctx, param, value): if not value: return {} - path = Path(value) - if not path.exists(): - raise click.BadParameter(f"Path does not exist: {value}") - with path.open("r") as handle: - data = yaml.safe_load(handle) + if isinstance(value, Mapping): + # read from config file + data = value + else: + path = Path(str(value)) + if not path.exists(): + raise click.BadOptionUsage( + "--conversions", f"Path does not exist: {value}", ctx + ) + try: + with path.open("r") as handle: + data = yaml.safe_load(handle) + except Exception as exc: + raise click.BadOptionUsage( + "--conversions", f"Error reading conversions file: {exc}", ctx + ) + if not isinstance(data, Mapping): + raise click.BadOptionUsage("--conversions", f"Not a mapping: {value!r}", ctx) return data @@ -50,7 +96,7 @@ def read_conversions(ctx, param, value): default=None, callback=read_conversions, metavar="PATH", - help="YAML file containing directive conversions", + help="YAML file mapping directives -> conversions", ) @@ -68,27 +114,82 @@ def check_sphinx(ctx, param, value): OPT_SPHINX = click.option( "--sphinx/--no-sphinx", - "-s/-ns", is_flag=True, default=True, + show_default=True, callback=check_sphinx, help="Load sphinx.", ) + + +def split_extension(ctx, param, value): + if isinstance(value, list): + # if reading from config + return value + return [ext.strip() for ext in value.split(",")] if value else [] + + OPT_EXTENSIONS = click.option( - "--extensions", "-e", multiple=True, help="Load sphinx extensions." 
+ "--extensions", + "-e", + callback=split_extension, + help="A comma-separated list of sphinx extensions to load.", +) + +OPT_DEFAULT_DOMAIN = click.option( + "--default-domain", + "-dd", + default="py", + show_default=True, + help="Default sphinx domain", +) +OPT_DEFAULT_ROLE = click.option( + "--default-role", + "-dr", + default=None, + help="Default sphinx role [default: convert to literal]", +) +OPT_CITE_PREFIX = click.option( + "--cite-prefix", + "-cp", + default="cite", + show_default=True, + help="Prefix to add to citation references", +) +OPT_RAISE_ON_WARNING = click.option( + "--raise-on-warning", "-W", is_flag=True, help="Raise exception on parsing warning" +) +OPT_CONSECUTIVE_NUMBERING = click.option( + "--consecutive-numbering/--no-consecutive-numbering", + default=True, + show_default=True, + help="Apply consecutive numbering to ordered lists", +) +OPT_COLON_FENCES = click.option( + "--colon-fences/--no-colon-fences", + default=True, + show_default=True, + help="Use colon fences for directives with parsed content", +) +OPT_DOLLAR_MATH = click.option( + "--dollar-math/--no-dollar-math", + default=True, + show_default=True, + help="Convert math roles to dollar delimited math", ) @main.command("ast") -@OPT_READ +@ARG_STREAM @OPT_LANGUAGE @OPT_SPHINX @OPT_EXTENSIONS @OPT_CONVERSIONS -def ast(file: TextIOWrapper, language: str, sphinx: bool, extensions, conversions): - """Convert ReStructuredText to an Abstract Syntax Tree.""" - text = file.read() - document, _ = to_ast( +@OPT_CONFIG +def ast(stream: TextIOWrapper, language: str, sphinx: bool, extensions, conversions): + """Parse file / stdin (-) and print RST Abstract Syntax Tree.""" + text = stream.read() + document, _ = to_docutils_ast( text, warning_stream=click.get_text_stream("stderr"), language_code=language, @@ -100,24 +201,167 @@ def ast(file: TextIOWrapper, language: str, sphinx: bool, extensions, conversion click.echo(output) -@main.command("parse") -@OPT_READ +@main.command("tokens") 
+@ARG_STREAM @OPT_LANGUAGE @OPT_SPHINX @OPT_EXTENSIONS +@OPT_DEFAULT_DOMAIN +@OPT_DEFAULT_ROLE +@OPT_CITE_PREFIX +@OPT_COLON_FENCES +@OPT_DOLLAR_MATH @OPT_CONVERSIONS -def parse(file: TextIOWrapper, language: str, sphinx: bool, extensions, conversions): - """Convert ReStructuredText to MyST Markdown.""" - text = file.read() - output, _ = convert( +@OPT_CONFIG +def tokens( + stream: TextIOWrapper, + language: str, + sphinx: bool, + extensions: List[str], + default_domain: str, + default_role: Optional[str], + cite_prefix: str, + colon_fences: bool, + dollar_math: bool, + conversions, +): + """Parse file / stdin (-) and print Markdown-It tokens.""" + text = stream.read() + output = rst_to_myst( text, - click.get_text_stream("stderr"), + warning_stream=click.get_text_stream("stderr"), language_code=language, use_sphinx=sphinx, extensions=extensions, conversions=conversions, + default_domain=default_domain, + default_role=default_role, + cite_prefix=cite_prefix + "_", + colon_fences=colon_fences, + dollar_math=dollar_math, ) - click.echo(output) + click.echo(yaml_dump([token.as_dict() for token in output.tokens])) + + +@main.command("stream") +@ARG_STREAM +@OPT_LANGUAGE +@OPT_SPHINX +@OPT_EXTENSIONS +@OPT_DEFAULT_DOMAIN +@OPT_DEFAULT_ROLE +@OPT_CITE_PREFIX +@OPT_CONSECUTIVE_NUMBERING +@OPT_COLON_FENCES +@OPT_DOLLAR_MATH +@OPT_CONVERSIONS +@OPT_CONFIG +def stream( + stream: TextIOWrapper, + language: str, + sphinx: bool, + extensions: List[str], + default_domain: str, + default_role: Optional[str], + cite_prefix: str, + consecutive_numbering: bool, + colon_fences: bool, + dollar_math: bool, + conversions, +): + """Parse file / stdin (-) and print Markdown text.""" + text = stream.read() + output = rst_to_myst( + text, + warning_stream=click.get_text_stream("stderr"), + language_code=language, + use_sphinx=sphinx, + extensions=extensions, + conversions=conversions, + default_domain=default_domain, + default_role=default_role, + cite_prefix=cite_prefix + "_", + 
consecutive_numbering=consecutive_numbering, + colon_fences=colon_fences, + dollar_math=dollar_math, + ) + click.echo(output.text) + + +@main.command("convert") +@ARG_PATHS +@click.option("--dry-run", "-d", is_flag=True, help="Do not write/remove any files") +@click.option("--replace-files", "-R", is_flag=True, help="Remove parsed files") +@click.option("--stop-on-fail", "-S", is_flag=True, help="Stop on first failure") +@OPT_RAISE_ON_WARNING +@OPT_LANGUAGE +@OPT_SPHINX +@OPT_EXTENSIONS +@OPT_DEFAULT_DOMAIN +@OPT_DEFAULT_ROLE +@OPT_CITE_PREFIX +@OPT_CONSECUTIVE_NUMBERING +@OPT_COLON_FENCES +@OPT_DOLLAR_MATH +@OPT_CONVERSIONS +@OPT_ENCODING +@OPT_CONFIG +def convert( + paths: List[str], + dry_run: bool, + replace_files: bool, + raise_on_warning: bool, + stop_on_fail: bool, + language: str, + sphinx: bool, + extensions: List[str], + default_domain: str, + default_role: Optional[str], + cite_prefix: str, + consecutive_numbering: bool, + colon_fences: bool, + dollar_math: bool, + conversions, + encoding: str, +): + """Convert one or more files.""" + myst_extensions = set() + for path in paths: + path = Path(path) + output_path = path.parent / (path.stem + ".md") + click.secho(f"{path} -> {output_path}", fg="blue") + input_text = path.read_text(encoding) + try: + output = rst_to_myst( + input_text, + warning_stream=click.get_text_stream("stderr"), + raise_on_warning=raise_on_warning, + language_code=language, + use_sphinx=sphinx, + extensions=extensions, + conversions=conversions, + default_domain=default_domain, + default_role=default_role, + cite_prefix=cite_prefix + "_", + consecutive_numbering=consecutive_numbering, + colon_fences=colon_fences, + dollar_math=dollar_math, + ) + except Exception as exc: + click.secho(f"FAILED:\n{exc}", fg="red") + if stop_on_fail: + raise SystemExit(1) + continue + + click.secho(f"CONVERTED (extensions: {list(output.extensions)!r})", fg="green") + myst_extensions.update(output.extensions) + if dry_run: + continue + 
output_path.write_text(output.text, encoding=encoding) + if replace_files and output_path != path: + path.unlink() + click.echo("") + click.secho(f"FINISHED ALL! (extensions: {list(myst_extensions)!r})", fg="green") @main.group("directives") @@ -140,7 +384,7 @@ def directives_list(sphinx, extensions): @OPT_EXTENSIONS @OPT_LANGUAGE def directives_show(name, sphinx, extensions, language): - """List available directives.""" + """Show information about a single directive.""" namespace = compile_namespace( extensions=extensions, use_sphinx=sphinx, language_code=language ) @@ -160,7 +404,7 @@ def roles(): @OPT_SPHINX @OPT_EXTENSIONS def roles_list(sphinx, extensions): - """List available directives.""" + """List available roles.""" namespace = compile_namespace(extensions=extensions, use_sphinx=sphinx) click.echo(" ".join(namespace.list_roles())) @@ -171,7 +415,7 @@ def roles_list(sphinx, extensions): @OPT_EXTENSIONS @OPT_LANGUAGE def roles_show(name, sphinx, extensions, language): - """List available directives.""" + """Show information about a single role.""" namespace = compile_namespace( extensions=extensions, use_sphinx=sphinx, language_code=language ) diff --git a/rst_to_myst/data/directives.yml b/rst_to_myst/data/directives.yml index 61aa00f..a8464e2 100644 --- a/rst_to_myst/data/directives.yml +++ b/rst_to_myst/data/directives.yml @@ -1,53 +1,51 @@ # value one of: # - "eval_rst": no conversion, wrap in MyST eval_rst directive # - "direct": convert directly to MyST directive, keeping original argument/content -# - "argument_only": convert to MyST directive and convert the argument to Markdown -# - "content_only": convert to MyST directive and convert the content to Markdown -# - "argument_content": convert to MyST directive and convert the content to Markdown - -# if prepended by "_colon", use ::: delimiters instad of ``` +# - "parse_argument": convert to MyST directive and convert the argument to Markdown +# - "parse_content": convert to MyST directive and convert the 
content to Markdown +# - "parse_all": convert to MyST directive and convert the content to Markdown # admonitions (docutils) -docutils.parsers.rst.directives.admonitions.Admonition: argument_content_colon -docutils.parsers.rst.directives.admonitions.Attention: content_only_colon -docutils.parsers.rst.directives.admonitions.Caution: content_only_colon -docutils.parsers.rst.directives.admonitions.Danger: content_only_colon -docutils.parsers.rst.directives.admonitions.Error: content_only_colon -docutils.parsers.rst.directives.admonitions.Hint: content_only_colon -docutils.parsers.rst.directives.admonitions.Important: content_only_colon -docutils.parsers.rst.directives.admonitions.Note: content_only_colon -docutils.parsers.rst.directives.admonitions.Tip: content_only_colon -docutils.parsers.rst.directives.admonitions.Warning: content_only_colon +docutils.parsers.rst.directives.admonitions.Admonition: parse_all +docutils.parsers.rst.directives.admonitions.Attention: parse_content +docutils.parsers.rst.directives.admonitions.Caution: parse_content +docutils.parsers.rst.directives.admonitions.Danger: parse_content +docutils.parsers.rst.directives.admonitions.Error: parse_content +docutils.parsers.rst.directives.admonitions.Hint: parse_content +docutils.parsers.rst.directives.admonitions.Important: parse_content +docutils.parsers.rst.directives.admonitions.Note: parse_content +docutils.parsers.rst.directives.admonitions.Tip: parse_content +docutils.parsers.rst.directives.admonitions.Warning: parse_content # docutils other (see https://docutils.sourceforge.io/docs/ref/rst/directives.html#figure) docutils.parsers.rst.directives.body.CodeBlock: direct docutils.parsers.rst.directives.body.Compound: eval_rst -docutils.parsers.rst.directives.body.Container: content_only_colon +docutils.parsers.rst.directives.body.Container: parse_content docutils.parsers.rst.directives.body.Epigraph: eval_rst docutils.parsers.rst.directives.body.Highlights: eval_rst 
docutils.parsers.rst.directives.body.LineBlock: eval_rst docutils.parsers.rst.directives.body.MathBlock: eval_rst docutils.parsers.rst.directives.body.ParsedLiteral: eval_rst docutils.parsers.rst.directives.body.PullQuote: eval_rst -docutils.parsers.rst.directives.body.Rubric: argument_only_colon -docutils.parsers.rst.directives.body.Sidebar: argument_content_colon -docutils.parsers.rst.directives.body.Topic: argument_content_colon +docutils.parsers.rst.directives.body.Rubric: parse_argument +docutils.parsers.rst.directives.body.Sidebar: parse_all +docutils.parsers.rst.directives.body.Topic: parse_all docutils.parsers.rst.directives.html.Meta: eval_rst -docutils.parsers.rst.directives.images.Figure: content_only_colon +docutils.parsers.rst.directives.images.Figure: parse_content docutils.parsers.rst.directives.images.Image: direct docutils.parsers.rst.directives.misc.Class: eval_rst -docutils.parsers.rst.directives.misc.Date: eval_rst +docutils.parsers.rst.directives.misc.Date: direct docutils.parsers.rst.directives.misc.DefaultRole: eval_rst docutils.parsers.rst.directives.misc.Include: direct docutils.parsers.rst.directives.misc.Raw: direct -docutils.parsers.rst.directives.misc.Replace: content_only +docutils.parsers.rst.directives.misc.Replace: parse_content docutils.parsers.rst.directives.misc.Role: eval_rst docutils.parsers.rst.directives.misc.TestDirective: eval_rst docutils.parsers.rst.directives.misc.Title: direct docutils.parsers.rst.directives.misc.Unicode: eval_rst -docutils.parsers.rst.directives.parts.Contents: argument_only_colon -docutils.parsers.rst.directives.parts.Footer: content_only_colon -docutils.parsers.rst.directives.parts.Header: content_only_colon +docutils.parsers.rst.directives.parts.Contents: parse_argument +docutils.parsers.rst.directives.parts.Footer: parse_content +docutils.parsers.rst.directives.parts.Header: parse_content docutils.parsers.rst.directives.parts.Sectnum: eval_rst docutils.parsers.rst.directives.references.TargetNotes: 
eval_rst docutils.parsers.rst.directives.tables.CSVTable: direct @@ -67,14 +65,14 @@ sphinx.directives.patches.CSVTable: eval_rst # list-table sphinx.directives.patches.ListTable: eval_rst # figure -sphinx.directives.patches.Figure: content_only_colon +sphinx.directives.patches.Figure: parse_content # meta sphinx.directives.patches.Meta: eval_rst # deprecated, versionadded, versionchanged -sphinx.domains.changeset.VersionChange: content_only_colon +sphinx.domains.changeset.VersionChange: parse_content # seealso -sphinx.directives.other.SeeAlso: content_only_colon +sphinx.directives.other.SeeAlso: parse_content # index sphinx.domains.index.IndexDirective: direct # default-domain @@ -105,7 +103,7 @@ sphinx.directives.other.Acks: eval_rst # hlist sphinx.directives.other.HList: eval_rst # only -sphinx.directives.other.Only: content_only_titles +sphinx.directives.other.Only: parse_content_titles # c:member # c:var @@ -207,6 +205,10 @@ sphinx.domains.std.Cmdoption: eval_rst # std:envvar sphinx.domains.std.EnvVar: eval_rst # std:glossary -sphinx.domains.std.Glossary: eval_rst +sphinx.domains.std.Glossary: parse_content # std:productionlist sphinx.domains.std.ProductionList: eval_rst + +# third-party directives +sphinxcontrib.bibtex.directives.BibliographyDirective: direct +sphinx_panels.dropdown.DropdownDirective: parse_all diff --git a/rst_to_myst/inliner.py b/rst_to_myst/inliner.py index fbd884b..843b47f 100644 --- a/rst_to_myst/inliner.py +++ b/rst_to_myst/inliner.py @@ -372,8 +372,6 @@ def parse( 4. If not found or invalid, generate a warning and ignore the start-string. 5. Implicit inline markup (e.g. standalone URIs) is found last. """ - # TODO Needs to be refactored for nested inline markup - # (add nested_parse() method?)
self.reporter = memo.reporter # type: Reporter self.document = memo.document # type: nodes.document self.language = memo.language @@ -506,7 +504,7 @@ def interpreted_or_phrase_ref(self, match: Match, lineno: int) -> DispatchResult def phrase_ref( self, before: str, after: str, rawsource: str, escaped: str, text: str ) -> DispatchResult: - """Handle phrase references e.g. `phrase ref`_, `embedded `_ """ + """Handle phrase references e.g. `phrase ref`_, `embedded `_""" match = self.patterns.embedded_link.search(escaped) if match: # embedded or text = unescape(escaped[: match.start(0)]) @@ -618,7 +616,7 @@ def literal(self, match: Match, lineno: int) -> DispatchResult: return before, inlines, remaining, sysmessages def inline_internal_target(self, match: Match, lineno: int) -> DispatchResult: - """Handle an inline internal target, e.g. _`target` """ + """Handle an inline internal target, e.g. _`target`""" before, inlines, remaining, sysmessages, endstring = self.inline_obj( match, lineno, self.patterns.target, nodes.target ) @@ -632,7 +630,7 @@ def inline_internal_target(self, match: Match, lineno: int) -> DispatchResult: return before, inlines, remaining, sysmessages def substitution_reference(self, match: Match, lineno: int) -> DispatchResult: - """Handle a substitution reference, e.g. |sub| """ + """Handle a substitution reference, e.g. |sub|""" before, inlines, remaining, sysmessages, endstring = self.inline_obj( match, lineno, self.patterns.substitution_ref, nodes.substitution_reference ) @@ -687,7 +685,7 @@ def footnote_reference(self, match: Match, lineno: int) -> DispatchResult: def reference( self, match: Match, lineno: int, anonymous: bool = False ) -> DispatchResult: - """Handle simple references, e.g. reference_ and anonymous__ """ + """Handle simple references, e.g. 
reference_ and anonymous__""" referencename = match.group("refname") refname = normalize_name(referencename) referencenode = nodes.reference( @@ -707,7 +705,7 @@ def reference( return (string[:matchstart], [referencenode], string[matchend:], []) def anonymous_reference(self, match: Match, lineno: int) -> DispatchResult: - """Handle anonymous references, e.g. anonymous__ """ + """Handle anonymous references, e.g. anonymous__""" return self.reference(match, lineno, anonymous=True) def inline_obj( @@ -805,7 +803,7 @@ def standalone_uri(self, match: Match, lineno: int) -> ImplicitResult: raise MarkupMismatch("not a valid scheme") def pep_reference(self, match: Match, lineno: int) -> ImplicitResult: - """Handle reference to a PEP (Python Enhancement Proposal), e.g. `PEP 287`__ """ + """Handle reference to a PEP (Python Enhancement Proposal), e.g. `PEP 287`__""" text = match.group(0) if text.startswith("pep-"): pepnum = int(match.group("pepnum1")) @@ -821,7 +819,7 @@ def pep_reference(self, match: Match, lineno: int) -> ImplicitResult: return [nodes.reference(unescape(text, True), unescaped, refuri=ref)] def rfc_reference(self, match: Match, lineno: int) -> ImplicitResult: - """Handle reference to a RFC (Request For Comments), e.g. `RFC 2822`__ """ + """Handle reference to a RFC (Request For Comments), e.g. 
`RFC 2822`__""" text = match.group(0) if text.startswith("RFC"): rfcnum = int(match.group("rfcnum")) @@ -833,6 +831,8 @@ def rfc_reference(self, match: Match, lineno: int) -> ImplicitResult: class InlinerMyst(Inliner): + """Inliner that does not run roles.""" + def interpreted( self, rawsource: str, text: str, role: str, lineno: int ) -> Tuple[List[nodes.Node], List[nodes.system_message]]: diff --git a/rst_to_myst/markdownit.py b/rst_to_myst/markdownit.py new file mode 100644 index 0000000..f7e4275 --- /dev/null +++ b/rst_to_myst/markdownit.py @@ -0,0 +1,657 @@ +"""Convert to markdown-it tokens, which can then be rendered by mdformat.""" +from io import StringIO +from textwrap import indent +from typing import IO, Any, Dict, List, NamedTuple, Optional, Tuple + +from docutils import nodes +from markdown_it.token import Token + + +class RenderOutput(NamedTuple): + tokens: List[Token] + env: Dict[str, Any] + + +class MarkdownItRenderer(nodes.GenericNodeVisitor): + """Render docutils AST to Markdown-It token stream.""" + + def __init__( + self, + document: nodes.document, + *, + warning_stream: Optional[IO] = None, + raise_on_warning: bool = False, + cite_prefix: str = "cite_", + default_role: Optional[str] = None, + colon_fences: bool = True, + dollar_math: bool = True, + ): + self._document = document + self._warning_stream = warning_stream or StringIO() + self.raise_on_warning = raise_on_warning + # prefix added to citation labels + self.cite_prefix = cite_prefix + # if no default role, convert to literal + self.default_role = default_role + self.colon_fences = colon_fences + self.dollar_math = dollar_math + + self.reset_state() + + def reset_state(self): + # record current state, that can affect children tokens + self._tokens: List[Token] = [] + self._env = {"references": {}, "duplicate_refs": []} + self._inline: Optional[Token] = None + self.parent_tokens: Dict[str, int] = {} + # [(key path, tokens), ...] 
+ self._front_matter_tokens: List[Tuple[List[str], List[Token]]] = [] + self._tight_list = True + + @property + def document(self) -> nodes.document: + return self._document + + def warning(self, message: str, line: Optional[int]): + if line is not None: + self._warning_stream.write(f"RENDER WARNING:{line}: {message}\n") + else: + self._warning_stream.write(f"RENDER WARNING: {message}\n") + + def to_tokens(self) -> RenderOutput: + """Reset tokens and convert full document.""" + self.reset_state() + self._document.walkabout(self) + + # add front-matter that should be nested parsed + if self._front_matter_tokens: + fm_tokens = [] + fm_tokens.append(Token("front_matter_tokens_open", "", 1)) + for key_path, tokens in self._front_matter_tokens: + fm_tokens.append( + Token("front_matter_key_open", "", 1, meta={"key_path": key_path}) + ) + fm_tokens.extend(tokens) + fm_tokens.append(Token("front_matter_key_close", "", -1)) + fm_tokens.append(Token("front_matter_tokens_close", "", -1)) + self._tokens = fm_tokens + self._tokens + + return RenderOutput(self._tokens[:], self._env) + + def nested_parse(self, nodes: List[nodes.Element]) -> List[Token]: + new_inst = MarkdownItRenderer( + document=self._document, + warning_stream=self._warning_stream, + cite_prefix=self.cite_prefix, + default_role=self.default_role, + colon_fences=self.colon_fences, + dollar_math=self.dollar_math, + ) + for node in nodes: + node.walkabout(new_inst) + return new_inst._tokens + + def add_token( + self, ttype: str, tag: str, nesting: int, *, content: str = "", **kwargs: Any + ) -> Token: + """A markdown-it token to the stream, handling inline tokens and children.""" + token = Token(ttype, tag, nesting, content=content, **kwargs) + # record entries and exits + if ttype.endswith("_open"): + self.parent_tokens.setdefault(ttype[:-5], 0) + self.parent_tokens[ttype[:-5]] += 1 + if ttype.endswith("_close"): + self.parent_tokens.setdefault(ttype[:-6], 0) + self.parent_tokens[ttype[:-6]] -= 1 + if 
self.parent_tokens[ttype[:-6]] <= 0: + self.parent_tokens.pop(ttype[:-6]) + # decide whether we should be adding as an inline child + if ttype in {"paragraph_open", "heading_open", "th_open", "td_open", "dt_open"}: + self._tokens.append(token) + self._inline = Token("inline", "", 0, children=[]) + self._tokens.append(self._inline) + elif ttype in { + "paragraph_close", + "heading_close", + "th_close", + "td_close", + "dt_close", + }: + self._tokens.append(token) + self._inline = None + elif self._inline: + self._inline.children.append(token) + else: + self._tokens.append(token) + return token + + def default_visit(self, node): + self.unknown_visit(node) + + def default_departure(self, node): + self.unknown_departure(node) + + def unknown_visit(self, node): + message = f"no visit method for: {node.__class__}" + self.warning(message, node.line) + if self.raise_on_warning: + raise NotImplementedError(message) + + def unknown_departure(self, node): + message = f"no depart method for: {node.__class__}" + self.warning(message, node.line) + if self.raise_on_warning: + raise NotImplementedError(message) + + # Skipped components + + def visit_document(self, node): + pass + + def depart_document(self, node): + pass + + def visit_Element(self, node): + pass + + def depart_Element(self, node): + pass + + def visit_system_message(self, node): + # ignore + raise nodes.SkipNode + + def visit_problematic(self, node): + # ignore + raise nodes.SkipNode + + # CommonMark components + + def visit_section(self, node): + pass # handled by title + + def depart_section(self, node): + pass + + def visit_title(self, node): + token = self.add_token("heading_open", f"h{node['level']}", 1) + token.markup = "#" * node["level"] + + def depart_title(self, node): + token = self.add_token("heading_close", f"h{node['level']}", -1) + token.markup = "#" * node["level"] + + def visit_paragraph(self, node): + if self.parent_tokens.get("th") or self.parent_tokens.get("td"): + # table cells are treated as 
paragraphs already + return + token = self.add_token("paragraph_open", "p", 1) + if self.parent_tokens.get("list_item") and self._tight_list: + # paragraphs in tight lists are hidden + token.hidden = True + + def depart_paragraph(self, node): + if self.parent_tokens.get("th") or self.parent_tokens.get("td"): + # table cells are treated as paragraphs already + return + self.add_token("paragraph_close", "p", -1) + + def visit_Text(self, node): + self.add_token("text", "", 0, content=node.astext()) + raise nodes.SkipNode + + def visit_emphasis(self, node): + self.add_token("em_open", "em", 1, markup="*") + + def depart_emphasis(self, node): + self.add_token("em_close", "em", -1, markup="*") + + def visit_strong(self, node): + self.add_token("strong_open", "strong", 1, markup="**") + + def depart_strong(self, node): + self.add_token("strong_close", "strong", -1, markup="**") + + def visit_transition(self, node): + self.add_token("hr", "hr", 0, markup="---") + raise nodes.SkipNode + + def visit_bullet_list(self, node): + self.add_token("bullet_list_open", "ul", 1, markup=node["bullet"]) + + def depart_bullet_list(self, node): + self.add_token("bullet_list_close", "ul", -1, markup=node["bullet"]) + + def visit_enumerated_list(self, node): + token = self.add_token("ordered_list_open", "ol", 1, markup=".") + if "start" in node: + token.attrs["start"] = node["start"] + + def depart_enumerated_list(self, node): + self.add_token("ordered_list_close", "ol", -1, markup=".") + + def visit_list_item(self, node): + token = self.add_token("list_item_open", "li", 1) + if "style" in node: + if node["style"] == "bullet": + token.markup = node["prefix"].strip() + elif node["style"] == "enumerated": + token.markup = "." + # a list is loose if any of its list items directly contain + # two block-level elements, otherwise tight. 
In this case paragraphs are hidden + self._tight_list = len(node.children) < 2 + + def depart_list_item(self, node): + self.add_token("list_item_close", "li", -1) + + def visit_literal(self, node): + self.add_token("code_inline", "code", 0, markup="`", content=node.astext()) + raise nodes.SkipNode + + def visit_literal_block(self, node): + text = node.astext() + if not text.endswith("\n"): + text += "\n" + self.add_token("code_block", "code", 0, content=text) + raise nodes.SkipNode + + def visit_block_quote(self, node): + self.add_token("blockquote_open", "blockquote", 1, markup=">") + + def depart_block_quote(self, node): + self.add_token("blockquote_close", "blockquote", -1, markup=">") + + def visit_attribution(self, node): + # Markdown block quotes do not have an attribution syntax, + # so we add a best approximation + token = self.add_token("html_inline", "", 0) + token.content = f'

<p class="attribution">-{node.astext()}</p>

' + raise nodes.SkipNode + + def visit_reference(self, node): + # we assume all reference names are plain text + text = node.astext() + + if "standalone_uri" in node: + # autolink + token = self.add_token("link_open", "a", 1, markup="autolink", info="auto") + token.attrs["href"] = node["refuri"] + self.add_token("text", "", 0, content=node["refuri"]) + self.add_token("link_close", "a", -1, markup="autolink", info="auto") + elif "refname" in node: + # reference a link definition `[refname]: url`, or a target `(refname)=` + # TODO ensure mdformat does not wrap in <> + token = self.add_token( + "link_open", + "a", + 1, + attrs={"href": node["refname"]}, + # TODO should only add label if target found? + meta={"label": node["refname"]}, + ) + self.add_token("text", "", 0, content=text) + self.add_token("link_close", "a", -1) + elif "refuri" in node: + # external link + # TODO ensure prefixed with http://? + token = self.add_token("link_open", "a", 1, attrs={"href": node["refuri"]}) + self.add_token("text", "", 0, content=text) + self.add_token("link_close", "a", -1) + elif "refid" in node: + # anonymous links, pointing to internal targets + # TODO ensure mdformat does not wrap in <> + token = self.add_token( + "link_open", + "a", + 1, + attrs={"href": node["refid"]}, + ) + self.add_token("text", "", 0, content=text) + self.add_token("link_close", "a", -1) + else: + message = f"unknown reference type: {node.rawsource}" + self.warning(message, node.line) + if self.raise_on_warning: + raise NotImplementedError(message) + + raise nodes.SkipNode + + def visit_target(self, node): + if "inline" in node and node["inline"]: + # TODO inline targets + message = f"inline targets not implemented: {node.rawsource}" + self.warning(message, node.line) + if self.raise_on_warning: + raise NotImplementedError(message) + self.add_token( + "code_inline", "code", 0, markup="`", content=str(node.rawsource) + ) + raise nodes.SkipNode + + if "refuri" in node: + for name in node["names"]: + # 
TODO warn about name starting ^ (clashes with footnotes) + if name not in self._env["references"]: + self._env["references"][name] = { + "title": "", + "href": node["refuri"], + "map": [node.line, node.line], + } + else: + self._env["duplicate_refs"].append( + { + "label": name, + "title": "", + "href": node["refuri"], + "map": [node.line, node.line], + } + ) + elif "names" in node: + for name in node["names"]: + self.add_token( + "myst_target", "", 0, attrs={"class": "myst-target"}, content=name + ) + if "refid" in node: + self.add_token( + "myst_target", + "", + 0, + attrs={"class": "myst-target"}, + content=node["refid"], + ) + + # TODO check for content? + raise nodes.SkipNode + + # Standard CommonMark extensions + + def parse_gfm_table(self, node) -> bool: + """Check whether an RST table can be converted to a GFM one. + + RST tables can have e.g. cells spanning multiple columns/rows, + which the GitHub Flavoured Markdown (GFM) table variant does not support. + """ + # must have one child tgroup + if len(node.children) != 1 or not isinstance(node.children[0], nodes.tgroup): + return False + # tgroup should contain the number of columns + tgroup = node.children[0] + if "cols" not in tgroup: + return False + ncolumns = tgroup["cols"] + # trgoup should contain children: (colspec)*, thead, tbody + if len(tgroup.children) < 2: + return False + if not isinstance(tgroup.children[-2], nodes.thead): + return False + if not isinstance(tgroup.children[-1], nodes.tbody): + return False + thead = tgroup.children[-2] + tbody = tgroup.children[-1] + # the header can only have one row with the full amount of columns + if len(thead.children) != 1 or len(thead.children[0]) != ncolumns: + return False + # each body row should have the full amount of columns + for row in tbody.children: + if len(row.children) != ncolumns: + return False + return True + + def visit_table(self, node): + + if not self.parse_gfm_table(node): + text = node.rawsource + if not text.endswith("\n"): + text 
+= "\n" + # the MyST directive is named "eval-rst" (hyphenated), matching the other fences emitted by this class + self.add_token( + "fence", "code", 0, content=text, markup="```", info="{eval-rst}" + ) + raise nodes.SkipNode + + self.add_token("table_open", "table", 1) + + def depart_table(self, node): + self.add_token("table_close", "table", -1) + + def visit_tgroup(self, node): + pass + + def depart_tgroup(self, node): + pass + + def visit_colspec(self, node): + raise nodes.SkipNode + + def visit_thead(self, node): + self.add_token("thead_open", "thead", 1) + + def depart_thead(self, node): + self.add_token("thead_close", "thead", -1) + + def visit_tbody(self, node): + self.add_token("tbody_open", "tbody", 1) + + def depart_tbody(self, node): + self.add_token("tbody_close", "tbody", -1) + + def visit_row(self, node): + self.add_token("tr_open", "tr", 1) + + def depart_row(self, node): + self.add_token("tr_close", "tr", -1) + + def visit_entry(self, node): + tag = "th" if self.parent_tokens.get("thead") else "td" + self.add_token(f"{tag}_open", tag, 1) + + def depart_entry(self, node): + tag = "th" if self.parent_tokens.get("thead") else "td" + # Markdown cells can not include newlines + # TODO improve or upstream this "fix" + # maybe replace with html_inline <br>
tokens (text will be escaped) + if self._inline: + for child in self._inline.children: + child.content = child.content.replace("\n", " ") + self.add_token(f"{tag}_close", tag, -1) + + # TODO check if handling of is/subId required for footnotes + + def visit_footnote(self, node, refname=None): + refname = refname or node["ids"][0] # assume there is only one id + self.add_token("footnote_block_open", "", 1) + self.add_token("footnote_open", "", 1, meta={"label": refname, "id": 0}) + + def depart_footnote(self, node): + self.add_token("footnote_close", "", -1) + self.add_token("footnote_block_close", "", -1) + + def visit_citation(self, node): + # treated same as for visit_footnote, but with specific prefix + # TODO fails if duplicate refname, since names is empty + refname = node["names"][0] # assume there is only one name + refname = f"{self.cite_prefix}{refname}" + return self.visit_footnote(node, refname=refname) + + def depart_citation(self, node): + # treated same as for depart_footnote + return self.depart_footnote(node) + + def visit_footnote_reference(self, node): + if "refname" in node: + refname = node["refname"] + elif "refid" in node: + refname = node["refid"] + else: + message = f"unknown footnote reference type: {node.rawsource}" + self.warning(message, node.line) + if self.raise_on_warning: + raise NotImplementedError(message) + + self.add_token( + "footnote_ref", "", 0, meta={"label": refname, "id": 0, "subId": 0} + ) + + raise nodes.SkipNode + + def visit_citation_reference(self, node): + refname = node["refname"] if "refname" in node else node["refid"] + # for compatibility we treat citations the same as footnotes, with a prefix + refname = f"{self.cite_prefix}{refname}" + self.add_token( + "footnote_ref", "", 0, meta={"label": refname, "id": 0, "subId": 0} + ) + # the node also contains the refname as text, but we don't need that + raise nodes.SkipNode + + def visit_definition_list(self, node): + self.add_token("dl_open", "dl", 1) + + def 
depart_definition_list(self, node): + self.add_token("dl_close", "dl", -1) + + def visit_definition_list_item(self, node): + pass + + def depart_definition_list_item(self, node): + pass + + def visit_term(self, node): + self.add_token("dt_open", "dt", 1) + + def depart_term(self, node): + self.add_token("dt_close", "dt", -1) + + def visit_classifier(self, node): + # classifiers can follow a term, e.g. `term : classifier` + # TODO record term classifiers? + raise nodes.SkipNode + + def visit_definition(self, node): + self.add_token("dd_open", "dd", 1) + + def depart_definition(self, node): + self.add_token("dd_close", "dd", -1) + + def visit_FrontMatterNode(self, node): + for field in node: + if not len(field) == 2: + continue + key = field[0][0].astext() + tokens = self.nested_parse(field[1].children) + self._front_matter_tokens.append(([key], tokens)) + + raise nodes.SkipNode + + def visit_field_list(self, node): + if node.rawsource: + text = "\n" + node.rawsource.strip() + "\n" + self.add_token("fence", "code", 0, content=text, info="{eval-rst}") + raise nodes.SkipNode + + # MyST Markdown specific + + def visit_RoleNode(self, node): + # TODO nested parse of specific roles + role = node["role"] or self.default_role + if role: + if self.dollar_math and role == "math": + self.add_token( + "math_inline", "math", 0, markup="$", content=node["text"].strip() + ) + else: + self.add_token( + "myst_role", "", 0, meta={"name": role}, content=node["text"] + ) + else: + self.add_token("code_inline", "code", 0, markup="`", content=node["text"]) + raise nodes.SkipNode + + def visit_comment(self, node): + # TODO alternately use + self.add_token( + "myst_line_comment", + "hr", + 0, + attrs={"class": "myst-line-comment"}, + content=indent(node.astext(), " "), + ) + raise nodes.SkipNode + + def visit_substitution_reference(self, node): + self.add_token("substitution_inline", "span", 0, content=node["refname"]) + # the node also contains the refname as text, but we don't need that + 
raise nodes.SkipNode + + def visit_substitution_definition(self, node): + if "names" not in node or not node["names"]: + raise nodes.SkipNode + key = node["names"][0] + # substitution definition should always be a single directive node + tokens = self.nested_parse(node.children) + self._front_matter_tokens.append((["substitutions", key], tokens)) + raise nodes.SkipNode + + def visit_EvalRstNode(self, node): + text = node.astext() + if not text.endswith("\n"): + text += "\n" + self.add_token("fence", "code", 0, content=text, info="{eval-rst}") + raise nodes.SkipNode + + def visit_DirectiveNode(self, node): + markup = "`" + if self.colon_fences and node["conversion"] in ( + "parse_content", + "parse_content_titles", + "parse_all", + ): + markup = ":" + if ( + ( + node["name"] == "code-block" + or node["module"] == "sphinx.directives.patches.Code" + ) + and not node["options_list"] + and len(node.children) == 2 + ): + # special case, where we can use standard Markdown fences + argument, content = node.children + self.add_token( + "fence", + "code", + 0, + content=content.astext() + "\n", + markup="```", + info=argument.astext().strip(), + ) + raise nodes.SkipNode + else: + self.add_token( + "directive_open", + "", + 1, + meta={ + key: node[key] + for key in ["name", "module", "conversion", "options_list"] + }, + markup=markup, + ) + + def depart_DirectiveNode(self, node): + self.add_token("directive_close", "", -1) + + def visit_ArgumentNode(self, node): + # TODO might be a better construct to have this as children of inline + self.add_token("directive_arg_open", "", 1) + + def depart_ArgumentNode(self, node): + self.add_token("directive_arg_close", "", -1) + + def visit_ContentNode(self, node): + self.add_token("directive_content_open", "", 1) + + def depart_ContentNode(self, node): + self.add_token("directive_content_close", "", -1) + + # TODO https://docutils.sourceforge.io/docs/user/rst/quickref.htm + # line block, option list diff --git 
a/rst_to_myst/mdformat_render.py b/rst_to_myst/mdformat_render.py new file mode 100644 index 0000000..54bf76e --- /dev/null +++ b/rst_to_myst/mdformat_render.py @@ -0,0 +1,239 @@ +import logging +from textwrap import indent +from typing import IO, Any, Dict, Iterable, List, NamedTuple, Optional, Set + +from markdown_it.token import Token +from mdformat.plugins import PARSER_EXTENSIONS +from mdformat.renderer import LOGGER, MDRenderer, RenderContext, RenderTreeNode +from mdformat.renderer._util import longest_consecutive_sequence + +from .markdownit import MarkdownItRenderer, RenderOutput +from .parser import to_docutils_ast +from .utils import yaml_dump + + +def _front_matter_tokens_render(node: RenderTreeNode, context: RenderContext) -> str: + """Special render for front-matter whose values also need to be rendered.""" + dct = {} + for child in node.children: + path = child.meta["key_path"] + value = ( + "\n\n".join(subchild.render(context) for subchild in child.children) + if child.children + else True + ) + subdct = dct + for key in path[:-1]: + subdct.setdefault(key, {}) + subdct = subdct[key] + subdct[path[-1]] = value + text = yaml_dump(dct).rstrip() + return f"---\n{text}\n---" + + +def _sub_renderer(node: RenderTreeNode, context: RenderContext) -> str: + """Render a substitution.""" + return f"{{{{ {node.content} }}}}" + + +def _directive_render(node: RenderTreeNode, context: RenderContext) -> str: + """Directive render, for handling directives that may contain child elements.""" + # special directives that should only be used within substitutions + if node.meta["module"].endswith("misc.Replace") and node.children: + return "\n\n".join(child.render(context) for child in node.children[-1]) + if node.meta["module"].endswith("misc.Date"): + return "{sub-ref}`today`" + # TODO handle unicode directive + + name = node.meta["name"] + info_str = option_block = code_block = "" + + if node.children and node.children[0].type == "directive_arg": + info_str = 
"".join(child.render(context) for child in node.children[0]) + info_str = " ".join(info_str.splitlines()).strip() + if info_str: + info_str = " " + info_str + + if node.meta["options_list"]: + yaml_str = yaml_dump( + { + key: (True if val is None else (int(val) if val.isnumeric() else val)) + for key, val in node.meta["options_list"] + } + ) + option_block = indent(yaml_str, ":", lambda s: True).strip() + + if node.children and node.children[-1].type == "directive_content": + content = "\n\n".join(child.render(context) for child in node.children[-1]) + if not option_block and content.startswith(":"): + # add a new-line, so content is not treated as an option + content = "\n" + content + elif option_block and content: + # new lines between options and content + option_block += "\n\n" + code_block = content + + if option_block or code_block: + # new line before closing fence + code_block += "\n" + + # Info strings of backtick code fences can not contain backticks or tildes. + # If that is the case, we make a tilde code fence instead. 
+ if node.markup and ":" in node.markup: + fence_char = ":" + elif "`" in info_str or "~" in info_str: + fence_char = "~" + else: + fence_char = "`" + + # The code block must not include as long or longer sequence of `fence_char`s + # as the fence string itself + fence_len = max(3, longest_consecutive_sequence(code_block, fence_char) + 1) + fence_str = fence_char * fence_len + return f"{fence_str}{{{name}}}{info_str}\n{option_block}{code_block}{fence_str}" + + +class AdditionalRenderers: + RENDERERS = { + "front_matter_tokens": _front_matter_tokens_render, + "substitution_block": _sub_renderer, + "substitution_inline": _sub_renderer, + "directive": _directive_render, + } + + +def from_tokens( + output: RenderOutput, + *, + consecutive_numbering: bool = True, + warning_stream: Optional[IO] = None, +) -> str: + """Convert markdown-it tokens to text.""" + md_renderer = MDRenderer() + # TODO option for consecutive numbering consecutive_numbering, etc + options = { + "parser_extension": [ + PARSER_EXTENSIONS[name] + for name in ["myst", "tables", "frontmatter", "deflist"] + ] + + [AdditionalRenderers], + "mdformat": {"number": consecutive_numbering}, + } + + # temporarily redirect mdformat logging + warning_handler = None + if warning_stream: + warning_handler = logging.StreamHandler(warning_stream) + warning_handler.setLevel(logging.WARNING) + LOGGER.addHandler(warning_handler) + try: + # mdformat outputs only used reference definitions during 'finalize' + # instead we want to output all parsed reference definitions + text = md_renderer.render(output.tokens, options, output.env, finalize=False) + if output.env["references"]: + if text: + text += "\n\n" + output.env["used_refs"] = set(output.env["references"]) + text += md_renderer._write_references(output.env) + finally: + if warning_handler: + LOGGER.removeHandler(warning_handler) + if text: + text += "\n" + return text + + +def get_myst_extensions(tokens: List[Token]) -> Set[str]: + """Return the MyST extensions 
required to parse a token sequence.""" + extensions = set() + for token in tokens: + if token.type == "substitution_inline" or token.type == "substitution_block": + extensions.add("substitution") + # the ``key_path`` meta is attached to the per-key tokens, not to the outer front-matter wrapper + elif token.type == "front_matter_key_open": + key_path = token.meta.get("key_path") + if key_path and key_path[0] == "substitutions": + extensions.add("substitution") + elif token.type == "directive_open" and ":" in token.markup: + extensions.add("colon_fence") + elif token.type == "math_inline" or token.type == "math_block": + extensions.add("dollarmath") + elif token.type == "dl_open": + extensions.add("deflist") + return extensions + + +class ConvertedOutput(NamedTuple): + """Output from ``rst_to_myst``.""" + + text: str + tokens: List[Token] + env: Dict[str, Any] + warning_stream: IO + extensions: Set[str] + + +def rst_to_myst( + text: str, + *, + warning_stream: Optional[IO] = None, + language_code="en", + use_sphinx: bool = True, + extensions: Iterable[str] = (), + conversions: Optional[Dict[str, str]] = None, + default_domain: str = "py", + default_role: Optional[str] = None, + raise_on_warning: bool = False, + cite_prefix: str = "cite_", + consecutive_numbering: bool = True, + colon_fences: bool = True, + dollar_math: bool = True, +) -> ConvertedOutput: + """Convert RST text to MyST Markdown text.
+ + :param text: The input RST text + + :param warning_stream: The warning IO to write to + :param language_code: the language module to use, + for directive/role name translation + :param use_sphinx: Whether to load sphinx roles, directives and extensions + :param extensions: Sphinx extensions to load + :param conversions: Overrides for mapping of how to convert directives; + directive module path -> conversion type + :param default_domain: name of the default sphinx domain + :param default_role: name of the default role, otherwise convert to a literal + + :param cite_prefix: Prefix to add to citation references + :param raise_on_warning: Raise exception on parsing warning + :param consecutive_numbering: Apply consecutive numbering to ordered lists + :param colon_fences: Use colon fences for directives with parsed content + :param dollar_math: Convert ``math`` roles to dollar delimited math + + """ + document, warning_stream = to_docutils_ast( + text, + warning_stream=warning_stream, + language_code=language_code, + use_sphinx=use_sphinx, + extensions=extensions, + default_domain=default_domain, + conversions=conversions, + ) + token_renderer = MarkdownItRenderer( + document, + warning_stream=warning_stream, + cite_prefix=cite_prefix, + raise_on_warning=raise_on_warning, + default_role=default_role, + colon_fences=colon_fences, + dollar_math=dollar_math, + ) + output = token_renderer.to_tokens() + myst_extension = get_myst_extensions(output.tokens) + output_text = from_tokens( + output, + consecutive_numbering=consecutive_numbering, + warning_stream=warning_stream, + ) + return ConvertedOutput( + output_text, output.tokens, output.env, warning_stream, myst_extension + ) diff --git a/rst_to_myst/nodes.py b/rst_to_myst/nodes.py index 473d9d3..8b67957 100644 --- a/rst_to_myst/nodes.py +++ b/rst_to_myst/nodes.py @@ -1,12 +1,37 @@ +from typing import Any, List, Tuple + from docutils import nodes +class EvalRstNode(nodes.Element): + """Should contain a single ``Text``
node with the contents to wrap.""" + + class RoleNode(nodes.Element): pass class DirectiveNode(nodes.Element): - pass + """This node will have an optional ``ArgumentNode`` and/or ``ContentNode`` child.""" + + def __init__( + self, + rawsource, + *, + name: str, + module: str, + conversion: str, + options_list: List[Tuple[str, Any]], + **kwargs + ) -> None: + super().__init__( + rawsource, + name=name, + module=module, + conversion=conversion, + options_list=options_list, + **kwargs + ) class ArgumentNode(nodes.Element): diff --git a/rst_to_myst/parser.py b/rst_to_myst/parser.py index 8089185..de8eee2 100644 --- a/rst_to_myst/parser.py +++ b/rst_to_myst/parser.py @@ -12,16 +12,25 @@ PropagateTargets, ) from docutils.utils import new_document, roman -from importlib_resources import files + +try: + from importlib.resources import files +except ImportError: + from importlib_resources import files from . import data as package_data from .inliner import InlinerMyst from .namespace import compile_namespace -from .nodes import DirectiveNode, FrontMatterNode +from .nodes import FrontMatterNode from .states import get_state_classes -class RSTParser(Parser): +class LosslessRSTParser(Parser): + """Modified RST Parser, allowing for the retrieval of the original source text. + + Principally, roles and directives are not run. + """ + def __init__(self): self.initial_state = "Body" self.state_classes = get_state_classes() @@ -32,16 +41,19 @@ def __init__(self): class IndirectHyperlinks(Transform): + """Resolve indirect hyperlinks.""" + def apply(self): for target in self.document.indirect_targets: if not target.resolved: - self.resolve_indirect_target(target) + self.resolve_indirect_target(target) # TODO implement this resolve? 
# Do not resolve the actual references, since this replaces the "refname" # self.resolve_indirect_references(target) class StripFootnoteLabel(Transform): - # footnotes and citations can start with a label note, which we do not need + """Footnotes and citations can start with a label note, which we do not need.""" + def apply(self): for node in self.document.traverse( lambda n: isinstance(n, (nodes.footnote, nodes.citation)) @@ -60,6 +72,15 @@ def apply(self): class ResolveListItems(Transform): + """For bullet/enumerated lists, propagate attributes to their child list items. + + Also decide if they are loose/tight:: + + A list is loose if any of its list items are separated by blank lines, + or if any of its list items directly contain two block-level elements + with a blank line between them. Otherwise a list is tight. + """ + def apply(self): for node in self.document.traverse(nodes.bullet_list): prefix = node["bullet"] + " " @@ -67,6 +88,7 @@ def apply(self): if isinstance(child, nodes.list_item): child["style"] = "bullet" child["prefix"] = prefix + for node in self.document.traverse(nodes.enumerated_list): number = 1 if "start" in node: @@ -82,20 +104,12 @@ def apply(self): number += 1 -class DirectiveNesting(Transform): - def apply(self): - for node in self.document.traverse(DirectiveNode): # type: DirectiveNode - # TODO this will overcount if multiple directives at same nesting depth - node["delimiter"] *= ( - 3 - + sum(1 for _ in node.traverse(DirectiveNode, include_self=False)) - # add an extra delimiter if the directive contains a table, - # because we wrap some in eval_rst directive - + (1 if sum(1 for _ in node.traverse(nodes.table)) else 0) - ) +class FrontMatter(Transform): + """Extract an initial field list into a `FrontMatterNode`. + Similar to ``docutils.transforms.frontmatter.DocInfo``. 
+ """ -class FrontMatter(Transform): def apply(self): if not self.document.settings.front_matter: return @@ -103,12 +117,17 @@ def apply(self): if index is None: return candidate = self.document[index] + if isinstance(candidate, nodes.section): + index = candidate.first_child_not_matching_class(nodes.PreBibliographic) + if index is None: + return + candidate = candidate[index] if isinstance(candidate, nodes.field_list): front_matter = FrontMatterNode("", *candidate.children) - self.document[index] = front_matter + candidate.replace_self(front_matter) -def to_ast( +def to_docutils_ast( text: str, uri: str = "source", report_level=2, @@ -121,7 +140,7 @@ def to_ast( conversions=None, front_matter=True, ) -> Tuple[nodes.document, StringIO]: - settings = OptionParser(components=(RSTParser,)).get_default_values() + settings = OptionParser(components=(LosslessRSTParser,)).get_default_values() warning_stream = StringIO() if warning_stream is None else warning_stream settings.warning_stream = warning_stream settings.report_level = report_level # 2=warning @@ -152,7 +171,7 @@ def to_ast( # whether to treat initial field list as front matter document.settings.front_matter = front_matter - parser = RSTParser() + parser = LosslessRSTParser() parser.parse(text, document) # these three transforms are required for converting targets correctly @@ -160,12 +179,11 @@ def to_ast( PropagateTargets, # Propagate empty internal targets to the next element. (260) FrontMatter, # convert initial field list (DocInfo=340) AnonymousHyperlinks, # Link anonymous references to targets. 
(440) - IndirectHyperlinks, # "refuri" migrated back to all indirect targets (460) + # IndirectHyperlinks, # "refuri" migrated back to all indirect targets (460) Footnotes, # Assign numbers to autonumbered footnotes (620) # bespoke transforms StripFootnoteLabel, ResolveListItems, - DirectiveNesting, ]: transform = transform_cls(document) transform.apply() diff --git a/rst_to_myst/renderer.py b/rst_to_myst/renderer.py deleted file mode 100644 index cfda841..0000000 --- a/rst_to_myst/renderer.py +++ /dev/null @@ -1,585 +0,0 @@ -import copy -from io import StringIO -from textwrap import indent -from typing import IO, Any, Dict, Optional, Tuple - -from docutils import nodes - -from .nodes import ArgumentNode, DirectiveNode -from .parser import to_ast -from .utils import yaml_dump - - -class MystRenderer(nodes.GenericNodeVisitor): - def __init__( - self, document, warning_stream=None, raise_on_error=False, cite_prefix="cite_" - ): - self.document = document - self._rendered: str = "" - self._front_matter: Dict[str, Any] = {} - self._indent: str = "" - self._warning_stream: IO = warning_stream or StringIO() - self.raise_on_error = raise_on_error - self._uri_refnames = None - self.cite_prefix = cite_prefix - self.extensions_required = set() - - def nested_render(self, children, singleline=True, container_cls=nodes.paragraph): - nested_renderer = MystRenderer( - self.document, - warning_stream=self._warning_stream, - raise_on_error=self.raise_on_error, - cite_prefix=self.cite_prefix, - ) - nested_renderer._uri_refnames = self._uri_refnames - container = container_cls() - container.extend(children) - container.walkabout(nested_renderer) - if self._uri_refnames is None: - self._uri_refnames = nested_renderer._uri_refnames - if singleline: - return " ".join(nested_renderer._rendered.splitlines()) - return nested_renderer._rendered - - @property - def rendered(self) -> str: - if self._front_matter: - matter = yaml_dump(self._front_matter) - return 
f"---\n{matter}\n---\n\n{self._rendered}" - return self._rendered - - def warning(self, message: str): - self._warning_stream.write(f"RENDER WARNING: {message}\n") - - def incr_indent(self, i: Optional[int] = None, string: Optional[str] = None): - assert sum([i is None, string is None]) == 1 - if i is not None: - self._indent += " " * i - else: - self._indent += string - - def decr_indent(self, i: int): - if i > len(self._indent): - raise AssertionError("indent decreased to <0") - self._indent = self._indent[:-i] - - def add_indent(self): - if self._indent: - self._rendered += self._indent - - def add_inline(self, text: str): - if self._indent: - strip = self._indent.strip() - text = indent(text, self._indent, lambda l: strip + l) - if (not self._rendered) or self._rendered[-1] != "\n": - # if the text does not start a newline, then strip the first indent - text = text[len(self._indent) :] - self._rendered += text - - def add_newline(self, i=1): - if i > 1 and self._indent.strip(): - self._rendered += ("\n" + self._indent) * (i - 1) - self._rendered += "\n" - else: - self._rendered += "\n" * i - - def add_lines(self, lines, newline_before=False, newline_after=False): - if newline_before: - self._rendered += "\n" - text = "\n".join(lines) - if self._indent: - strip = self._indent.strip() - text = indent(text, self._indent, lambda l: strip + l) - self._rendered += text - if newline_after: - self._rendered += "\n" - - def default_visit(self, node): - message = f"no visit method for: {node.__class__}" - self.warning(message) - if self.raise_on_error: - raise NotImplementedError(message) - - def default_departure(self, node): - message = f"no depart method for: {node.__class__}" - self.warning(message) - if self.raise_on_error: - raise NotImplementedError(message) - - def visit_document(self, node): - pass - - def depart_document(self, node): - pass - - def visit_Element(self, node): - pass - - def depart_Element(self, node): - pass - - def visit_system_message(self, 
node): - # ignore - raise nodes.SkipNode - - def visit_problematic(self, node): - # ignore - raise nodes.SkipNode - - def visit_FrontMatterNode(self, node): - for field in node: - if not len(field) == 2: - continue - key = field[0][0].astext() - if not field[1].children: - value = True - else: - value = self.nested_render( - field[1].children, singleline=False, container_cls=nodes.Element - ).rstrip() - self._front_matter[key] = value - raise nodes.SkipNode - - def visit_section(self, node): - pass # handled by title - - def depart_section(self, node): - pass - - def visit_title(self, node): - self.add_lines([("#" * node["level"]) + " "]) - - def depart_title(self, node): - self.add_newline(2) - - def visit_paragraph(self, node): - # don't indent if the first element in another block - if node.parent and node.parent.children[0] != node: - self.add_indent() - elif not isinstance( - node.parent, - ( - nodes.footnote, - nodes.citation, - nodes.list_item, - nodes.block_quote, - nodes.definition, - ), - ): - self.add_indent() - - def depart_paragraph(self, node): - self.add_newline(2) - - def visit_Text(self, node): - self.add_inline(node.astext()) - - def depart_Text(self, node): - pass - - # note: for emphasis and strong, nested inline is not allowed, - # so we do not need to worry about that - - def visit_emphasis(self, node): - self.add_inline("*") - - def depart_emphasis(self, node): - self.add_inline("*") - - def visit_strong(self, node): - self.add_inline("**") - - def depart_strong(self, node): - self.add_inline("**") - - def visit_RoleNode(self, node): - # TODO nested parse of specific roles - self.add_inline(f"{{{node['role']}}}`{node['text']}`") - - def depart_RoleNode(self, node): - pass - - def visit_literal(self, node): - self.add_inline("`") - - def depart_literal(self, node): - self.add_inline("`") - - def visit_transition(self, node): - self.add_lines(["---"]) - - def depart_transition(self, node): - self.add_newline(2) - - def visit_comment(self, 
node): - self.add_lines([""], True) - self.add_newline(2) - - def visit_literal_block(self, node): - self.add_lines(["```"] + node.astext().splitlines() + ["```"]) - self.add_newline(2) - raise nodes.SkipNode - - def visit_target(self, node): - if "inline" in node and node["inline"]: - # TODO inline targets - message = f"inline targets not implemented: {node.rawsource}" - self.warning(message) - if self.raise_on_error: - raise NotImplementedError(message) - self.add_inline(str(node.rawsource)) - raise nodes.SkipNode - - if "refuri" in node: - for name in node["names"]: - # TODO warn about name starting ^ - self.add_lines([f"[{name}]: {node['refuri']}"]) - self.add_newline(2) - elif "names" in node: - for name in node["names"]: - self.add_lines([f"({name})="]) - self.add_newline(2) - if "refid" in node: - # should only be for anonymous - self.add_lines([f"({node['refid']})="]) - self.add_newline(2) - - # TODO check for content? - raise nodes.SkipNode - - def depart_target(self, node): - pass - - def is_target_uri(self, refname): - """Return True, if a refname points towards a target which is an external URI - - This is used to decide if a reference should be [][refname] or [](refname) - """ - if self._uri_refnames is None: - self._uri_refnames = set() - for target in self.document.traverse(nodes.target): - if "refuri" in target: - for name in target["names"]: - self._uri_refnames.add(name) - return refname in self._uri_refnames - - def visit_reference(self, node): - - if "standalone_uri" in node: - # auto-link - self.add_inline(f"<{node['refuri']}>") - raise nodes.SkipNode - - self.add_inline("[") - - def depart_reference(self, node): - # TODO embedded targets - self.add_inline("]") - if "refname" in node: - if self.is_target_uri(node["refname"]): - # will reference a link definition `[refname]: url` - self.add_inline(f"[{node['refname']}]") - else: - # will reference a target `(refname)=` - self.add_inline(f"({node['refname']})") - elif "refuri" in node: - 
self.add_inline(f"(<{node['refuri']}>)") - elif "refid" in node: - # this should only be the case for anonymous links, - # pointing to internal targets - self.add_inline(f"({node['refid']})") - else: - message = f"unknown reference type: {node.rawsource}" - self.warning(message) - if self.raise_on_error: - raise NotImplementedError(message) - - def visit_substitution_reference(self, node): - self.extensions_required.add("substitution") - self.add_inline(f"{{{{ {node['refname']} }}}}") - # the node also contains the refname as text, but we don't need that - raise nodes.SkipNode - - def depart_substitution_reference(self, node): - pass - - def visit_substitution_definition(self, node): - self.extensions_required.add("substitution") - if "names" not in node or not node["names"]: - raise nodes.SkipNode - key = node["names"][0] - # substitution should always be a single directive - if ( - node.children - and isinstance(node.children[0], DirectiveNode) - and node.children[0]["name"] == "replace" # TODO translations - and node.children[0]["type"] == "content_only" - ): - # common special case - value = self.nested_render( - node.children[0].children, singleline=False, container_cls=nodes.Element - ).rstrip() - # TODO the "date" directive is the other special case - else: - value = self.nested_render( - node.children, singleline=False, container_cls=nodes.Element - ).rstrip() - self._front_matter.setdefault("substitutions", {})[key] = value - raise nodes.SkipNode - - def visit_footnote_reference(self, node): - if "refname" in node: - # normal reference - self.add_inline(f"[^{node['refname']}]") - elif "refid" in node: - # auto number/symbol reference - self.add_inline(f"[^{node['refid']}]") - else: - message = f"unknown footnote reference type: {node.rawsource}" - self.warning(message) - if self.raise_on_error: - raise NotImplementedError(message) - # the node also contains the refname as text, but we don't need that - raise nodes.SkipNode - - def 
visit_citation_reference(self, node): - refname = node["refname"] if "refname" in node else node["refid"] - # for compatibility we treat citations the same as footnotes, with a prefix - self.add_inline(f"[^{self.cite_prefix}{refname}]") - # the node also contains the refname as text, but we don't need that - raise nodes.SkipNode - - def visit_footnote(self, node): - refname = node["ids"][0] # TODO assuming there is only one id - self.add_lines([f"[^{refname}]: "]) - self.incr_indent(2) - - def depart_footnote(self, node): - self.decr_indent(2) - # self.add_newline(1) - - def visit_citation(self, node): - # same as for visit_footnote - refname = node["names"][0] # TODO assuming there is only one name - self.add_lines([f"[^{self.cite_prefix}{refname}]: "]) - self.incr_indent(2) - - def depart_citation(self, node): - self.decr_indent(2) - # self.add_newline(1) - - def visit_DirectiveNode(self, node): - # The default is simply to wrap in eval-rst - # TODO decide which/how directives can be converted/expanded - name = node["name"] - if node["type"] == "eval_rst": - content = node["indented"] - indent_len = node["indent"] - self.add_lines( - ["```{eval-rst}", f".. 
{name}:: " + (content[0] if content else "")] - + indent("\n".join(content[1:]), " " * indent_len).splitlines() - + ["```"], - ) - self.add_newline(2) - raise nodes.SkipNode - - argument = " ".join(node["arg_block"]) - if node.children and isinstance(node.children[0], ArgumentNode): - # perform a separate render of the argument nodes - argument = self.nested_render(node.children[0].children) - - if ":" in node["delimiter"]: - self.extensions_required.add("colon_fence") - - self.add_lines( - [ - f"{node['delimiter']}{{{name}}} {argument}".rstrip(), - ] - + [ - f":{key}: {'true' if val is None else val}" - for key, val in node["options_list"] - ] - + ["", ""] - ) - - def visit_ArgumentNode(self, node): - raise nodes.SkipNode - - def visit_ContentNode(self, node): - pass - - def depart_ContentNode(self, node): - pass - - def depart_DirectiveNode(self, node): - self.add_lines([node["delimiter"]]) - self.add_newline(2) - - # we handle setting list item attributes in a transform - - def visit_bullet_list(self, node): - pass - - def depart_bullet_list(self, node): - self.add_newline() - - def visit_enumerated_list(self, node): - pass - - def depart_enumerated_list(self, node): - self.add_newline() - - def visit_list_item(self, node): - self.add_lines([node["prefix"]]) - self.incr_indent(len(node["prefix"])) - - def depart_list_item(self, node): - # remove new line between items - # indent may contain > if in block quotes - self._rendered = self._rendered.rstrip(" \n\t" + self._indent) - self.add_newline() - self.decr_indent(len(node["prefix"])) - - # for definition lists we just need the term and definition nodes - - def visit_definition_list(self, node): - self.extensions_required.add("deflist") - pass - - def visit_definition_list_item(self, node): - pass - - def depart_definition_list(self, node): - pass - - def depart_definition_list_item(self, node): - pass - - def visit_term(self, node): - self.add_lines([self.nested_render(node.children)]) - self.add_newline(2) - 
raise nodes.SkipNode - - def visit_definition(self, node): - self.add_lines([": "]) - self.incr_indent(2) - - def depart_definition(self, node): - self.decr_indent(2) - - def visit_block_quote(self, node): - self.add_lines(["> "]) - self.incr_indent(string="> ") - - def depart_block_quote(self, node): - self.decr_indent(2) - - def visit_attribution(self, node): - # Markdown block quotes do not have an attribution syntax, - # so we add a best approximation - self.add_lines([f'

<p class="attribution">—{node.astext()}</p>

']) - self.add_newline(2) - raise nodes.SkipNode - - def visit_table(self, node): - # convert tables to Markdown if possible, e.g. single header row, etc - cells = self.assess_table(node) - if cells: - self.add_lines( - [ - "| " + " | ".join(cells[0]) + " |", - "| " + " | ".join("-" * len(c) for c in cells[0]) + " |", - ] - + ["| " + " | ".join(row) + " |" for row in cells[1:]] - ) - - else: - self.add_lines(["```{eval_rst}"] + node.rawsource.splitlines() + ["```"]) - self.add_newline(2) - raise nodes.SkipNode - - def assess_table(self, node): - if len(node.children) != 1 or not isinstance(node.children[0], nodes.tgroup): - return None - tgroup = node.children[0] - if "cols" not in tgroup: - return None - ncolumns = tgroup["cols"] - if ( - not len(tgroup.children) > 1 - or not isinstance(tgroup.children[-1], nodes.tbody) - or not isinstance(tgroup.children[-2], nodes.thead) - ): - return None - thead = tgroup.children[-2] - tbody = tgroup.children[-1] - if len(thead.children) != 1 or len(thead.children[0]) != ncolumns: - return None - rows = [copy.copy(thead.children[0].children)] - for row in tbody.children: - if len(row.children) != ncolumns: - return None - rows.append(copy.copy(row.children)) - - # render cells - widths = [0 for _ in rows[0]] - for i, row in enumerate(rows): - for j, col in enumerate(row): - if not isinstance(col, nodes.entry): - return None - if len(col.children) != 1 or not isinstance( - col.children[0], nodes.paragraph - ): - return None - rows[i][j] = self.nested_render(col.children[0].children).strip() - widths[j] = max(widths[j], len(rows[i][j])) - - # align columns - for i, _ in enumerate(rows): - for j, _ in enumerate(row): - rows[i][j] = rows[i][j].ljust(widths[j]) - - return rows - - # TODO https://docutils.sourceforge.io/docs/user/rst/quickref.htm - # line block, field list, option list - - -def render( - document: nodes.document, warning_stream: Optional[IO] = None, **kwargs -) -> Tuple[str, IO]: - renderer = MystRenderer(document, 
warning_stream, **kwargs) - document.walkabout(renderer) - # TODO also return or print renderer.extensions_required - # TODO remove double black lines - # TODO remove spaces in blank lines - return renderer.rendered, renderer._warning_stream - - -def convert( - text: str, - warning_stream: Optional[IO] = None, - raise_on_error: bool = False, - cite_prefix: str = "cite_", - language_code="en", - use_sphinx=True, - extensions=(), - default_domain="py", - conversions=None, -) -> Tuple[str, IO]: - document, warning_stream = to_ast( - text, - warning_stream=warning_stream, - language_code=language_code, - use_sphinx=use_sphinx, - extensions=extensions, - default_domain=default_domain, - conversions=conversions, - ) - text, warning_stream = render( - document, warning_stream, cite_prefix=cite_prefix, raise_on_error=raise_on_error - ) - return text, warning_stream diff --git a/rst_to_myst/states.py b/rst_to_myst/states.py index 62319b6..9763053 100644 --- a/rst_to_myst/states.py +++ b/rst_to_myst/states.py @@ -1,8 +1,10 @@ +"""docutils states.""" import re +from typing import List, Optional from docutils import nodes from docutils.nodes import fully_normalize_name as normalize_name -from docutils.parsers.rst import states, tableparser +from docutils.parsers.rst import Directive, states, tableparser from docutils.utils import ( BadOptionDataError, BadOptionError, @@ -10,7 +12,7 @@ extract_options, ) -from .nodes import ArgumentNode, ContentNode, DirectiveNode +from .nodes import ArgumentNode, ContentNode, DirectiveNode, EvalRstNode # Alphanumerics with isolated internal [-._+:] chars (i.e. 
not 2 together): SIMPLENAME_RE = r"(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*" @@ -109,22 +111,18 @@ def directive(self, match, **option_presets): blank_finish, ) = self.parse_directive_match(match) - directive_node = DirectiveNode( - block_text, - name=type_name, - delimiter="`", - ) - # try to get directive class # directive_class, messages = directives.directive( # type_name, self.memo.language, self.document # ) - directive_class = self.document.settings.namespace.get_directive(type_name) + directive_class: Optional[ + Directive + ] = self.document.settings.namespace.get_directive(type_name) # default to eval rst if directive_class is None: # TODO warning message? - return self.eval_rst(directive_node, indent, indented, blank_finish) + return self.eval_rst(type_name, block_text, indent, indented, blank_finish) # get directive path for lookup directive_path = f"{directive_class.__module__}.{directive_class.__name__}" @@ -132,26 +130,20 @@ def directive(self, match, **option_presets): # lookup directive path conversion = self.document.settings.directive_data.get(directive_path, None) - if (not conversion) or conversion == "eval_rst": - return self.eval_rst(directive_node, indent, indented, blank_finish) - if conversion not in [ "direct", - "argument_only", - "content_only", - "content_only_titles", - "argument_content", - "direct_colon", - "argument_only_colon", - "content_only_colon", - "argument_content_colon", + "parse_argument", + "parse_content", + "parse_content_titles", + "parse_all", ]: - # TODO warning - return self.eval_rst(directive_node, indent, indented, blank_finish) - - directive_node["type"] = conversion - if conversion.endswith("_colon"): - directive_node["delimiter"] = ":" + if conversion and conversion != "eval_rst": + self.reporter.warning( + f'Unknown conversion type "{conversion}"', + nodes.literal_block(block_text, block_text), + line=lineno, + ) + return self.eval_rst(type_name, block_text, indent, indented, blank_finish) try: ( @@ -166,41 
+158,52 @@ def directive(self, match, **option_presets): nodes.literal_block(block_text, block_text), line=lineno, ) - return self.eval_rst(directive_node, indent, indented, blank_finish) - - directive_node["arg_block"] = arg_block - directive_node["options_list"] = options_list + return self.eval_rst(type_name, block_text, indent, indented, blank_finish) - if content and conversion == "direct": - content_node = ContentNode() - content_node += nodes.paragraph("", nodes.Text("\n".join(content))) - directive_node += content_node + directive_node = DirectiveNode( + block_text, + name=type_name, + module=directive_path, + conversion=conversion, + options_list=options_list, + ) - if "argument" in conversion: + if directive_class.required_arguments or directive_class.optional_arguments: argument_node = ArgumentNode() directive_node += argument_node - textnodes, messages = self.inline_text("\n".join(arg_block), lineno) - # TODO report messages? - argument_node.extend(textnodes) + if conversion in ("parse_argument", "parse_all"): + textnodes, messages = self.inline_text(" ".join(arg_block), lineno) + # TODO report messages? 
+ argument_node.extend(textnodes) + else: + argument_node += nodes.Text(" ".join(arg_block)) - if content and "content" in conversion: + if directive_class.has_content: content_node = ContentNode() directive_node += content_node - self.nested_parse( - content, - content_offset, - content_node, - match_titles="titles" in conversion, - ) + if conversion in ("parse_content", "parse_content_titles", "parse_all"): + self.nested_parse( + content, + content_offset, + content_node, + match_titles="titles" in conversion, + ) + else: + content_node += nodes.Text("\n".join(content or [])) return [directive_node], blank_finish @staticmethod - def eval_rst(directive_node, indent, indented, blank_finish): - directive_node["type"] = "eval_rst" - directive_node["indent"] = indent - directive_node["indented"] = indented - return [directive_node], blank_finish + def eval_rst( + name: str, block_text: str, indent: int, indented: List[str], blank_finish: bool + ): + """Return an EvalRstNode.""" + node = EvalRstNode(block_text, name=name, indent=indent) + if not block_text.startswith(".. "): + # substitution definition directives + block_text = ".. " + block_text + node += nodes.Text(block_text) + return [node], blank_finish def parse_directive_match(self, match): lineno = self.state_machine.abs_line_number() @@ -354,7 +357,7 @@ def substitution_def(self, match): return [substitution_node], blank_finish def table(self, isolate_function, parser_class): - """Parse a table.""" + """Parse a table, and record the raw text.""" block, messages, blank_finish = isolate_function() if block: try: @@ -382,6 +385,34 @@ def __init__(self, state_machine, debug=False): "initial_state": "Body", } + def field_marker(self, match, context, next_state): + """Field list item. 
+ + Modified to store full text of field_list in ``rawsource`` + """ + field_list = nodes.field_list() + self.parent += field_list + field, blank_finish = self.field(match) + field_list += field + offset = self.state_machine.line_offset + 1 # next line + newline_offset, blank_finish = self.nested_list_parse( + self.state_machine.input_lines[offset:], + input_offset=self.state_machine.abs_line_offset() + 1, + node=field_list, + initial_state="FieldList", + blank_finish=blank_finish, + ) + self.goto_line(newline_offset) + # TODO this slicing of input_lines seems to work, but I'm not exactly sure why + field_list.rawsource += "\n".join( + self.state_machine.input_lines[ + offset - 1 : offset + (newline_offset - field.line) + ] + ) + if not blank_finish: + self.parent += self.unindent_warning("Field list") + return [], next_state, [] + class Explicit(ExplicitMixin, states.Explicit): def __init__(self, state_machine, debug=False): diff --git a/rst_to_myst/utils.py b/rst_to_myst/utils.py index 070fa26..eb2c0c3 100644 --- a/rst_to_myst/utils.py +++ b/rst_to_myst/utils.py @@ -39,5 +39,5 @@ class YamlDumper(yaml.SafeDumper): YamlDumper.add_representer(str, represent_str) -def yaml_dump(data): - return yaml.dump(data, Dumper=YamlDumper) +def yaml_dump(data, sort_keys: bool = True): + return yaml.dump(data, Dumper=YamlDumper, sort_keys=sort_keys) diff --git a/tests/fixtures/ast.txt b/tests/fixtures/ast.txt index 031dd6d..e0cfd0f 100644 --- a/tests/fixtures/ast.txt +++ b/tests/fixtures/ast.txt @@ -185,6 +185,10 @@ directive-eval-rst: content + .. sdf:: + + other + .. hij:: argument :opt: value1 :opt2: value2 @@ -197,12 +201,29 @@ directive-eval-rst: . - - - - - - + + .. name:: argument + + .. xyz:: + :opt: value + + .. lmp:: + + content + + .. sdf:: + + other + + .. hij:: argument + :opt: value1 + :opt2: value2 + + .. hij:: argument + :opt: value1 + :opt2: value2 + + content . @@ -224,9 +245,13 @@ directive-admonition: .. tip:: Content + + .. unknown:: arg_block + + content . 
- + Abc @@ -236,16 +261,20 @@ directive-admonition: A b - + lmn - + - + Content + + .. unknown:: arg_block + + content . @@ -377,20 +406,27 @@ Block quotes are just: sub-title - - + + .. abc:: x + :a: b + + abc + + .. xyz:: lkjhlkj x - + replacement text - + + + warning.png @@ -462,6 +498,7 @@ Block quotes are just: enumerated list + . tables-simple: diff --git a/tests/fixtures/render.txt b/tests/fixtures/render.txt index 8fa090a..8960156 100644 --- a/tests/fixtures/render.txt +++ b/tests/fixtures/render.txt @@ -1,307 +1,341 @@ -inline: +text: . -a *some emphasis* **some bold** b -c :role:`content` d -e ``literal`` f -g reference_ h -i `a phrase`_ j -k `text `_ l -m anonymous__ n -o (inline target not supported) p -q |sub| r -s [1]_ [#]_ [*]_ t -u [CIT2002]_ v -w http://a.net/ x -y `uri `_ z - -.. _reference: http://www.example.com - -.. __: - -anonymous target paragraph - -.. [#] auto-number footnote. -.. [*] auto-symbol footnote. - next line - - new paragraph -.. [CIT2002] *a* citation footnote - +some text +. +some text . -a *some emphasis* **some bold** b -c {role}`content` d -e `literal` f -g [reference][reference] h -i [a phrase](a phrase) j -k [text](ref) l -m [anonymous](id5) n -o (inline target not supported) p -q {{ sub }} r -s [^1] [^id6] [^id7] t -u [^cite_cit2002] v -w x -y [uri]() z - -[reference]: http://www.example.com - -(id5)= - -anonymous target paragraph - -[^id6]: auto-number footnote. - -[^id7]: auto-symbol footnote. - next line - new paragraph +emphasis: +. +*emphasis* +. +*emphasis* +. -[^cite_cit2002]: *a* citation footnote +strong: +. +**strong** +. +**strong** . -indents: +literal: . -- a - b - *c* - **d** - `e`_ - :f:`g` +``abc`` . -- a - b - *c* - **d** - [e](e) - {f}`g` +`abc` . -titles: +literal block: . -header 1 -======== +:: -header 2a ---------- + literal text -header 3 -........ +normal text +. +``` +literal text +``` -header 2b ---------- +normal text . -# header 1 -## header 2a +block quotes +. 
+a -### header 3 + b -## header 2b + -- attribution -. + nested -paragraphs: -. -para 1 + - with + - bullet list -para 2 -line 2 + 1. with + 2. enumerated list -para 3 +c . -para 1 +a -para 2 -line 2 +> b +> +>

<p class="attribution">-attribution</p>

-para 3 -. +> > nested +> +> - with +> - bullet list +> +> 1. with +> 2. enumerated list -comments: -. -.. This is a comment. -. - +c . transition: . ---- . ---- - +______________________________________________________________________ . -directives: +headings: . -.. image:: images/ball1.gif +heading 1 +========= -.. figure:: images/ball1.gif - :option: value +heading 2-1 +----------- - Content +heading 3 +********* -.. note:: - :class: something - :name: else +heading 2-2 +----------- +. +# heading 1 + +## heading 2-1 - .. admonition:: Some :role:`a` +### heading 3 - Content :role:`a` +## heading 2-2 . -```{image} images/ball1.gif -``` +bullet-list: +. +- a +- b +- c -:::{figure} images/ball1.gif -:option: value +* d -Content + * e + * f -::: +* g +. +- a +- b +- c -::::{note} -:class: something -:name: else +* d -:::{admonition} Some {role}`a` + - e + - f -Content {role}`a` +* g +. -::: +enumerated list: +. +1. a +2. b +3. c -:::: +#. d +#. e +11. f +12. g . +1. a +2. b +3. c +4. d +5. e -lists: +11) f +12) g . -a -- b -- *c* +comment: +. +.. This is a comment. - * x +.. + This whole indented block + is a comment. -1. d -2. e - f + Still in the comment. - g +. +% This is a comment. - 5. x +% This whole indented block +% is a comment. +% +% Still in the comment. . -a -- b -- *c* +autolink: +. +http://a.net/ +. + +. - * x +external link: +. +`Link text `_ +. +[Link text](https://domain.invalid/) +. -1. d -2. e - f +external link definition: +. +This is a paragraph that contains `a link`_. - g +.. _a link: https://domain.invalid/ +. +This is a paragraph that contains [a link]. - 5. x +[a link]: https://domain.invalid/ . -literal-block: +internal link, no-definition +. +`a link`_ `text `_ +. +[a link] [text](alink) . -:: - Some text - More text +link definition only +. +.. _a link: https://domain.invalid/ +. +[a link]: https://domain.invalid/ +. -para +target: . -``` -Some text -More text -``` +.. _a: -para +ab +== . +(a)= -definition-list: +# ab . 
-what `a`_ - Definition lists associate a term with - a definition `a`_. -how - The term is a one-line phrase, and the - definition is one or more paragraphs or - body elements, indented relative to the - term. - - - Blank lines are not allowed - between term and definition. +footnote: +. +[1]_ [#]_ [*]_ -para +.. [1] normal footnote + next line +.. [#] auto-number footnote. +.. [*] auto-symbol footnote. . -what [a](a) +[^id4] [^id5] [^id6] -: Definition lists associate a term with - a definition [a](a). +[^id4]: normal footnote + next line -how +[^id5]: auto-number footnote. -: The term is a one-line phrase, and the - definition is one or more paragraphs or - body elements, indented relative to the - term. +[^id6]: auto-symbol footnote. +. - - Blank lines are not allowed - between term and definition. +citation: +. +[a]_ -para +.. [a] citation footnote + next line + new paragraph . +[^cite_a] + +[^cite_a]: citation footnote + next line -block-quotes: + new paragraph . -Block quotes are just: - Indented +roles: +. +`a` :b:`c` +. +`a` {b}`c` +. - paragraphs +table, simple: +. +== == +A B +== == +C D +== == +. +| A | B | +| --- | --- | +| C | D | +. - and they - may nest. +table, nested syntax: +. +===== ============= +*A* http://a.net/ +===== ============= +``C`` [1]_ +===== ============= - -- attribution +.. [1] footnote +. +| *A* | | +| --- | --------------- | +| `C` | [^id2] | - - with - - bullet list +[^id2]: footnote +. - 1. with - 2. enumerated list +tables-grid: +. ++------------------------+------------+----------+----------+ +| Header row, column 1 | Header 2 | Header 3 | Header 4 | +| (header rows optional) | | | | ++========================+============+==========+==========+ +| body row 1, column 1 | column 2 | column 3 | column 4 | ++------------------------+------------+----------+----------+ +| body row 2 | ... | ... | | ++------------------------+------------+----------+----------+ para . 
-Block quotes are just: - -> Indented -> -> paragraphs -> -> > and they -> > may nest. -> > -> >

—attribution

-> > -> - with -> - bullet list - -> 1. with -> 2. enumerated list +| Header row, column 1 (header rows optional) | Header 2 | Header 3 | Header 4 | +| ------------------------------------------- | -------- | -------- | -------- | +| body row 1, column 1 | column 2 | column 3 | column 4 | +| body row 2 | ... | ... | | para . +table, multi-head: +. +== == +A B +X Y +== == +C D +== == +. +```{eval_rst} +== == +A B +X Y +== == +C D +== == +``` +. + front-matter: . :Authors: @@ -323,134 +357,233 @@ Authors: |- Dedication: To my father. Version: 1.0 of 2001/08/08 orphan: true - --- +. +substitution-reference: +. +|sub| +. +{{ sub }} . -substitution-definitions: +substitution-definition: . +:orphan: + .. |name| replace:: replacement `a`_ .. |caution| image:: warning.png :alt: Warning! . --- +orphan: true substitutions: caution: |- ```{image} warning.png :alt: Warning! - ``` - name: replacement [a](a) - + name: replacement [a] --- - - . -tables-simple: +definition list . -===== ===== ======= -`a`_ B A and B -===== ===== ======= -False False False -True False False -False True False -True True `a`_ -===== ===== ======= +term (up to a line of text) + Definition of the term, which must be indented -para + and can even consist of multiple paragraphs + +next term + Description ``a`` `a`_. . -| [a](a) | B | A and B | -| ------ | ----- | ------- | -| False | False | False | -| True | False | False | -| False | True | False | -| True | True | [a](a) | +term (up to a line of text) -para +: Definition of the term, which must be indented + + and can even consist of multiple paragraphs + +next term + +: Description `a` [a]. . -tables-grid: + +directive-eval-rst . 
-+------------------------+------------+----------+----------+ -| Header row, column 1 | Header 2 | Header 3 | Header 4 | -| (header rows optional) | | | | -+========================+============+==========+==========+ -| body row 1, column 1 | column 2 | column 3 | column 4 | -+------------------------+------------+----------+----------+ -| body row 2 | ... | ... | | -+------------------------+------------+----------+----------+ +.. unknown:: argument + +.. xyz:: + :opt: value + +.. lmp:: + + content + + .. sdf:: + + other + +.. hij:: argument + :opt: value1 + :opt2: value2 + +.. hij:: argument + :opt: value1 + :opt2: value2 + + ````content```` -para . -```{eval_rst} -+------------------------+------------+----------+----------+ -| Header row, column 1 | Header 2 | Header 3 | Header 4 | -| (header rows optional) | | | | -+========================+============+==========+==========+ -| body row 1, column 1 | column 2 | column 3 | column 4 | -+------------------------+------------+----------+----------+ -| body row 2 | ... | ... | | -+------------------------+------------+----------+----------+ +```{eval-rst} +.. unknown:: argument ``` -para +```{eval-rst} +.. xyz:: + :opt: value +``` + +```{eval-rst} +.. lmp:: + + content + + .. sdf:: + + other +``` + +```{eval-rst} +.. hij:: argument + :opt: value1 + :opt2: value2 +``` + +`````{eval-rst} +.. hij:: argument + :opt: value1 + :opt2: value2 + + ````content```` +````` . -match_titles: +directive-admonition: . -.. computational-economics documentation master file +initial paragraph + +.. admonition:: Abc *d* `a`_ + :class: xyz + :name: df + + A *b* http://a.net/ + + next paragraph + +.. note:: -.. only:: html + .. tip:: - #### - Home - #### + Content -.. only:: latex + .. note:: - ########################## - Datascience for Economists - ########################## + Content 2 -.. toctree:: - :maxdepth: 2 - :titlesonly: + .. 
unknown:: arg_block - introduction/index - python_fundamentals/index - scientific/index - pandas/index - applications/index + content +final paragraph . - +initial paragraph + +:::{admonition} Abc *d* [a] +:class: xyz +:name: df + +A *b* + +next paragraph +::: + +:::::{note} + +::::{tip} +Content -```{only} html +:::{note} +Content 2 +::: +:::: -# Home +```{eval-rst} +.. unknown:: arg_block + content ``` +::::: -```{only} latex +final paragraph +. -# Datascience for Economists +field-list +. +paragraph + +:name: value +:other: multiline + value + more +paragraph +. +paragraph + +```{eval-rst} + +:name: value +:other: multiline + value + more ``` -```{toctree} -:maxdepth: 2 -:titlesonly: true +paragraph +. + +dollarmath +. +:math:`a^2 + b^2 = c^2` +. +$a^2 + b^2 = c^2$ +. -introduction/index -python_fundamentals/index -scientific/index -pandas/index -applications/index +code-block-no-args +. +.. code-block:: python + + def some_function(): + interesting = False + print 'This line is highlighted.' + print 'This one is not...' + print '...but this one is.' + +.. code-block:: yaml + + a: 1 + b: "a" +. +```python +def some_function(): + interesting = False + print 'This line is highlighted.' + print 'This one is not...' + print '...but this one is.' +``` +```yaml +a: 1 +b: "a" ``` . diff --git a/tests/fixtures/render_extra.txt b/tests/fixtures/render_extra.txt new file mode 100644 index 0000000..1fc39ae --- /dev/null +++ b/tests/fixtures/render_extra.txt @@ -0,0 +1,442 @@ +inline: +. +a *some emphasis* **some bold** b +c :role:`content` d +e ``literal`` f +g reference_ h +i `a phrase`_ j +k `text `_ l +m anonymous__ n +o (inline target not supported) p +q |sub| r +s [1]_ [#]_ [*]_ t +u [CIT2002]_ v +w http://a.net/ x +y `uri `_ z + +.. _reference: http://www.example.com + +.. __: + +anonymous target paragraph + +.. [#] auto-number footnote. +.. [*] auto-symbol footnote. + next line + + new paragraph +.. [CIT2002] *a* citation footnote + +. 
+a *some emphasis* **some bold** b +c {role}`content` d +e `literal` f +g [reference] h +i [a phrase] j +k [text][ref] l +m [anonymous](id5) n +o (inline target not supported) p +q {{ sub }} r +s [^1] [^id6] [^id7] t +u [^cite_cit2002] v +w x +y [uri](a.org) z + +(id5)= + +anonymous target paragraph + +[^id6]: auto-number footnote. + +[^id7]: auto-symbol footnote. + next line + + new paragraph + +[^cite_cit2002]: *a* citation footnote + +[reference]: http://www.example.com +. + +indents: +. +- a + b + *c* + **d** + `e`_ + :f:`g` +. +- a + b + *c* + **d** + [e] + {f}`g` +. + +titles: +. +header 1 +======== + +header 2a +--------- + +header 3 +........ + +header 2b +--------- +. +# header 1 + +## header 2a + +### header 3 + +## header 2b + +. + +paragraphs: +. +para 1 + +para 2 +line 2 + +para 3 +. +para 1 + +para 2 +line 2 + +para 3 +. + +comments: +. +.. This is a comment. +. +% This is a comment. +. + +transition: +. +---- +. +______________________________________________________________________ +. + +directives: +. +.. image:: images/ball1.gif + +.. figure:: images/ball1.gif + :option: value + + Content + +.. note:: + :class: something + :name: else + + .. admonition:: Some :role:`a` + + Content :role:`a` +. +```{image} images/ball1.gif +``` + +:::{figure} images/ball1.gif +:option: value + +Content +::: + +::::{note} +:class: something +:name: else + +:::{admonition} Some {role}`a` +Content {role}`a` +::: +:::: +. + +lists: +. +paragraph + +- bullet *list* +- tight + +paragraph + +- bullet ``list`` +- loose and nested + + * x + +1. enumerated *list* +2. tight + +paragraph + +5. enumerated *list* +6. tight and specific start number + +paragraph + +1. enumerated *list* +2. loose + multiple + + paragraphs + +paragraph +. +paragraph + +- bullet *list* +- tight + +paragraph + +- bullet `list` + +- loose and nested + + - x + +1. enumerated *list* +2. tight + +paragraph + +5. enumerated *list* +6. tight and specific start number + +paragraph + +1. 
enumerated *list* + +2. loose + multiple + + paragraphs + +paragraph +. + +literal-block: +. +:: + + Some text + More text + +para +. +``` +Some text +More text +``` + +para +. + +definition-list: +. +what `a`_ + Definition lists associate a term with + a definition `a`_. + +how + The term is a one-line phrase, and the + definition is one or more paragraphs or + body elements, indented relative to the + term. + + - Blank lines are not allowed + between term and definition. + +para +. +what [a] + +: Definition lists associate a term with + a definition [a]. + +how + +: The term is a one-line phrase, and the + definition is one or more paragraphs or + body elements, indented relative to the + term. + + - Blank lines are not allowed + between term and definition. + +para + +. + +block-quotes: +. +Block quotes are just: + + Indented + + paragraphs + + and they + may nest. + + -- attribution + + - with + - bullet list + + 1. with + 2. enumerated list + +para +. +Block quotes are just: + +> Indented +> +> paragraphs +> +> > and they +> > may nest. +> > +> >

-attribution

+> +> - with +> - bullet list +> +> 1. with +> 2. enumerated list + +para +. + +match_titles: +. +.. computational-economics documentation master file + +.. only:: html + + #### + Home + #### + +.. only:: latex + + ########################## + Datascience for Economists + ########################## + +.. toctree:: + :maxdepth: 2 + :titlesonly: + + introduction/index + python_fundamentals/index + scientific/index + pandas/index + applications/index + +. +% computational-economics documentation master file + +:::{only} html +# Home +::: + +:::{only} latex +# Datascience for Economists +::: + +```{toctree} +:maxdepth: 2 +:titlesonly: true + +introduction/index +python_fundamentals/index +scientific/index +pandas/index +applications/index +``` +. + +list-indented +. +This is a numbered list! + +#. Step 1 +#. Step 2 +#. Step 3 +#. Step 4 + +This is a numbered list with indentation! + + #. Step 1 + #. Step 2 + #. Step 3 + #. Step 4 + +This is a regular list with indentation! + + * Step 1 + * Step 2 + * Step 3 + * Step 4 +. +This is a numbered list! + +1. Step 1 +2. Step 2 +3. Step 3 +4. Step 4 + +This is a numbered list with indentation! + +> 1. Step 1 +> 2. Step 2 +> 3. Step 3 +> 4. Step 4 + +This is a regular list with indentation! + +> - Step 1 +> - Step 2 +> - Step 3 +> - Step 4 +. + +fields-after-title +. +============================= + reStructuredText Directives +============================= +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. +. +--- +Author: David Goodger +Contact: +Copyright: This document has been placed in the public domain. +Date: \$Date\$ +Revision: \$Revision\$ +--- + +# reStructuredText Directives +. 
diff --git a/tests/test_cli.py b/tests/test_cli.py index 566cecb..ffe505f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,3 +1,5 @@ +from pathlib import Path + from click.testing import CliRunner from rst_to_myst import cli @@ -6,7 +8,7 @@ def test_directives_list(): runner = CliRunner() result = runner.invoke(cli.directives_list, []) - assert result.exit_code == 0 + assert result.exit_code == 0, result.output assert "admonition" in result.output @@ -40,13 +42,43 @@ def test_roles_show(): def test_ast(): runner = CliRunner() - result = runner.invoke(cli.ast, [], input=":name:`content`") + result = runner.invoke(cli.ast, ["-"], input=":name:`content`") assert result.exit_code == 0, result.output assert '' in result.output -def test_parse(): +def test_tokens(): runner = CliRunner() - result = runner.invoke(cli.parse, [], input=":name:`content`") + result = runner.invoke(cli.tokens, ["-"], input=":name:`content`") + assert result.exit_code == 0, result.output + assert "paragraph_open" in result.output + + +def test_stream(): + runner = CliRunner() + result = runner.invoke(cli.stream, ["-"], input=":name:`content`") assert result.exit_code == 0, result.output assert "{name}`content`" in result.output + + +def test_convert(tmp_path: Path, file_regression): + tmp_path.joinpath("test.rst").write_text( + "head\n====\n\ncontent `a`\n", encoding="utf8" + ) + tmp_path.joinpath("config.yaml").write_text("default_role: math\n", encoding="utf8") + runner = CliRunner() + result = runner.invoke( + cli.convert, + [ + "--config", + str(tmp_path.joinpath("config.yaml")), + str(tmp_path.joinpath("test.rst")), + ], + ) + assert result.exit_code == 0, result.output + assert tmp_path.joinpath("test.md").exists() + file_regression.check( + tmp_path.joinpath("test.md").read_text(encoding="utf8"), + encoding="utf8", + extension=".md", + ) diff --git a/tests/test_cli/test_convert.md b/tests/test_cli/test_convert.md new file mode 100644 index 0000000..98d2c6f --- /dev/null +++ 
b/tests/test_cli/test_convert.md @@ -0,0 +1,3 @@ +# head + +content $a$ diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py index 9809762..605f034 100644 --- a/tests/test_fixtures.py +++ b/tests/test_fixtures.py @@ -2,7 +2,7 @@ import pytest -from rst_to_myst import convert, to_ast +from rst_to_myst import rst_to_myst, to_docutils_ast from rst_to_myst.utils import read_fixture_file FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures") @@ -14,7 +14,7 @@ ids=[f"{i[0]}-{i[1]}" for i in read_fixture_file(FIXTURE_PATH / "ast.txt")], ) def test_ast(line, title, rst, expected): - document, warning_stream = to_ast(rst) + document, warning_stream = to_docutils_ast(rst) text = document.pformat() try: assert warning_stream.getvalue() == "" @@ -30,10 +30,27 @@ def test_ast(line, title, rst, expected): ids=[f"{i[0]}-{i[1]}" for i in read_fixture_file(FIXTURE_PATH / "render.txt")], ) def test_render(line, title, rst, expected): - text, warning_stream = convert(rst) + output = rst_to_myst(rst) try: - assert warning_stream.getvalue() == "" - assert text.rstrip() == expected.rstrip() + assert output.warning_stream.getvalue() == "" + assert output.text.rstrip() == expected.rstrip() except AssertionError: - print(text) + print(output.text) + raise + + +@pytest.mark.parametrize( + "line,title,rst,expected", + read_fixture_file(FIXTURE_PATH / "render_extra.txt"), + ids=[ + f"{i[0]}-{i[1]}" for i in read_fixture_file(FIXTURE_PATH / "render_extra.txt") + ], +) +def test_render_extra(line, title, rst, expected): + output = rst_to_myst(rst) + try: + assert output.warning_stream.getvalue() == "" + assert output.text.rstrip() == expected.rstrip() + except AssertionError: + print(output.text) raise diff --git a/tests/test_texts.py b/tests/test_texts.py new file mode 100644 index 0000000..f922081 --- /dev/null +++ b/tests/test_texts.py @@ -0,0 +1,23 @@ +from pathlib import Path + +import pytest + +from rst_to_myst import rst_to_myst + +TEXTS_PATH = 
Path(__file__).parent.joinpath("texts") + + +@pytest.mark.parametrize( + "path", + list(TEXTS_PATH.glob("*.rst")), + ids=[path.name[:-4] for path in TEXTS_PATH.glob("*.rst")], +) +def test_texts(path: Path, file_regression): + text = path.read_text("utf8") + output = rst_to_myst(text) + warnings = output.warning_stream.getvalue().splitlines() + # ignore known inline target warnings + assert not [ + line for line in warnings if "inline targets not implemented" not in line + ], warnings + file_regression.check(output.text, encoding="utf8", extension=".md") diff --git a/tests/test_texts/test_texts_directives_.md b/tests/test_texts/test_texts_directives_.md new file mode 100644 index 0000000..15f676d --- /dev/null +++ b/tests/test_texts/test_texts_directives_.md @@ -0,0 +1,2082 @@ +--- +Author: David Goodger +Contact: +Copyright: This document has been placed in the public domain. +Date: \$Date\$ +Revision: \$Revision\$ +substitutions: + '---': |- + ```{eval-rst} + .. unicode:: U+02014 .. em dash + :trim: + ``` + BogusMegaCorp (TM): |- + ```{eval-rst} + .. unicode:: BogusMegaCorp U+2122 + .. with trademark sign + ``` + copy: |- + ```{eval-rst} + .. unicode:: 0xA9 .. copyright sign + ``` +--- + +# reStructuredText Directives + +```{contents} +``` + +This document describes the directives implemented in the reference +reStructuredText parser. + +Directives have the following syntax: + +``` ++-------+-------------------------------+ +| ".. " | directive type "::" directive | ++-------+ block | + | | + +-------------------------------+ +``` + +Directives begin with an explicit markup start (two periods and a +space), followed by the directive type and two colons (collectively, +the "directive marker"). The directive block begins immediately after +the directive marker, and includes all subsequent indented lines. The +directive block is divided into arguments, options (a field list), and +content (in that order), any of which may appear. 
See the [Directives] +section in the [reStructuredText Markup Specification] for syntax +details. + +Descriptions below list "doctree elements" (document tree element +names; XML DTD generic identifiers) corresponding to individual +directives. For details on the hierarchy of elements, please see [The +Docutils Document Tree][the docutils document tree] and the [Docutils Generic DTD] XML document +type definition. For directive implementation details, see [Creating +reStructuredText Directives][creating restructuredtext directives]. + +## Admonitions + +(attention)= + +(caution)= + +(danger)= + +(error)= + +(hint)= + +(important)= + +(note)= + +(tip)= + +(warning)= + +### Specific Admonitions + +```{eval-rst} + +"important", "note", "tip", "warning", "admonition" +:Doctree Elements: attention, caution, danger, error, hint, important, + note, tip, warning, admonition_, title +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as body elements. +``` + +Admonitions are specially marked "topics" that can appear anywhere an +ordinary body element can. They contain arbitrary body elements. +Typically, an admonition is rendered as an offset block in a document, +sometimes outlined or shaded, with a title matching the admonition +type. For example: + +``` +.. DANGER:: + Beware killer rabbits! +``` + +This directive might be rendered something like this: + +``` ++------------------------+ +| !DANGER! | +| | +| Beware killer rabbits! | ++------------------------+ +``` + +The following admonition directives have been implemented: + +- attention +- caution +- danger +- error +- hint +- important +- note +- tip +- warning + +Any text immediately following the directive indicator (on the same +line and/or indented on following lines) is interpreted as a directive +block and is parsed for normal body elements. 
For example, the +following "note" admonition directive contains one paragraph and a +bullet list consisting of two list items: + +``` +.. note:: This is a note admonition. + This is the second line of the first paragraph. + + - The note contains all indented body elements + following. + - It includes this bullet list. +``` + +### Generic Admonition + +```{eval-rst} + +:Directive Type: "admonition" +:Doctree Elements: admonition_, title +:Directive Arguments: One, required (admonition title) +:Directive Options: Possible, see below. +:Directive Content: Interpreted as body elements. +``` + +This is a generic, titled admonition. The title may be anything the +author desires. + +The author-supplied title is also used as a ["classes"] attribute value +after being converted into a valid identifier form (down-cased; +non-alphanumeric characters converted to single hyphens; "admonition-" +prefixed). For example, this admonition: + +``` +.. admonition:: And, by the way... + + You can make up your own admonition too. +``` + +becomes the following document tree (pseudo-XML): + +``` + + + + And, by the way... + <paragraph> + You can make up your own admonition too. +``` + +The [common options] are recognized: + +`class` + +: Overrides the computed ["classes"] attribute value. + +`name` + +: Add `text` to the ["names"] attribute of the admonition element. + +## Images + +There are two image directives: "image" and "figure". + +### Image + +```{eval-rst} + +:Directive Type: "image" +:Doctree Element: image_ +:Directive Arguments: One, required (image URI). +:Directive Options: Possible. +:Directive Content: None. +``` + +An "image" is a simple picture: + +``` +.. image:: picture.png +``` + +Inline images can be defined with an "image" directive in a [substitution +definition][substitution definition] + +The URI for the image source file is specified in the directive +argument. 
As with hyperlink targets, the image URI may begin on the +same line as the explicit markup start and target name, or it may +begin in an indented text block immediately following, with no +intervening blank lines. If there are multiple lines in the link +block, they are stripped of leading and trailing whitespace and joined +together. + +Optionally, the image link block may contain a flat field list, the +`` _`image options` ``. For example: + +``` +.. image:: picture.jpeg + :height: 100px + :width: 200 px + :scale: 50 % + :alt: alternate text + :align: right +``` + +The following options are recognized: + +`alt` + +: Alternate text: a short description of the image, displayed by + applications that cannot display images, or spoken by applications + for visually impaired users. + +`height` + +: The desired height of the image. + Used to reserve space or scale the image vertically. When the "scale" + option is also specified, they are combined. For example, a height of + 200px and a scale of 50 is equivalent to a height of 100px with no scale. + +`width` + +: The width of the image. + Used to reserve space or scale the image horizontally. As with "height" + above, when the "scale" option is also specified, they are combined. + +`scale` + +: The uniform scaling factor of the image. The default is "100 %", i.e. + no scaling. + + If no "height" or "width" options are specified, the [Python Imaging + Library][python imaging library] (PIL) may be used to determine them, if it is installed and + the image file is available. + +`align` + +: The alignment of the image, equivalent to the HTML `<img>` tag's + "align" attribute. The values "top", "middle", and "bottom" + control an image's vertical alignment (relative to the text + baseline); they are only useful for inline images (substitutions). + The values "left", "center", and "right" control an image's + horizontal alignment, allowing the image to float and have the + text flow around it. 
The specific behavior depends upon the + browser or rendering software used. + +`target` + +: Makes the image into a hyperlink reference ("clickable"). The + option argument may be a URI (relative or absolute), or a + [reference name] with underscore suffix (e.g. `` `a name`_ ``). + +and the common options [:class:] and [:name:]. + +### Figure + +```{eval-rst} + +:Directive Type: "figure" +:Doctree Elements: figure_, image_, caption_, legend_ +:Directive Arguments: One, required (image URI). +:Directive Options: Possible. +:Directive Content: Interpreted as the figure caption and an optional + legend. +``` + +A "figure" consists of [image] data (including [image options]), an optional +caption (a single paragraph), and an optional legend (arbitrary body +elements). For page-based output media, figures might float to a different +position if this helps the page layout. + +``` +.. figure:: picture.png + :scale: 50 % + :alt: map to buried treasure + + This is the caption of the figure (a simple paragraph). + + The legend consists of all elements after the caption. In this + case, the legend consists of this paragraph and the following + table: + + +-----------------------+-----------------------+ + | Symbol | Meaning | + +=======================+=======================+ + | .. image:: tent.png | Campground | + +-----------------------+-----------------------+ + | .. image:: waves.png | Lake | + +-----------------------+-----------------------+ + | .. image:: peak.png | Mountain | + +-----------------------+-----------------------+ +``` + +There must be blank lines before the caption paragraph and before the +legend. To specify a legend without a caption, use an empty comment +("..") in place of the caption. + +The "figure" directive supports all of the options of the "image" +directive (see [image options] above). These options (except +"align") are passed on to the contained image. 
+ +`align` + +: The horizontal alignment of the figure, allowing the image to + float and have the text flow around it. The specific behavior + depends upon the browser or rendering software used. + +In addition, the following options are recognized: + +`figwidth` + +: The width of the figure. + Limits the horizontal space used by the figure. + A special value of "image" is allowed, in which case the + included image's actual width is used (requires the [Python Imaging + Library][python imaging library]). If the image file is not found or the required software is + unavailable, this option is ignored. + + Sets the "width" attribute of the "figure" doctree element. + + This option does not scale the included image; use the "width" + [image] option for that. + + ``` + +---------------------------+ + | figure | + | | + |<------ figwidth --------->| + | | + | +---------------------+ | + | | image | | + | | | | + | |<--- width --------->| | + | +---------------------+ | + | | + |The figure's caption should| + |wrap at this width. | + +---------------------------+ + ``` + +`figclass` + +: Set a ["classes"] attribute value on the figure element. See the + [class] directive below. + +## Body Elements + +### Topic + +```{eval-rst} + +:Directive Type: "topic" +:Doctree Element: topic_ +:Directive Arguments: 1, required (topic title). +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as the topic body. +``` + +A topic is like a block quote with a title, or a self-contained +section with no subsections. Use the "topic" directive to indicate a +self-contained idea that is separate from the flow of the document. +Topics may occur anywhere a section or transition may occur. Body +elements and topics may not contain nested topics. + +The directive's sole argument is interpreted as the topic title; the +next line must be blank. All subsequent lines make up the topic body, +interpreted as body elements. For example: + +``` +.. 
topic:: Topic Title + + Subsequent indented lines comprise + the body of the topic, and are + interpreted as body elements. +``` + +### Sidebar + +```{eval-rst} + +:Directive Type: "sidebar" +:Doctree Element: sidebar_ +:Directive Arguments: One, required (sidebar title). +:Directive Options: Possible (see below). +:Directive Content: Interpreted as the sidebar body. +``` + +Sidebars are like miniature, parallel documents that occur inside +other documents, providing related or reference material. A sidebar +is typically offset by a border and "floats" to the side of the page; +the document's main text may flow around it. Sidebars can also be +likened to super-footnotes; their content is outside of the flow of +the document's main text. + +Sidebars may occur anywhere a section or transition may occur. Body +elements (including sidebars) may not contain nested sidebars. + +The directive's sole argument is interpreted as the sidebar title, +which may be followed by a subtitle option (see below); the next line +must be blank. All subsequent lines make up the sidebar body, +interpreted as body elements. For example: + +``` +.. sidebar:: Sidebar Title + :subtitle: Optional Sidebar Subtitle + + Subsequent indented lines comprise + the body of the sidebar, and are + interpreted as body elements. +``` + +The following options are recognized: + +`subtitle` + +: The sidebar's subtitle. + +and the common options [:class:] and [:name:]. + +### Line Block + +:::{admonition} Deprecated +The "line-block" directive is deprecated. Use the [line block +syntax][line block syntax] instead. +::: + +```{eval-rst} + +:Directive Type: "line-block" +:Doctree Element: line_block_ +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Becomes the body of the line block. +``` + +The "line-block" directive constructs an element where line breaks and +initial indentation is significant and inline markup is supported. 
It +is equivalent to a [parsed literal block] with different rendering: +typically in an ordinary serif typeface instead of a +typewriter/monospaced face, and not automatically indented. (Have the +line-block directive begin a block quote to get an indented line +block.) Line blocks are useful for address blocks and verse (poetry, +song lyrics), where the structure of lines is significant. For +example, here's a classic: + +``` +"To Ma Own Beloved Lassie: A Poem on her 17th Birthday", by +Ewan McTeagle (for Lassie O'Shea): + + .. line-block:: + + Lend us a couple of bob till Thursday. + I'm absolutely skint. + But I'm expecting a postal order and I can pay you back + as soon as it comes. + Love, Ewan. +``` + +(parsed-literal)= + +### Parsed Literal Block + +```{eval-rst} + +:Directive Type: "parsed-literal" +:Doctree Element: literal_block_ +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Becomes the body of the literal block. +``` + +Unlike an ordinary literal block, the "parsed-literal" directive +constructs a literal block where the text is parsed for inline markup. +It is equivalent to a [line block] with different rendering: +typically in a typewriter/monospaced typeface, like an ordinary +literal block. Parsed literal blocks are useful for adding hyperlinks +to code examples. + +However, care must be taken with the text, because inline markup is +recognized and there is no protection from parsing. Backslash-escapes +may be necessary to prevent unintended parsing. And because the +markup characters are removed by the parser, care must also be taken +with vertical alignment. Parsed "ASCII art" is tricky, and extra +whitespace may be necessary. + +For example, all the element names in this content model are links: + +``` +.. 
parsed-literal:: + + ( (title_, subtitle_?)?, + decoration_?, + (docinfo_, transition_?)?, + `%structure.model;`_ ) +``` + +### Code + +```{eval-rst} + +:Directive Type: "code" +:Doctree Element: literal_block_, `inline elements`_ +:Directive Arguments: One, optional (formal language). +:Directive Options: name, class, number-lines. +:Directive Content: Becomes the body of the literal block. +:Configuration Setting: syntax_highlight_. +``` + +(New in Docutils 0.9) + +The "code" directive constructs a literal block. If the code language is +specified, the content is parsed by the [Pygments] syntax highlighter and +tokens are stored in nested [inline elements] with class arguments +according to their syntactic category. The actual highlighting requires +a style-sheet (e.g. one [generated by Pygments](http://pygments.org/docs/cmdline/#generating-styles), see the +[sandbox/stylesheets](http://docutils.sourceforge.net/sandbox/stylesheets/) for examples). + +The parsing can be turned off with the [syntax_highlight] configuration +setting and command line option or by specifying the language as [:class:] +option instead of directive argument. This also avoids warnings +when [Pygments] is not installed or the language is not in the +[supported languages and markup formats]. + +For inline code, use the ["code" role]. + +The following options are recognized: + +`number-lines` + +: Precede every line with a line number. + The optional argument is the number of the first line (defaut 1). + +and the common options [:class:] and [:name:]. + +Example:: + +: The content of the following directive + + ``` + .. code:: python + + def my_function(): + "just a test" + print 8/2 + ``` + + is parsed and marked up as Python source code. + +### Math + +```{eval-rst} + +:Directive Type: "math" +:Doctree Element: math_block_ +:Directive Arguments: One, optional: prepended to content. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as math block(s). 
+ Content blocks separated by a blank line are put in + separate math-block doctree elements. +:Configuration Setting: math_output_ +``` + +(New in Docutils 0.8) + +The "math" directive inserts blocks with mathematical content +(display formulas, equations) into the document. The input format is +*LaTeX math syntax*[^math-syntax] with support for Unicode +symbols, for example: + +``` +.. math:: + + α_t(i) = P(O_1, O_2, … O_t, q_t = S_i λ) +``` + +Support is limited to a subset of *LaTeX math* by the conversion +required for many output formats. For HTML, the [math_output] +configuration setting (or the corresponding `--math-output` +command line option) selects between alternative output formats with +different subsets of supported elements. If a writer does not +support math typesetting at all, the content is inserted verbatim. + +[^math-syntax]: The supported LaTeX commands include AMS extensions + (see, e.g., the [Short Math Guide]). + +For inline math, use the ["math" role]. + +### Rubric + +```{eval-rst} + +:Directive Type: "rubric" +:Doctree Element: rubric_ +:Directive Arguments: 1, required (rubric text). +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: None. +``` + +% + +> rubric n. 1. a title, heading, or the like, in a manuscript, +> book, statute, etc., written or printed in red or otherwise +> distinguished from the rest of the text. ... +> +> <p class="attribution">-Random House Webster's College Dictionary, 1991</p> + +The "rubric" directive inserts a "rubric" element into the document +tree. A rubric is like an informal heading that doesn't correspond to +the document's structure. + +### Epigraph + +```{eval-rst} + +:Directive Type: "epigraph" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote.
+``` + +An epigraph is an apposite (suitable, apt, or pertinent) short +inscription, often a quotation or poem, at the beginning of a document +or section. + +The "epigraph" directive produces an "epigraph"-class block quote. +For example, this input: + +``` +.. epigraph:: + + No matter where you go, there you are. + + -- Buckaroo Banzai +``` + +becomes this document tree fragment: + +``` +<block_quote classes="epigraph"> + <paragraph> + No matter where you go, there you are. + <attribution> + Buckaroo Banzai +``` + +### Highlights + +```{eval-rst} + +:Directive Type: "highlights" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. +``` + +Highlights summarize the main points of a document or section, often +consisting of a list. + +The "highlights" directive produces a "highlights"-class block quote. +See [Epigraph] above for an analogous example. + +### Pull-Quote + +```{eval-rst} + +:Directive Type: "pull-quote" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. +``` + +A pull-quote is a small selection of text "pulled out and quoted", +typically in a larger typeface. Pull-quotes are used to attract +attention, especially in long articles. + +The "pull-quote" directive produces a "pull-quote"-class block quote. +See [Epigraph] above for an analogous example. + +### Compound Paragraph + +```{eval-rst} + +:Directive Type: "compound" +:Doctree Element: compound_ +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as body elements. 
+``` + +(New in Docutils 0.3.6) + +The "compound" directive is used to create a compound paragraph, which +is a single logical paragraph containing multiple physical body +elements such as simple paragraphs, literal blocks, tables, lists, +etc., instead of directly containing text and inline elements. For +example: + +``` +.. compound:: + + The 'rm' command is very dangerous. If you are logged + in as root and enter :: + + cd / + rm -rf * + + you will erase the entire contents of your file system. +``` + +In the example above, a literal block is "embedded" within a sentence +that begins in one physical paragraph and ends in another. + +:::{note} +The "compound" directive is *not* a generic block-level container +like HTML's `<div>` element. Do not use it only to group a +sequence of elements, or you may get unexpected results. + +If you need a generic block-level container, please use the +[container] directive, described below. +::: + +Compound paragraphs are typically rendered as multiple distinct text +blocks, with the possibility of variations to emphasize their logical +unity: + +- If paragraphs are rendered with a first-line indent, only the first + physical paragraph of a compound paragraph should have that indent + -- second and further physical paragraphs should omit the indents; +- vertical spacing between physical elements may be reduced; +- and so on. + +### Container + +```{eval-rst} + +:Directive Type: "container" +:Doctree Element: container_ +:Directive Arguments: One or more, optional (class names). +:Directive Options: `:name:`_ +:Directive Content: Interpreted as body elements. +``` + +(New in Docutils 0.3.10) + +The "container" directive surrounds its contents (arbitrary body +elements) with a generic block-level "container" element. Combined +with the optional "[classes]" attribute argument(s), this is an +extension mechanism for users & applications. For example: + +``` +.. 
container:: custom + + This paragraph might be rendered in a custom way. +``` + +Parsing the above results in the following pseudo-XML: + +``` +<container classes="custom"> + <paragraph> + This paragraph might be rendered in a custom way. +``` + +The "container" directive is the equivalent of HTML's `<div>` +element. It may be used to group a sequence of elements for user- or +application-specific purposes. + +## Tables + +Formal tables need more structure than the reStructuredText syntax +supplies. Tables may be given titles with the [table] directive. +Sometimes reStructuredText tables are inconvenient to write, or table +data in a standard format is readily available. The [csv-table] +directive supports CSV data. + +### Table + +```{eval-rst} + +:Directive Type: "table" +:Doctree Element: table_ +:Directive Arguments: 1, optional (table title). +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: A normal reStructuredText table. +``` + +(New in Docutils 0.3.1) + +The "table" directive is used to create a titled table, to associate a +title with a table: + +``` +.. table:: Truth table for "not" + + ===== ===== + A not A + ===== ===== + False True + True False + ===== ===== +``` + +(csv-table)= + +### CSV Table + +```{eval-rst} + +:Directive Type: "csv-table" +:Doctree Element: table_ +:Directive Arguments: 1, optional (table title). +:Directive Options: Possible (see below). +:Directive Content: A CSV (comma-separated values) table. +``` + +:::{WARNING} +The "csv-table" directive's ":file:" and ":url:" options represent +potential security holes. They can be disabled with the +"[file_insertion_enabled]" runtime setting. +::: + +(New in Docutils 0.3.4) + +The "csv-table" directive is used to create a table from CSV +(comma-separated values) data. CSV is a common data format generated +by spreadsheet applications and commercial databases. The data may be +internal (an integral part of the document) or external (a separate +file). + +Example: + +``` +..
csv-table:: Frozen Delights! + :header: "Treat", "Quantity", "Description" + :widths: 15, 10, 30 + + "Albatross", 2.99, "On a stick!" + "Crunchy Frog", 1.49, "If we took the bones out, it wouldn't be + crunchy, now would it?" + "Gannet Ripple", 1.99, "On a stick!" +``` + +Block markup and inline markup within cells is supported. Line ends +are recognized within cells. + +Working limitations: + +- There is no support for checking that the number of columns in each + row is the same. However, this directive supports CSV generators + that do not insert "empty" entries at the end of short rows, by + automatically adding empty entries. + + % Add "strict" option to verify input? + +[^whitespace-delim]: Whitespace delimiters are supported only for external + CSV files. + +[^ascii-char]: With Python 2, the values for the `delimiter`, + `quote`, and `escape` options must be ASCII characters. (The csv + module does not support Unicode and all non-ASCII characters are + encoded as multi-byte utf-8 string). This limitation does not exist + under Python 3. + +The following options are recognized: + +`widths` + +: A comma- or space-separated list of relative column widths. The + default is equal-width columns (100%/#columns). + +`header-rows` + +: The number of rows of CSV data to use in the table header. + Defaults to 0. + +`stub-columns` + +: The number of table columns to use as stubs (row titles, on the + left). Defaults to 0. + +`header` + +: Supplemental data for the table header, added independently of and + before any `header-rows` from the main CSV data. Must use the + same CSV format as the main CSV data. + +`file` + +: The local filesystem path to a CSV data file. + +`url` + +: An Internet URL reference to a CSV data file. + +`encoding` + +: The text encoding of the external CSV data (file or URL). + Defaults to the document's encoding (if specified). + +`delim` + +: A one-character string[^ascii-char] used to separate fields. + Defaults to `,` (comma).
May be specified as a Unicode code + point; see the [unicode] directive for syntax details. + +`quote` + +: A one-character string[^ascii-char] used to quote elements + containing the delimiter or which start with the quote + character. Defaults to `"` (quote). May be specified as a + Unicode code point; see the [unicode] directive for syntax + details. + +`keepspace` + +: Treat whitespace immediately following the delimiter as + significant. The default is to ignore such whitespace. + +`escape` + +: A one-character[^ascii-char] string used to escape the + delimiter or quote characters. May be specified as a Unicode + code point; see the [unicode] directive for syntax details. Used + when the delimiter is used in an unquoted field, or when quote + characters are used within a field. The default is to double-up + the character, e.g. "He said, ""Hi!""" + + % Add another possible value, "double", to explicitly indicate + % the default case? + +and the common options [:class:] and [:name:]. + +### List Table + +```{eval-rst} + +:Directive Type: "list-table" +:Doctree Element: table_ +:Directive Arguments: 1, optional (table title). +:Directive Options: Possible (see below). +:Directive Content: A uniform two-level bullet list. +``` + +(New in Docutils 0.3.8. This is an initial implementation; [further +ideas](../../dev/rst/alternatives.html#list-driven-tables) may be implemented in the future.) + +The "list-table" directive is used to create a table from data in a +uniform two-level bullet list. "Uniform" means that each sublist +(second-level list) must contain the same number of list items. + +Example: + +``` +.. list-table:: Frozen Delights! + :widths: 15 10 30 + :header-rows: 1 + + * - Treat + - Quantity + - Description + * - Albatross + - 2.99 + - On a stick! + * - Crunchy Frog + - 1.49 + - If we took the bones out, it wouldn't be + crunchy, now would it? + * - Gannet Ripple + - 1.99 + - On a stick! 
+``` + +The following options are recognized: + +`widths` + +: A comma- or space-separated list of relative column widths. The + default is equal-width columns (100%/#columns). + +`header-rows` + +: The number of rows of list data to use in the table header. + Defaults to 0. + +`stub-columns` + +: The number of table columns to use as stubs (row titles, on the + left). Defaults to 0. + +and the common options [:class:] and [:name:]. + +## Document Parts + +(contents)= + +### Table of Contents + +```{eval-rst} + +:Directive Type: "contents" +:Doctree Elements: pending_, topic_ +:Directive Arguments: One, optional: title. +:Directive Options: Possible. +:Directive Content: None. +``` + +The "contents" directive generates a table of contents (TOC) in a +[topic]. Topics, and therefore tables of contents, may occur anywhere +a section or transition may occur. Body elements and topics may not +contain tables of contents. + +Here's the directive in its simplest form: + +``` +.. contents:: +``` + +Language-dependent boilerplate text will be used for the title. The +English default title text is "Contents". + +An explicit title may be specified: + +``` +.. contents:: Table of Contents +``` + +The title may span lines, although it is not recommended: + +``` +.. contents:: Here's a very long Table of + Contents title +``` + +Options may be specified for the directive, using a field list: + +``` +.. contents:: Table of Contents + :depth: 2 +``` + +If the default title is to be used, the options field list may begin +on the same line as the directive marker: + +``` +.. contents:: :depth: 2 +``` + +The following options are recognized: + +`depth` + +: The number of section levels that are collected in the table of + contents. The default is unlimited depth. + +`local` + +: Generate a local table of contents. Entries will only include + subsections of the section in which the directive is given. If no + explicit title is given, the table of contents will not be titled. 
+ +`backlinks` + +: Generate links from section headers back to the table of contents + entries, the table of contents itself, or generate no backlinks. + +`class` + +: Set a ["classes"] attribute value on the topic element. See the + [class] directive below. + +(sectnum)= + +(section-numbering)= + +### Automatic Section Numbering + +```{eval-rst} + +:Directive Type: "sectnum" or "section-numbering" (synonyms) +:Doctree Elements: pending_, generated_ +:Directive Arguments: None. +:Directive Options: Possible. +:Directive Content: None. +:Configuration Setting: sectnum_xform_ +``` + +The "sectnum" (or "section-numbering") directive automatically numbers +sections and subsections in a document (if not disabled by the +`--no-section-numbering` command line option or the [sectnum_xform] +configuration setting). + +Section numbers are of the "multiple enumeration" form, where each +level has a number, separated by periods. For example, the title of section +1, subsection 2, subsubsection 3 would have "1.2.3" prefixed. + +The "sectnum" directive does its work in two passes: the initial parse +and a transform. During the initial parse, a "pending" element is +generated which acts as a placeholder, storing any options internally. +At a later stage in the processing, the "pending" element triggers a +transform, which adds section numbers to titles. Section numbers are +enclosed in a "generated" element, and titles have their "auto" +attribute set to "1". + +The following options are recognized: + +`depth` + +: The number of section levels that are numbered by this directive. + The default is unlimited depth. + +`prefix` + +: An arbitrary string that is prefixed to the automatically + generated section numbers. It may be something like "3.2.", which + will produce "3.2.1", "3.2.2", "3.2.2.1", and so on. Note that + any separating punctuation (in the example, a period, ".") must be + explicitly provided. The default is no prefix. 
+ +`suffix` + +: An arbitrary string that is appended to the automatically + generated section numbers. The default is no suffix. + +`start` + +: The value that will be used for the first section number. + Combined with `prefix`, this may be used to force the right + numbering for a document split over several source files. The + default is 1. + +(header)= + +(footer)= + +### Document Header & Footer + +```{eval-rst} + +:Directive Types: "header" and "footer" +:Doctree Elements: decoration_, header, footer +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as body elements. +``` + +(New in Docutils 0.3.8) + +The "header" and "footer" directives create document decorations, +useful for page navigation, notes, time/datestamp, etc. For example: + +``` +.. header:: This space for rent. +``` + +This will add a paragraph to the document header, which will appear at +the top of the generated web page or at the top of every printed page. + +These directives may be used multiple times, cumulatively. There is +currently support for only one header and footer. + +:::{note} +While it is possible to use the "header" and "footer" directives to +create navigational elements for web pages, you should be aware +that Docutils is meant to be used for *document* processing, and +that a navigation bar is not typically part of a document. + +Thus, you may soon find Docutils' abilities to be insufficient for +these purposes. At that time, you should consider using a +documentation generator like [Sphinx] rather than the "header" and +"footer" directives. +::: + +In addition to the use of these directives to populate header and +footer content, content may also be added automatically by the +processing system. For example, if certain runtime settings are +enabled, the document footer is populated with processing information +such as a datestamp, a link to [the Docutils website], etc. 
+ +## References + +(target-notes)= + +### Target Footnotes + +```{eval-rst} + +:Directive Type: "target-notes" +:Doctree Elements: pending_, footnote_, footnote_reference_ +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Options: Possible. +:Directive Content: None. +``` + +The "target-notes" directive creates a footnote for each external +target in the text, and corresponding footnote references after each +reference. For every explicit target (of the form, `.. _target name: +URL`) in the text, a footnote will be generated containing the +visible URL as content. + +### Footnotes + +**NOT IMPLEMENTED YET** + +```{eval-rst} + +:Directive Type: "footnotes" +:Doctree Elements: pending_, topic_ +:Directive Arguments: None? +:Directive Options: Possible? +:Directive Content: None. +``` + +@@@ + +### Citations + +**NOT IMPLEMENTED YET** + +```{eval-rst} + +:Directive Type: "citations" +:Doctree Elements: pending_, topic_ +:Directive Arguments: None? +:Directive Options: Possible? +:Directive Content: None. +``` + +@@@ + +## HTML-Specific + +### Meta + +```{eval-rst} + +:Directive Type: "meta" +:Doctree Element: meta (non-standard) +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Must contain a flat field list. +``` + +The "meta" directive is used to specify HTML metadata stored in HTML +META tags. "Metadata" is data about data, in this case data about web +pages. Metadata is used to describe and classify web pages in the +World Wide Web, in a form that is easy for search engines to extract +and collate. + +Within the directive block, a flat field list provides the syntax for +metadata. The field name becomes the contents of the "name" attribute +of the META tag, and the field body (interpreted as a single string +without inline markup) becomes the contents of the "content" +attribute. For example: + +``` +.. 
meta:: + :description: The reStructuredText plaintext markup language + :keywords: plaintext, markup language +``` + +This would be converted to the following HTML: + +``` +<meta name="description" + content="The reStructuredText plaintext markup language"> +<meta name="keywords" content="plaintext, markup language"> +``` + +Support for other META attributes ("http-equiv", "scheme", "lang", +"dir") is provided through field arguments, which must be of the form +"attr=value": + +``` +.. meta:: + :description lang=en: An amusing story + :description lang=fr: Une histoire amusante +``` + +And their HTML equivalents: + +``` +<meta name="description" lang="en" content="An amusing story"> +<meta name="description" lang="fr" content="Une histoire amusante"> +``` + +Some META tags use an "http-equiv" attribute instead of the "name" +attribute. To specify "http-equiv" META tags, simply omit the name: + +``` +.. meta:: + :http-equiv=Content-Type: text/html; charset=ISO-8859-1 +``` + +HTML equivalent: + +``` +<meta http-equiv="Content-Type" + content="text/html; charset=ISO-8859-1"> +``` + +### Imagemap + +**NOT IMPLEMENTED YET** + +Non-standard element: imagemap. + +## Directives for Substitution Definitions + +The directives in this section may only be used in substitution +definitions. They may not be used directly, in standalone context. +The [image] directive may be used both in substitution definitions +and in the standalone context. + +(replace)= + +### Replacement Text + +```{eval-rst} + +:Directive Type: "replace" +:Doctree Element: Text & `inline elements`_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: A single paragraph; may contain inline markup. +``` + +The "replace" directive is used to indicate replacement text for a +substitution reference. It may be used within substitution +definitions only. For example, this directive can be used to expand +abbreviations: + +``` +..
|reST| replace:: reStructuredText + +Yes, |reST| is a long word, so I can't blame anyone for wanting to +abbreviate it. +``` + +As reStructuredText doesn't support nested inline markup, the only way +to create a reference with styled text is to use substitutions with +the "replace" directive: + +``` +I recommend you try |Python|_. + +.. |Python| replace:: Python, *the* best language around +.. _Python: http://www.python.org/ +``` + +(unicode)= + +### Unicode Character Codes + +```{eval-rst} + +:Directive Type: "unicode" +:Doctree Element: Text +:Directive Arguments: One or more, required (Unicode character codes, + optional text, and comments). +:Directive Options: Possible. +:Directive Content: None. +``` + +The "unicode" directive converts Unicode character codes (numerical +values) to characters, and may be used in substitution definitions +only. + +The arguments, separated by spaces, can be: + +- **character codes** as + + - decimal numbers or + - hexadecimal numbers, prefixed by `0x`, `x`, `\x`, `U+`, + `u`, or `\u` or as XML-style hexadecimal character entities, + e.g. `&#x1a2b;` + +- **text**, which is used as-is. + +Text following " .. " is a comment and is ignored. The spaces between +the arguments are ignored and thus do not appear in the output. +Hexadecimal codes are case-insensitive. + +For example, the following text: + +``` +Copyright |copy| 2003, |BogusMegaCorp (TM)| |---| +all rights reserved. + +.. |copy| unicode:: 0xA9 .. copyright sign +.. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122 + .. with trademark sign +.. |---| unicode:: U+02014 .. em dash + :trim: +``` + +results in: + +> Copyright {{ copy }} 2003, {{ BogusMegaCorp (TM) }} {{ --- }} +> all rights reserved. + +The following options are recognized: + +`ltrim` + +: Whitespace to the left of the substitution reference is removed. + +`rtrim` + +: Whitespace to the right of the substitution reference is removed.
+ +`trim` + +: Equivalent to `ltrim` plus `rtrim`; whitespace on both sides + of the substitution reference is removed. + +### Date + +```{eval-rst} + +:Directive Type: "date" +:Doctree Element: Text +:Directive Arguments: One, optional (date format). +:Directive Options: None. +:Directive Content: None. +``` + +The "date" directive generates the current local date and inserts it +into the document as text. This directive may be used in substitution +definitions only. + +The optional directive content is interpreted as the desired date +format, using the same codes as Python's time.strftime function. The +default format is "%Y-%m-%d" (ISO 8601 date), but time fields can also +be used. Examples: + +``` +.. |date| date:: +.. |time| date:: %H:%M + +Today's date is |date|. + +This document was generated on |date| at |time|. +``` + +## Miscellaneous + +(include)= + +### Including an External Document Fragment + +```{eval-rst} + +:Directive Type: "include" +:Doctree Elements: Depend on data being included + (literal_block_ with ``code`` or ``literal`` option). +:Directive Arguments: One, required (path to the file to include). +:Directive Options: Possible. +:Directive Content: None. +:Configuration Setting: file_insertion_enabled_ +``` + +:::{WARNING} +The "include" directive represents a potential security hole. It +can be disabled with the "[file_insertion_enabled]" runtime setting. +::: + +The "include" directive reads a text file. The directive argument is +the path to the file to be included, relative to the document +containing the directive. Unless the options `literal` or `code` +are given, the file is parsed in the current document's context at the +point of the directive. For example: + +``` +This first example will be parsed at the document level, and can +thus contain any construct, including section headers. + +.. include:: inclusion.txt + +Back in the main document. + + This second example will be parsed in a block quote context. 
+ Therefore it may only contain body elements. It may not + contain section headers. + + .. include:: inclusion.txt +``` + +If an included document fragment contains section structure, the title +adornments must match those of the master document. + +Standard data files intended for inclusion in reStructuredText +documents are distributed with the Docutils source code, located in +the "docutils" package in the `docutils/parsers/rst/include` +directory. To access these files, use the special syntax for standard +"include" data files, angle brackets around the file name: + +``` +.. include:: <isonum.txt> +``` + +The current set of standard "include" data files consists of sets of +substitution definitions. See [reStructuredText Standard Definition +Files](definitions.html) for details. + +The following options are recognized: + +`start-line` + +: Only the content starting from this line will be included. + (As usual in Python, the first line has index 0 and negative values + count from the end.) + +`end-line` + +: Only the content up to (but excluding) this line will be included. + +`start-after` + +: Only the content after the first occurrence of the specified text + will be included. + +`end-before` + +: Only the content before the first occurrence of the specified text + (but after any `after` text) will be included. + +`literal` + +: The entire included text is inserted into the document as a single + literal block. + +`code` + +: The argument and the content of the included file are passed to + the [code] directive (useful for program listings). + (New in Docutils 0.9) + +`number-lines` + +: Precede every code line with a line number. + The optional argument is the number of the first line (default 1). + Works only with `code` or `literal`. + (New in Docutils 0.9) + +`encoding` + +: The text encoding of the external data file. Defaults to the + document's [input_encoding]. + +`tab-width` + +: Number of spaces for hard tab expansion.
+ A negative value prevents expansion of hard tabs. Defaults to the + [tab_width] configuration setting. + +With `code` or `literal` the common options [:class:] and +[:name:] are recognized as well. + +Combining `start/end-line` and `start-after/end-before` is possible. The +text markers will be searched in the specified lines (further limiting the +included content). + +(raw-directive)= + +### Raw Data Pass-Through + +```{eval-rst} + +:Directive Type: "raw" +:Doctree Element: raw_ +:Directive Arguments: One or more, required (output format types). +:Directive Options: Possible. +:Directive Content: Stored verbatim, uninterpreted. None (empty) if a + "file" or "url" option given. +:Configuration Setting: raw_enabled_ +``` + +:::{WARNING} +The "raw" directive represents a potential security hole. It can +be disabled with the "[raw_enabled]" or "[file_insertion_enabled]" +runtime settings. +::: + +:::{Caution} +The "raw" directive is a stop-gap measure allowing the author to +bypass reStructuredText's markup. It is a "power-user" feature +that should not be overused or abused. The use of "raw" ties +documents to specific output formats and makes them less portable. + +If you often need to use the "raw" directive or a "raw"-derived +interpreted text role, that is a sign either of overuse/abuse or +that functionality may be missing from reStructuredText. Please +describe your situation in a message to the [Docutils-users] mailing +list. +::: + +The "raw" directive indicates non-reStructuredText data that is to be +passed untouched to the Writer. The names of the output formats are +given in the directive arguments. The interpretation of the raw data +is up to the Writer. A Writer may ignore any raw output not matching +its format. + +For example, the following input would be passed untouched by an HTML +Writer: + +``` +.. raw:: html + + <hr width=50 size=10> +``` + +A LaTeX Writer could insert the following raw content into its +output stream: + +``` +.. 
raw:: latex + + \setlength{\parindent}{0pt} +``` + +Raw data can also be read from an external file, specified in a +directive option. In this case, the content block must be empty. For +example: + +``` +.. raw:: html + :file: inclusion.html +``` + +Inline equivalents of the "raw" directive can be defined via +[custom interpreted text roles] derived from the ["raw" role]. + +The following options are recognized: + +`file` + +: The local filesystem path of a raw data file to be included. + +`url` + +: An Internet URL reference to a raw data file to be included. + +`encoding` + +: The text encoding of the external raw data (file or URL). + Defaults to the document's encoding (if specified). + +(classes)= + +### Class + +```{eval-rst} + +:Directive Type: "class" +:Doctree Element: pending_ +:Directive Arguments: One or more, required (class names / attribute + values). +:Directive Options: None. +:Directive Content: Optional. If present, it is interpreted as body + elements. +``` + +The "class" directive sets the ["classes"] attribute value on its content +or on the first immediately following non-comment element [^id13]. For +details of the "classes" attribute, see [its entry](../doctree.html#classes) in [The Docutils +Document Tree][the docutils document tree]. + +The directive argument consists of one or more space-separated class +names. The names are transformed to conform to the regular expression +`[a-z](-?[a-z0-9]+)*` by converting + +- alphabetic characters to lowercase, +- accented characters to the base character, +- non-alphanumeric characters to hyphens, +- consecutive hyphens into one hyphen. + +For example "Rot-Gelb.Blau Grün:+2008" becomes "rot-gelb-blau grun-2008". +(For the [rationale], see below.) + +Examples: + +``` +.. class:: special + +This is a "special" paragraph. + +.. class:: exceptional remarkable + +An Exceptional Section +====================== + +This is an ordinary paragraph. + +.. class:: multiple + + First paragraph. 
+ + Second paragraph. +``` + +The text above is parsed and transformed into this doctree fragment: + +``` +<paragraph classes="special"> + This is a "special" paragraph. +<section classes="exceptional remarkable"> + <title> + An Exceptional Section + <paragraph> + This is an ordinary paragraph. + <paragraph classes="multiple"> + First paragraph. + <paragraph classes="multiple"> + Second paragraph. +``` + +[^id13]: To set a "classes" attribute value on a block quote, the + "class" directive must be followed by an empty comment: + + ``` + .. class:: highlights + .. + + Block quote text. + ``` + + Without the empty comment, the indented text would be interpreted as the + "class" directive's content, and the classes would be applied to each + element (paragraph, in this case) individually, instead of to the block + quote as a whole. + +(rationale)= + +:::{topic} Rationale for "classes" Attribute Value Conversion +Docutils identifiers are converted to conform to the regular +expression `[a-z](-?[a-z0-9]+)*`. For HTML + CSS compatibility, +identifiers (the "classes" and "id" attributes) should have no +underscores, colons, or periods. Hyphens may be used. + +- The [HTML 4.01 spec] defines identifiers based on SGML tokens: + + > ID and NAME tokens must begin with a letter (\[A-Za-z\]) and + > may be followed by any number of letters, digits (\[0-9\]), + > hyphens ("-"), underscores ("\_"), colons (":"), and periods + > ("."). + +- The [CSS1 spec] defines identifiers based on the "name" token + ("flex" tokenizer notation below; "latin1" and "escape" 8-bit + characters have been replaced with XML entities): + + ``` + unicode \\[0-9a-f]{1,4} + latin1 [¡-ÿ] + escape {unicode}|\\[ -~¡-ÿ] + nmchar [-A-Za-z0-9]|{latin1}|{escape} + name {nmchar}+ + ``` + +The CSS rule does not include underscores ("\_"), colons (":"), or +periods ("."), therefore "classes" and "id" attributes should not +contain these characters. 
Combined with HTML's requirements (the +first character must be a letter; no "unicode", "latin1", or +"escape" characters), this results in the regular expression +`[A-Za-z][-A-Za-z0-9]*`. Docutils adds a normalisation by +downcasing and merging consecutive hyphens. +::: + +(role)= + +### Custom Interpreted Text Roles + +```{eval-rst} + +:Directive Type: "role" +:Doctree Element: None; affects subsequent parsing. +:Directive Arguments: Two; one required (new role name), one optional + (base role name, in parentheses). +:Directive Options: Possible (depends on base role). +:Directive Content: depends on base role. +``` + +(New in Docutils 0.3.2) + +The "role" directive dynamically creates a custom interpreted text +role and registers it with the parser. This means that after +declaring a role like this: + +``` +.. role:: custom +``` + +the document may use the new "custom" role: + +``` +An example of using :custom:`interpreted text` +``` + +This will be parsed into the following document tree fragment: + +``` +<paragraph> + An example of using + <inline classes="custom"> + interpreted text +``` + +The role must be declared in a document before it can be used. + +The new role may be based on an existing role, specified as a second +argument in parentheses (whitespace optional): + +``` +.. role:: custom(emphasis) + +:custom:`text` +``` + +The parsed result is as follows: + +``` +<paragraph> + <emphasis classes="custom"> + text +``` + +A special case is the ["raw" role]: derived roles enable +inline [raw data pass-through], e.g.: + +``` +.. role:: raw-role(raw) + :format: html latex + +:raw-role:`raw text` +``` + +If no base role is explicitly specified, a generic custom role is +automatically used. Subsequent interpreted text will produce an +"inline" element with a ["classes"] attribute, as in the first example +above. + +With most roles, the ":class:" option can be used to set a "classes" +attribute that is different from the role name. For example: + +``` +.. 
role:: custom + :class: special + +:custom:`interpreted text` +``` + +This is the parsed result: + +``` +<paragraph> + <inline classes="special"> + interpreted text +``` + +(role-class)= + +The following option is recognized by the "role" directive for most +base roles: + +`class` + +: Set the ["classes"] attribute value on the element produced + (`inline`, or element associated with a base class) when the + custom interpreted text role is used. If no directive options are + specified, a "class" option with the directive argument (role + name) as the value is implied. See the [class] directive above. + +Specific base roles may support other options and/or directive +content. See the [reStructuredText Interpreted Text Roles] document +for details. + +(default-role)= + +### Setting the Default Interpreted Text Role + +```{eval-rst} + +:Directive Type: "default-role" +:Doctree Element: None; affects subsequent parsing. +:Directive Arguments: One, optional (new default role name). +:Directive Options: None. +:Directive Content: None. +``` + +(New in Docutils 0.3.10) + +The "default-role" directive sets the default interpreted text role, +the role that is used for interpreted text without an explicit role. +For example, after setting the default role like this: + +``` +.. default-role:: subscript +``` + +any subsequent use of implicit-role interpreted text in the document +will use the "subscript" role: + +``` +An example of a `default` role. +``` + +This will be parsed into the following document tree fragment: + +``` +<paragraph> + An example of a + <subscript> + default + role. +``` + +A custom role may be used (see the "[role]" directive above), but it +must have been declared in a document before it can be set as the +default role. See the [reStructuredText Interpreted Text Roles] +document for details of built-in roles. + +The directive may be used without an argument to restore the initial +default interpreted text role, which is application-dependent. 
The +initial default interpreted text role of the standard reStructuredText +parser is "title-reference". + +### Metadata Document Title + +```{eval-rst} + +:Directive Type: "title" +:Doctree Element: None. +:Directive Arguments: 1, required (the title text). +:Directive Options: None. +:Directive Content: None. +``` + +The "title" directive specifies the document title as metadata, which +does not become part of the document body. It overrides a +document-supplied title. For example, in HTML output the metadata +document title appears in the title bar of the browser window. + +### Restructuredtext-Test-Directive + +```{eval-rst} + +:Directive Type: "restructuredtext-test-directive" +:Doctree Element: system_warning +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as a literal block. +``` + +This directive is provided for test purposes only. (Nobody is +expected to type in a name *that* long!) It is converted into a +level-1 (info) system message showing the directive data, possibly +followed by a literal block containing the rest of the directive +block. + +## Common Options + +Most of the directives that generate doctree elements support the following +options: + +`` _`:class:` `` + +: Set a ["classes"] attribute value on the doctree element generated by + the directive. See also the [class] directive. + +`` _`:name:` `` + +: Add `text` to the ["names"] attribute of the doctree element generated + by the directive. This allows [hyperlink references] to the element + using `text` as [reference name]. + + Specifying the `name` option of a directive, e.g., + + ``` + .. image:: bild.png + :name: my picture + ``` + + is a concise syntax alternative to preceding it with a [hyperlink + target][hyperlink target] + + ``` + .. _my picture: + + .. image:: bild.png + ``` + + New in Docutils 0.8. 
+ +% Local Variables: +% mode: indented-text +% indent-tabs-mode: nil +% sentence-end-double-space: t +% fill-column: 70 +% End: + +["classes"]: ../doctree.html#classes +["code" role]: roles.html#code +["math" role]: roles.html#math +["names"]: ../doctree.html#names +["raw" role]: roles.html#raw +[admonition]: ../doctree.html#admonition +[block_quote]: ../doctree.html#block-quote +[caption]: ../doctree.html#caption +[compound]: ../doctree.html#compound +[container]: ../doctree.html#container +[creating restructuredtext directives]: ../../howto/rst-directives.html +[css1 spec]: http://www.w3.org/TR/REC-CSS1 +[decoration]: ../doctree.html#decoration +[directives]: restructuredtext.html#directives +[docutils generic dtd]: ../docutils.dtd +[docutils-users]: ../../user/mailing-lists.html#docutils-users +[figure]: ../doctree.html#figure +[file_insertion_enabled]: ../../user/config.html#file-insertion-enabled +[footnote]: ../doctree.html#footnote +[footnote_reference]: ../doctree.html#footnote-reference +[generated]: ../doctree.html#generated +[html 4.01 spec]: http://www.w3.org/TR/html401/ +[hyperlink references]: restructuredtext.html#hyperlink-references +[hyperlink target]: restructuredtext.html#hyperlink-targets +[image]: ../doctree.html#image +[inline elements]: ../doctree.html#inline-elements +[input_encoding]: ../../user/config.html#input-encoding +[legend]: ../doctree.html#legend +[length]: restructuredtext.html#length-units +[line block syntax]: restructuredtext.html#line-blocks +[line_block]: ../doctree.html#line-block +[literal_block]: ../doctree.html#literal-block +[math_block]: ../doctree.html#math-block +[math_output]: ../../user/config.html#math-output +[pending]: ../doctree.html#pending +[percentage]: restructuredtext.html#percentage-units +[pygments]: http://pygments.org/ +[python imaging library]: http://www.pythonware.com/products/pil/ +[raw]: ../doctree.html#raw +[raw_enabled]: ../../user/config.html#raw-enabled +[reference name]: 
restructuredtext.html#reference-names +[restructuredtext interpreted text roles]: roles.html +[restructuredtext markup specification]: restructuredtext.html +[rubric]: ../doctree.html#rubric +[sectnum_xform]: ../../user/config.html#sectnum-xform +[short math guide]: ftp://ftp.ams.org/ams/doc/amsmath/short-math-guide.pdf +[sidebar]: ../doctree.html#sidebar +[sphinx]: http://sphinx-doc.org/ +[substitution definition]: restructuredtext.html#substitution-definitions +[supported languages and markup formats]: http://pygments.org/languages/ +[syntax_highlight]: ../../user/config.html#syntax-highlight +[tab_width]: ../../user/config.html#tab-width +[table]: ../doctree.html#table +[the docutils document tree]: ../doctree.html +[the docutils website]: http://docutils.sourceforge.net +[title]: ../doctree.html#title +[topic]: ../doctree.html#topic diff --git a/tests/test_texts/test_texts_restructuredtext_.md b/tests/test_texts/test_texts_restructuredtext_.md new file mode 100644 index 0000000..6c95741 --- /dev/null +++ b/tests/test_texts/test_texts_restructuredtext_.md @@ -0,0 +1,3140 @@ +--- +Author: David Goodger +Contact: <mailto:docutils-develop@lists.sourceforge.net> +Copyright: This document has been placed in the public domain. +Date: \$Date\$ +Revision: \$Revision\$ +--- + +% -*- coding: utf-8 -*- + +# reStructuredText Markup Specification + +:::{Note} +This document is a detailed technical specification; it is not a +tutorial or a primer. If this is your first exposure to +reStructuredText, please read [A ReStructuredText Primer] and the +[Quick reStructuredText] user reference first. +::: + +[reStructuredText] is plaintext that uses simple and intuitive +constructs to indicate the structure of a document. These constructs +are equally easy to read in raw and processed forms. This document is +itself an example of reStructuredText (raw, if you are reading the +text file, or processed, if you are reading an HTML document, for +example). 
The reStructuredText parser is a component of [Docutils]. + +Simple, implicit markup is used to indicate special constructs, such +as section headings, bullet lists, and emphasis. The markup used is +as minimal and unobtrusive as possible. Less often-used constructs +and extensions to the basic reStructuredText syntax may have more +elaborate or explicit markup. + +reStructuredText is applicable to documents of any length, from the +very small (such as inline program documentation fragments, e.g. +Python docstrings) to the quite large (this document). + +The first section gives a quick overview of the syntax of the +reStructuredText markup by example. A complete specification is given +in the [Syntax Details] section. + +[Literal blocks] (in which no markup processing is done) are used for +examples throughout this document, to illustrate the plaintext markup. + +```{contents} +``` + +## Quick Syntax Overview + +A reStructuredText document is made up of body or block-level +elements, and may be structured into sections. [Sections] are +indicated through title style (underlines & optional overlines). +Sections contain body elements and/or subsections. Some body elements +contain further elements, such as lists containing list items, which +in turn may contain paragraphs and other body elements. Others, such +as paragraphs, contain text and [inline markup] elements. + +Here are examples of [body elements]: + +- [Paragraphs] (and [inline markup]): + + ``` + Paragraphs contain text and may contain inline markup: + *emphasis*, **strong emphasis**, `interpreted text`, ``inline + literals``, standalone hyperlinks (http://www.python.org), + external hyperlinks (Python_), internal cross-references + (example_), footnote references ([1]_), citation references + ([CIT2002]_), substitution references (|example|), and _`inline + internal targets`. + + Paragraphs are separated by blank lines and are left-aligned. + ``` + +- Five types of lists: + + 1. 
[Bullet lists]: + + ``` + - This is a bullet list. + + - Bullets can be "*", "+", or "-". + ``` + + 2. [Enumerated lists]: + + ``` + 1. This is an enumerated list. + + 2. Enumerators may be arabic numbers, letters, or roman + numerals. + ``` + + 3. [Definition lists]: + + ``` + what + Definition lists associate a term with a definition. + + how + The term is a one-line phrase, and the definition is one + or more paragraphs or body elements, indented relative to + the term. + ``` + + 4. [Field lists]: + + ``` + :what: Field lists map field names to field bodies, like + database records. They are often part of an extension + syntax. + + :how: The field marker is a colon, the field name, and a + colon. + + The field body may contain one or more body elements, + indented relative to the field marker. + ``` + + 5. [Option lists], for listing command-line options: + + ``` + -a command-line option "a" + -b file options can have arguments + and long descriptions + --long options can be long also + --input=file long options can also have + arguments + /V DOS/VMS-style options too + ``` + + There must be at least two spaces between the option and the + description. + +- [Literal blocks]: + + ``` + Literal blocks are either indented or line-prefix-quoted blocks, + and indicated with a double-colon ("::") at the end of the + preceding paragraph (right here -->):: + + if literal_block: + text = 'is left as-is' + spaces_and_linebreaks = 'are preserved' + markup_processing = None + ``` + +- [Block quotes]: + + ``` + Block quotes consist of indented body elements: + + This theory, that is mine, is mine. + + -- Anne Elk (Miss) + ``` + +- [Doctest blocks]: + + ``` + >>> print 'Python-specific usage examples; begun with ">>>"' + Python-specific usage examples; begun with ">>>" + >>> print '(cut and pasted from interactive Python sessions)' + (cut and pasted from interactive Python sessions) + ``` + +- Two syntaxes for [tables]: + + 1. 
[Grid tables]; complete, but complex and verbose: + + ``` + +------------------------+------------+----------+ + | Header row, column 1 | Header 2 | Header 3 | + +========================+============+==========+ + | body row 1, column 1 | column 2 | column 3 | + +------------------------+------------+----------+ + | body row 2 | Cells may span | + +------------------------+-----------------------+ + ``` + + 2. [Simple tables]; easy and compact, but limited: + + ``` + ==================== ========== ========== + Header row, column 1 Header 2 Header 3 + ==================== ========== ========== + body row 1, column 1 column 2 column 3 + body row 2 Cells may span columns + ==================== ====================== + ``` + +- [Explicit markup blocks] all begin with an explicit block marker, + two periods and a space: + + - [Footnotes]: + + ``` + .. [1] A footnote contains body elements, consistently + indented by at least 3 spaces. + ``` + + - [Citations]: + + ``` + .. [CIT2002] Just like a footnote, except the label is + textual. + ``` + + - [Hyperlink targets]: + + ``` + .. _Python: http://www.python.org + + .. _example: + + The "_example" target above points to this paragraph. + ``` + + - [Directives]: + + ``` + .. image:: mylogo.png + ``` + + - [Substitution definitions]: + + ``` + .. |symbol here| image:: symbol.png + ``` + + - [Comments]: + + ``` + .. Comments begin with two dots and a space. Anything may + follow, except for the syntax of footnotes/citations, + hyperlink targets, directives, or substitution definitions. + ``` + +## Syntax Details + +Descriptions below list "doctree elements" (document tree element +names; XML DTD generic identifiers) corresponding to syntax +constructs. For details on the hierarchy of elements, please see [The +Docutils Document Tree][the docutils document tree] and the [Docutils Generic DTD] XML document +type definition. + +### Whitespace + +Spaces are recommended for [indentation], but tabs may also be used. 
+Tabs will be converted to spaces. Tab stops are at every 8th column. + +Other whitespace characters (form feeds \[chr(12)\] and vertical tabs +\[chr(11)\]) are converted to single spaces before processing. + +#### Blank Lines + +Blank lines are used to separate paragraphs and other elements. +Multiple successive blank lines are equivalent to a single blank line, +except within literal blocks (where all whitespace is preserved). +Blank lines may be omitted when the markup makes element separation +unambiguous, in conjunction with indentation. The first line of a +document is treated as if it is preceded by a blank line, and the last +line of a document is treated as if it is followed by a blank line. + +#### Indentation + +Indentation is used to indicate -- and is only significant in +indicating -- block quotes, definitions (in definition list items), +and local nested content: + +- list item content (multi-line contents of list items, and multiple + body elements within a list item, including nested lists), +- the content of literal blocks, and +- the content of explicit markup blocks. + +Any text whose indentation is less than that of the current level +(i.e., unindented text or "dedents") ends the current level of +indentation. + +Since all indentation is significant, the level of indentation must be +consistent. For example, indentation is the sole markup indicator for +[block quotes]: + +``` +This is a top-level paragraph. + + This paragraph belongs to a first-level block quote. + + Paragraph 2 of the first-level block quote. +``` + +Multiple levels of indentation within a block quote will result in +more complex structures: + +``` +This is a top-level paragraph. + + This paragraph belongs to a first-level block quote. + + This paragraph belongs to a second-level block quote. + +Another top-level paragraph. + + This paragraph belongs to a second-level block quote. + + This paragraph belongs to a first-level block quote. 
The + second-level block quote above is inside this first-level + block quote. +``` + +When a paragraph or other construct consists of more than one line of +text, the lines must be left-aligned: + +``` +This is a paragraph. The lines of +this paragraph are aligned at the left. + + This paragraph has problems. The +lines are not left-aligned. In addition + to potential misinterpretation, warning + and/or error messages will be generated + by the parser. +``` + +Several constructs begin with a marker, and the body of the construct +must be indented relative to the marker. For constructs using simple +markers ([bullet lists], [enumerated lists], [footnotes], [citations], +[hyperlink targets], [directives], and [comments]), the level of +indentation of the body is determined by the position of the first +line of text, which begins on the same line as the marker. For +example, bullet list bodies must be indented by at least two columns +relative to the left edge of the bullet: + +``` +- This is the first line of a bullet list + item's paragraph. All lines must align + relative to the first line. [1]_ + + This indented paragraph is interpreted + as a block quote. + +Because it is not sufficiently indented, +this paragraph does not belong to the list +item. + +.. [1] Here's a footnote. The second line is aligned + with the beginning of the footnote label. The ".." + marker is what determines the indentation. +``` + +For constructs using complex markers ([field lists] and [option +lists][option lists]), where the marker may contain arbitrary text, the indentation +of the first line *after* the marker determines the left edge of the +body. For example, field lists may have very long markers (containing +the field names): + +``` +:Hello: This field has a short field name, so aligning the field + body with the first line is feasible. 
+ +:Number-of-African-swallows-required-to-carry-a-coconut: It would + be very difficult to align the field body with the left edge + of the first line. It may even be preferable not to begin the + body on the same line as the marker. +``` + +### Escaping Mechanism + +The character set universally available to plaintext documents, 7-bit +ASCII, is limited. No matter what characters are used for markup, +they will already have multiple meanings in written text. Therefore +markup characters *will* sometimes appear in text **without being +intended as markup**. Any serious markup system requires an escaping +mechanism to override the default meaning of the characters used for +the markup. In reStructuredText we use the backslash, commonly used +as an escaping character in other domains. + +A backslash followed by any character (except whitespace characters) +escapes that character. The escaped character represents the +character itself, and is prevented from playing a role in any markup +interpretation. The backslash is removed from the output. A literal +backslash is represented by two backslashes in a row (the first +backslash "escapes" the second, preventing it being interpreted in an +"escaping" role). + +Backslash-escaped whitespace characters are removed from the document. +This allows for character-level [inline markup]. + +There are two contexts in which backslashes have no special meaning: +literal blocks and inline literals. In these contexts, a single +backslash represents a literal backslash, without having to double up. + +Please note that the reStructuredText specification and parser do not +address the issue of the representation or extraction of text input +(how and in what form the text actually *reaches* the parser). +Backslashes and other characters may serve a character-escaping +purpose in certain contexts and must be dealt with appropriately. For +example, Python uses backslashes in strings to escape certain +characters, but not others. 
The simplest solution when backslashes +appear in Python docstrings is to use raw docstrings: + +``` +r"""This is a raw docstring. Backslashes (\) are not touched.""" +``` + +### Reference Names + +Simple reference names are single words consisting of alphanumerics +plus isolated (no two adjacent) internal hyphens, underscores, +periods, colons and plus signs; no whitespace or other characters are +allowed. Footnote labels ([Footnotes] & [Footnote References]), citation +labels ([Citations] & [Citation References]), [interpreted text] roles, +and some [hyperlink references] use the simple reference name syntax. + +Reference names using punctuation or whose names are phrases (two or +more space-separated words) are called "phrase-references". +Phrase-references are expressed by enclosing the phrase in backquotes +and treating the backquoted text as a reference name: + +``` +Want to learn about `my favorite programming language`_? + +.. _my favorite programming language: http://www.python.org +``` + +Simple reference names may also optionally use backquotes. + +Reference names are whitespace-neutral and case-insensitive. When +resolving reference names internally: + +- whitespace is normalized (one or more spaces, horizontal or vertical + tabs, newlines, carriage returns, or form feeds, are interpreted as + a single space), and +- case is normalized (all alphabetic characters are converted to + lowercase). + +For example, the following [hyperlink references] are equivalent: + +``` +- `A HYPERLINK`_ +- `a hyperlink`_ +- `A + Hyperlink`_ +``` + +[Hyperlinks], [footnotes], and [citations] all share the same namespace +for reference names. The labels of citations (simple reference names) +and manually-numbered footnotes (numbers) are entered into the same +database as other hyperlink names. This means that a footnote +(defined as "`.. [1]`") which can be referred to by a footnote +reference (`[1]_`), can also be referred to by a plain hyperlink +reference ([1]). 
Of course, each type of reference (hyperlink, +footnote, citation) may be processed and rendered differently. Some +care should be taken to avoid reference name conflicts. + +### Document Structure + +#### Document + +Doctree element: document. + +The top-level element of a parsed reStructuredText document is the +"document" element. After initial parsing, the document element is a +simple container for a document fragment, consisting of [body +elements][body elements], [transitions], and [sections], but lacking a document title +or other bibliographic elements. The code that calls the parser may +choose to run one or more optional post-parse [transforms], +rearranging the document fragment into a complete document with a +title and possibly other metadata elements (author, date, etc.; see +[Bibliographic Fields]). + +Specifically, there is no way to indicate a document title and +subtitle explicitly in reStructuredText. Instead, a lone top-level +section title (see [Sections] below) can be treated as the document +title. Similarly, a lone second-level section title immediately after +the "document title" can become the document subtitle. The rest of +the sections are then lifted up a level or two. See the [DocTitle +transform][doctitle transform] for details. + +#### Sections + +Doctree elements: section, title. + +Sections are identified through their titles, which are marked up with +adornment: "underlines" below the title text, or underlines and +matching "overlines" above the title. An underline/overline is a +single repeated punctuation character that begins in column 1 and +forms a line extending at least as far as the right edge of the title +text. Specifically, an underline/overline character may be any +non-alphanumeric printable 7-bit ASCII character [^id2]. When an +overline is used, the length and character used must match the +underline. Underline-only adornment styles are distinct from +overline-and-underline styles that use the same character. 
There may +be any number of levels of section titles, although some output +formats may have limits (HTML has 6 levels). + +[^id2]: The following are all valid section title adornment + characters: + + ``` + ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ + ``` + + Some characters are more suitable than others. The following are + recommended: + + ``` + = - ` : . ' " ~ ^ _ * + # + ``` + +Rather than imposing a fixed number and order of section title +adornment styles, the order enforced will be the order as encountered. +The first style encountered will be an outermost title (like HTML H1), +the second style will be a subtitle, the third will be a subsubtitle, +and so on. + +Below are examples of section title styles: + +`````````````` +=============== + Section Title +=============== + +--------------- + Section Title +--------------- + +Section Title +============= + +Section Title +------------- + +Section Title +````````````` + +Section Title +''''''''''''' + +Section Title +............. + +Section Title +~~~~~~~~~~~~~ + +Section Title +************* + +Section Title ++++++++++++++ + +Section Title +^^^^^^^^^^^^^ +`````````````` + +When a title has both an underline and an overline, the title text may +be inset, as in the first two examples above. This is merely +aesthetic and not significant. Underline-only title text may *not* be +inset. + +A blank line after a title is optional. All text blocks up to the +next title of the same or higher level are included in a section (or +subsection, etc.). + +All section title styles need not be used, nor need any specific +section title style be used. However, a document must be consistent +in its use of section titles: once a hierarchy of title styles is +established, sections must use that hierarchy. + +Each section title automatically generates a hyperlink target pointing +to the section. The text of the hyperlink target (the "reference +name") is the same as that of the section title. 
See [Implicit +Hyperlink Targets][implicit hyperlink targets] for a complete description. + +Sections may contain [body elements], [transitions], and nested +sections. + +#### Transitions + +Doctree element: transition. + +> Instead of subheads, extra space or a type ornament between +> paragraphs may be used to mark text divisions or to signal +> changes in subject or emphasis. +> +> (The Chicago Manual of Style, 14th edition, section 1.80) + +Transitions are commonly seen in novels and short fiction, as a gap +spanning one or more lines, with or without a type ornament such as a +row of asterisks. Transitions separate other body elements. A +transition should not begin or end a section or document, nor should +two transitions be immediately adjacent. + +The syntax for a transition marker is a horizontal line of 4 or more +repeated punctuation characters. The syntax is the same as section +title underlines without title text. Transition markers require blank +lines before and after: + +``` +Para. + +---------- + +Para. +``` + +Unlike section title underlines, no hierarchy of transition markers is +enforced, nor do differences in transition markers accomplish +anything. It is recommended that a single consistent style be used. + +The processing system is free to render transitions in output in any +way it likes. For example, horizontal rules (`<hr>`) in HTML output +would be an obvious choice. + +### Body Elements + +#### Paragraphs + +Doctree element: paragraph. + +Paragraphs consist of blocks of left-aligned text with no markup +indicating any other body element. Blank lines separate paragraphs +from each other and from other body elements. Paragraphs may contain +[inline markup]. + +Syntax diagram: + +``` ++------------------------------+ +| paragraph | +| | ++------------------------------+ + ++------------------------------+ +| paragraph | +| | ++------------------------------+ +``` + +#### Bullet Lists + +Doctree elements: bullet_list, list_item. 
+ +A text block which begins with a "\*", "+", "-", "•", "‣", or "⁃", +followed by whitespace, is a bullet list item (a.k.a. "unordered" list +item). List item bodies must be left-aligned and indented relative to +the bullet; the text immediately after the bullet determines the +indentation. For example: + +``` +- This is the first bullet list item. The blank line above the + first list item is required; blank lines between list items + (such as below this paragraph) are optional. + +- This is the first paragraph in the second item in the list. + + This is the second paragraph in the second item in the list. + The blank line above this paragraph is required. The left edge + of this paragraph lines up with the paragraph above, both + indented relative to the bullet. + + - This is a sublist. The bullet lines up with the left edge of + the text blocks above. A sublist is a new list so requires a + blank line above and below. + +- This is the third item of the main list. + +This paragraph is not part of the list. +``` + +Here are examples of **incorrectly** formatted bullet lists: + +``` +- This first line is fine. +A blank line is required between list items and paragraphs. +(Warning) + +- The following line appears to be a new sublist, but it is not: + - This is a paragraph continuation, not a sublist (since there's + no blank line). This line is also incorrectly indented. + - Warnings may be issued by the implementation. +``` + +Syntax diagram: + +``` ++------+-----------------------+ +| "- " | list item | ++------| (body elements)+ | + +-----------------------+ +``` + +#### Enumerated Lists + +Doctree elements: enumerated_list, list_item. + +Enumerated lists (a.k.a. "ordered" lists) are similar to bullet lists, +but use enumerators instead of bullets. An enumerator consists of an +enumeration sequence member and formatting, followed by whitespace. +The following enumeration sequences are recognized: + +- arabic numerals: 1, 2, 3, ... (no upper limit). 
+- uppercase alphabet characters: A, B, C, ..., Z. +- lower-case alphabet characters: a, b, c, ..., z. +- uppercase Roman numerals: I, II, III, IV, ..., MMMMCMXCIX (4999). +- lowercase Roman numerals: i, ii, iii, iv, ..., mmmmcmxcix (4999). + +In addition, the auto-enumerator, "#", may be used to automatically +enumerate a list. Auto-enumerated lists may begin with explicit +enumeration, which sets the sequence. Fully auto-enumerated lists use +arabic numerals and begin with 1. (Auto-enumerated lists are new in +Docutils 0.3.8.) + +The following formatting types are recognized: + +- suffixed with a period: "1.", "A.", "a.", "I.", "i.". +- surrounded by parentheses: "(1)", "(A)", "(a)", "(I)", "(i)". +- suffixed with a right-parenthesis: "1)", "A)", "a)", "I)", "i)". + +While parsing an enumerated list, a new list will be started whenever: + +- An enumerator is encountered which does not have the same format and + sequence type as the current list (e.g. "1.", "(a)" produces two + separate lists). +- The enumerators are not in sequence (e.g., "1.", "3." produces two + separate lists). + +It is recommended that the enumerator of the first list item be +ordinal-1 ("1", "A", "a", "I", or "i"). Although other start-values +will be recognized, they may not be supported by the output format. A +level-1 \[info\] system message will be generated for any list beginning +with a non-ordinal-1 enumerator. + +Lists using Roman numerals must begin with "I"/"i" or a +multi-character value, such as "II" or "XV". Any other +single-character Roman numeral ("V", "X", "L", "C", "D", "M") will be +interpreted as a letter of the alphabet, not as a Roman numeral. +Likewise, lists using letters of the alphabet may not begin with +"I"/"i", since these are recognized as Roman numeral 1. + +The second line of each enumerated list item is checked for validity. 
+This is to prevent ordinary paragraphs from being mistakenly +interpreted as list items, when they happen to begin with text +identical to enumerators. For example, this text is parsed as an +ordinary paragraph: + +``` +A. Einstein was a really +smart dude. +``` + +However, ambiguity cannot be avoided if the paragraph consists of only +one line. This text is parsed as an enumerated list item: + +``` +A. Einstein was a really smart dude. +``` + +If a single-line paragraph begins with text identical to an enumerator +("A.", "1.", "(b)", "I)", etc.), the first character will have to be +escaped in order to have the line parsed as an ordinary paragraph: + +``` +\A. Einstein was a really smart dude. +``` + +Examples of nested enumerated lists: + +``` +1. Item 1 initial text. + + a) Item 1a. + b) Item 1b. + +2. a) Item 2a. + b) Item 2b. +``` + +Example syntax diagram: + +``` ++-------+----------------------+ +| "1. " | list item | ++-------| (body elements)+ | + +----------------------+ +``` + +#### Definition Lists + +Doctree elements: definition_list, definition_list_item, term, +classifier, definition. + +Each definition list item contains a term, optional classifiers, and a +definition. A term is a simple one-line word or phrase. Optional +classifiers may follow the term on the same line, each after an inline +" : " (space, colon, space). A definition is a block indented +relative to the term, and may contain multiple paragraphs and other +body elements. There may be no blank line between a term line and a +definition block (this distinguishes definition lists from [block +quotes][block quotes]). Blank lines are required before the first and after the +last definition list item, but are optional in-between. For example: + +``` +term 1 + Definition 1. + +term 2 + Definition 2, paragraph 1. + + Definition 2, paragraph 2. + +term 3 : classifier + Definition 3. + +term 4 : classifier one : classifier two + Definition 4. 
+``` + +Inline markup is parsed in the term line before the classifier +delimiter (" : ") is recognized. The delimiter will only be +recognized if it appears outside of any inline markup. + +A definition list may be used in various ways, including: + +- As a dictionary or glossary. The term is the word itself, a + classifier may be used to indicate the usage of the term (noun, + verb, etc.), and the definition follows. +- To describe program variables. The term is the variable name, a + classifier may be used to indicate the type of the variable (string, + integer, etc.), and the definition describes the variable's use in + the program. This usage of definition lists supports the classifier + syntax of [Grouch], a system for describing and enforcing a Python + object schema. + +Syntax diagram: + +``` ++----------------------------+ +| term [ " : " classifier ]* | ++--+-------------------------+--+ + | definition | + | (body elements)+ | + +----------------------------+ +``` + +#### Field Lists + +Doctree elements: field_list, field, field_name, field_body. + +Field lists are used as part of an extension syntax, such as options +for [directives], or database-like records meant for further +processing. They may also be used for two-column table-like +structures resembling database records (label & data pairs). +Applications of reStructuredText may recognize field names and +transform fields or field bodies in certain contexts. For examples, +see [Bibliographic Fields] below, or the "[image]" and "[meta]" +directives in [reStructuredText Directives]. + +Field lists are mappings from field names to field bodies, modeled on +[RFC822] headers. A field name may consist of any characters, but +colons (":") inside of field names must be escaped with a backslash. +Inline markup is parsed in field names. Field names are +case-insensitive when further processed or transformed. The field +name, along with a single colon prefix and suffix, together form the +field marker. 
The field marker is followed by whitespace and the +field body. The field body may contain multiple body elements, +indented relative to the field marker. The first line after the field +name marker determines the indentation of the field body. For +example: + +``` +:Date: 2001-08-16 +:Version: 1 +:Authors: - Me + - Myself + - I +:Indentation: Since the field marker may be quite long, the second + and subsequent lines of the field body do not have to line up + with the first line, but they must be indented relative to the + field name marker, and they must line up with each other. +:Parameter i: integer +``` + +The interpretation of individual words in a multi-word field name is +up to the application. The application may specify a syntax for the +field name. For example, second and subsequent words may be treated +as "arguments", quoted phrases may be treated as a single argument, +and direct support for the "name=value" syntax may be added. + +Standard [RFC822] headers cannot be used for this construct because +they are ambiguous. A word followed by a colon at the beginning of a +line is common in written text. However, in well-defined contexts +such as when a field list invariably occurs at the beginning of a +document (PEPs and email messages), standard RFC822 headers could be +used. + +Syntax diagram (simplified): + +``` ++--------------------+----------------------+ +| ":" field name ":" | field body | ++-------+------------+ | + | (body elements)+ | + +-----------------------------------+ +``` + +##### Bibliographic Fields + +Doctree elements: docinfo, author, authors, organization, contact, +version, status, date, copyright, field, topic. + +When a field list is the first non-comment element in a document +(after the document title, if there is one), it may have its fields +transformed to document bibliographic data. This bibliographic data +corresponds to the front matter of a book, such as the title page and +copyright page. 
+ +Certain registered field names (listed below) are recognized and +transformed to the corresponding doctree elements, most becoming child +elements of the "docinfo" element. No ordering is required of these +fields, although they may be rearranged to fit the document structure, +as noted. Unless otherwise indicated below, each of the bibliographic +elements' field bodies may contain a single paragraph only. Field +bodies may be checked for [RCS keywords] and cleaned up. Any +unrecognized fields will remain as generic fields in the docinfo +element. + +The registered bibliographic field names and their corresponding +doctree elements are as follows: + +- Field name "Author": author element. +- "Authors": authors. +- "Organization": organization. +- "Contact": contact. +- "Address": address. +- "Version": version. +- "Status": status. +- "Date": date. +- "Copyright": copyright. +- "Dedication": topic. +- "Abstract": topic. + +The "Authors" field may contain either: a single paragraph consisting +of a list of authors, separated by ";" or ","; or a bullet list whose +elements each contain a single paragraph per author. ";" is checked +first, so "Doe, Jane; Doe, John" will work. In some languages +(e.g. Swedish), there is no singular/plural distinction between +"Author" and "Authors", so only an "Authors" field is provided, and a +single name is interpreted as an "Author". If a single name contains +a comma, end it with a semicolon to disambiguate: ":Authors: Doe, +Jane;". + +The "Address" field is for a multi-line surface mailing address. +Newlines and whitespace will be preserved. + +The "Dedication" and "Abstract" fields may contain arbitrary body +elements. Only one of each is allowed. They become topic elements +with "Dedication" or "Abstract" titles (or language equivalents) +immediately following the docinfo element. + +This field-name-to-element mapping can be replaced for other +languages. See the [DocInfo transform] implementation documentation +for details. 
+ +Unregistered/generic fields may contain one or more paragraphs or +arbitrary body elements. + +##### RCS Keywords + +[Bibliographic fields] recognized by the parser are normally checked +for RCS [^id6] keywords and cleaned up [^id7]. RCS keywords may be +entered into source files as "\$keyword\$", and once stored under RCS or +CVS [^id8], they are expanded to "\$keyword: expansion text \$". For +example, a "Status" field will be transformed to a "status" element: + +``` +:Status: $keyword: expansion text $ +``` + +[^id6]: Revision Control System. + +[^id7]: RCS keyword processing can be turned off (unimplemented). + +[^id8]: Concurrent Versions System. CVS uses the same keywords as RCS. + +Processed, the "status" element's text will become simply "expansion +text". The dollar sign delimiters and leading RCS keyword name are +removed. + +The RCS keyword processing only kicks in when the field list is in +bibliographic context (first non-comment construct in the document, +after a document title if there is one). + +#### Option Lists + +Doctree elements: option_list, option_list_item, option_group, option, +option_string, option_argument, description. + +Option lists are two-column lists of command-line options and +descriptions, documenting a program's options. For example: + +``` +-a Output all. +-b Output both (this description is + quite long). +-c arg Output just arg. +--long Output all day long. + +-p This option has two paragraphs in the description. + This is the first. + + This is the second. Blank lines may be omitted between + options (as above) or left in (as here and below). + +--very-long-option A VMS-style option. Note the adjustment for + the required two spaces. + +--an-even-longer-option + The description can also start on the next line. + +-2, --two This option has two variants. + +-f FILE, --file=FILE These two options are synonyms; both have + arguments. + +/V A VMS/DOS-style option. 
+``` + +There are several types of options recognized by reStructuredText: + +- Short POSIX options consist of one dash and an option letter. +- Long POSIX options consist of two dashes and an option word; some + systems use a single dash. +- Old GNU-style "plus" options consist of one plus and an option + letter ("plus" options are deprecated now, their use discouraged). +- DOS/VMS options consist of a slash and an option letter or word. + +Please note that both POSIX-style and DOS/VMS-style options may be +used by DOS or Windows software. These and other variations are +sometimes used mixed together. The names above have been chosen for +convenience only. + +The syntax for short and long POSIX options is based on the syntax +supported by Python's [getopt.py] module, which implements an option +parser similar to the [GNU libc getopt_long()] function but with some +restrictions. There are many variant option systems, and +reStructuredText option lists do not support all of them. + +Although long POSIX and DOS/VMS option words may be allowed to be +truncated by the operating system or the application when used on the +command line, reStructuredText option lists do not show or support +this with any special syntax. The complete option word should be +given, supported by notes about truncation if and when applicable. + +Options may be followed by an argument placeholder, whose role and +syntax should be explained in the description text. Either a space or +an equals sign may be used as a delimiter between options and option +argument placeholders; short options ("-" or "+" prefix only) may omit +the delimiter. Option arguments may take one of two forms: + +- Begins with a letter (`[a-zA-Z]`) and subsequently consists of + letters, numbers, underscores and hyphens (`[a-zA-Z0-9_-]`). +- Begins with an open-angle-bracket (`<`) and ends with a + close-angle-bracket (`>`); any characters except angle brackets + are allowed internally. 
+ +Multiple option "synonyms" may be listed, sharing a single +description. They must be separated by comma-space. + +There must be at least two spaces between the option(s) and the +description. The description may contain multiple body elements. The +first line after the option marker determines the indentation of the +description. As with other types of lists, blank lines are required +before the first option list item and after the last, but are optional +between option entries. + +Syntax diagram (simplified): + +``` ++----------------------------+-------------+ +| option [" " argument] " " | description | ++-------+--------------------+ | + | (body elements)+ | + +----------------------------------+ +``` + +#### Literal Blocks + +Doctree element: literal_block. + +A paragraph consisting of two colons ("::") signifies that the +following text block(s) comprise a literal block. The literal block +must either be indented or quoted (see below). No markup processing +is done within a literal block. It is left as-is, and is typically +rendered in a monospaced typeface: + +``` +This is a typical paragraph. An indented literal block follows. + +:: + + for a in [5,4,3,2,1]: # this is program code, shown as-is + print a + print "it's..." + # a literal block continues until the indentation ends + +This text has returned to the indentation of the first paragraph, +is outside of the literal block, and is therefore treated as an +ordinary paragraph. +``` + +The paragraph containing only "::" will be completely removed from the +output; no empty paragraph will remain. + +As a convenience, the "::" is recognized at the end of any paragraph. +If immediately preceded by whitespace, both colons will be removed +from the output (this is the "partially minimized" form). When text +immediately precedes the "::", *one* colon will be removed from the +output, leaving only one colon visible (i.e., "::" will be replaced by +":"; this is the "fully minimized" form). 
+ +In other words, these are all equivalent (please pay attention to the +colons after "Paragraph"): + +1. Expanded form: + + ``` + Paragraph: + + :: + + Literal block + ``` + +2. Partially minimized form: + + ``` + Paragraph: :: + + Literal block + ``` + +3. Fully minimized form: + + ``` + Paragraph:: + + Literal block + ``` + +All whitespace (including line breaks, but excluding minimum +indentation for indented literal blocks) is preserved. Blank lines +are required before and after a literal block, but these blank lines +are not included as part of the literal block. + +##### Indented Literal Blocks + +Indented literal blocks are indicated by indentation relative to the +surrounding text (leading whitespace on each line). The minimum +indentation will be removed from each line of an indented literal +block. The literal block need not be contiguous; blank lines are +allowed between sections of indented text. The literal block ends +with the end of the indentation. + +Syntax diagram: + +``` ++------------------------------+ +| paragraph | +| (ends with "::") | ++------------------------------+ + +---------------------------+ + | indented literal block | + +---------------------------+ +``` + +##### Quoted Literal Blocks + +Quoted literal blocks are unindented contiguous blocks of text where +each line begins with the same non-alphanumeric printable 7-bit ASCII +character [^id10]. A blank line ends a quoted literal block. The +quoting characters are preserved in the processed document. + +[^id10]: The following are all valid quoting characters: + + ``` + ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ + ``` + + Note that these are the same characters as are valid for title + adornment of [sections]. + +Possible uses include literate programming in Haskell and email +quoting: + +``` +John Doe wrote:: + +>> Great idea! +> +> Why didn't I think of that? + +You just did! 
;-) +``` + +Syntax diagram: + +``` ++------------------------------+ +| paragraph | +| (ends with "::") | ++------------------------------+ ++------------------------------+ +| ">" per-line-quoted | +| ">" contiguous literal block | ++------------------------------+ +``` + +#### Line Blocks + +Doctree elements: line_block, line. (New in Docutils 0.3.5.) + +Line blocks are useful for address blocks, verse (poetry, song +lyrics), and unadorned lists, where the structure of lines is +significant. Line blocks are groups of lines beginning with vertical +bar ("|") prefixes. Each vertical bar prefix indicates a new line, so +line breaks are preserved. Initial indents are also significant, +resulting in a nested structure. Inline markup is supported. +Continuation lines are wrapped portions of long lines; they begin with +a space in place of the vertical bar. The left edge of a continuation +line must be indented, but need not be aligned with the left edge of +the text above it. A line block ends with a blank line. + +This example illustrates continuation lines: + +``` +| Lend us a couple of bob till Thursday. +| I'm absolutely skint. +| But I'm expecting a postal order and I can pay you back + as soon as it comes. +| Love, Ewan. +``` + +This example illustrates the nesting of line blocks, indicated by the +initial indentation of new lines: + +``` +Take it away, Eric the Orchestra Leader! + + | A one, two, a one two three four + | + | Half a bee, philosophically, + | must, *ipso facto*, half not be. + | But half the bee has got to be, + | *vis a vis* its entity. D'you see? + | + | But can a bee be said to be + | or not to be an entire bee, + | when half the bee is not a bee, + | due to some ancient injury? + | + | Singing... +``` + +Syntax diagram: + +``` ++------+-----------------------+ +| "| " | line | ++------| continuation line | + +-----------------------+ +``` + +#### Block Quotes + +Doctree elements: block_quote, attribution.
+ +A text block that is indented relative to the preceding text, without +preceding markup indicating it to be a literal block or other content, +is a block quote. All markup processing (for body elements and inline +markup) continues within the block quote: + +``` +This is an ordinary paragraph, introducing a block quote. + + "It is my business to know things. That is my trade." + + -- Sherlock Holmes +``` + +A block quote may end with an attribution: a text block beginning with +"--", "---", or a true em-dash, flush left within the block quote. If +the attribution consists of multiple lines, the left edges of the +second and subsequent lines must align. + +Multiple block quotes may occur consecutively if terminated with +attributions. + +> Unindented paragraph. +> +> > Block quote 1. +> > +> > <p class="attribution">-Attribution 1</p> +> +> > Block quote 2. + +[Empty comments] may be used to explicitly terminate preceding +constructs that would otherwise consume a block quote: + +``` +* List item. + +.. + + Block quote 3. +``` + +Empty comments may also be used to separate block quotes: + +``` + Block quote 4. + +.. + + Block quote 5. +``` + +Blank lines are required before and after a block quote, but these +blank lines are not included as part of the block quote. + +Syntax diagram: + +``` ++------------------------------+ +| (current level of | +| indentation) | ++------------------------------+ + +---------------------------+ + | block quote | + | (body elements)+ | + | | + | -- attribution text | + | (optional) | + +---------------------------+ +``` + +#### Doctest Blocks + +Doctree element: doctest_block. + +Doctest blocks are interactive Python sessions cut-and-pasted into +docstrings. They are meant to illustrate usage by example, and +provide an elegant and powerful testing environment via the [doctest +module][doctest module] in the Python standard library. 
+ +Doctest blocks are text blocks which begin with `">>> "`, the Python +interactive interpreter main prompt, and end with a blank line. +Doctest blocks are treated as a special case of literal blocks, +without requiring the literal block syntax. If both are present, the +literal block syntax takes priority over Doctest block syntax: + +``` +This is an ordinary paragraph. + +>>> print 'this is a Doctest block' +this is a Doctest block + +The following is a literal block:: + + >>> This is not recognized as a doctest block by + reStructuredText. It *will* be recognized by the doctest + module, though! +``` + +Indentation is not required for doctest blocks. + +#### Tables + +Doctree elements: table, tgroup, colspec, thead, tbody, row, entry. + +ReStructuredText provides two syntaxes for delineating table cells: +[Grid Tables] and [Simple Tables]. + +As with other body elements, blank lines are required before and after +tables. Tables' left edges should align with the left edge of +preceding text blocks; if indented, the table is considered to be part +of a block quote. + +Once isolated, each table cell is treated as a miniature document; the +top and bottom cell boundaries act as delimiting blank lines. Each +cell contains zero or more body elements. Cell contents may include +left and/or right margins, which are removed before processing. + +##### Grid Tables + +Grid tables provide a complete table representation via grid-like +"ASCII art". Grid tables allow arbitrary cell contents (body +elements), and both row and column spans. However, grid tables can be +cumbersome to produce, especially for simple data sets. The [Emacs +table mode][emacs table mode] is a tool that allows easy editing of grid tables, in +Emacs. See [Simple Tables] for a simpler (but limited) +representation. + +Grid tables are described with a visual grid made up of the characters +"-", "=", "|", and "+". The hyphen ("-") is used for horizontal lines +(row separators). 
The equals sign ("=") may be used to separate +optional header rows from the table body (not supported by the [Emacs +table mode][emacs table mode]). The vertical bar ("|") is used for vertical lines +(column separators). The plus sign ("+") is used for intersections of +horizontal and vertical lines. Example: + +``` ++------------------------+------------+----------+----------+ +| Header row, column 1 | Header 2 | Header 3 | Header 4 | +| (header rows optional) | | | | ++========================+============+==========+==========+ +| body row 1, column 1 | column 2 | column 3 | column 4 | ++------------------------+------------+----------+----------+ +| body row 2 | Cells may span columns. | ++------------------------+------------+---------------------+ +| body row 3 | Cells may | - Table cells | ++------------------------+ span rows. | - contain | +| body row 4 | | - body elements. | ++------------------------+------------+---------------------+ +``` + +Some care must be taken with grid tables to avoid undesired +interactions with cell text in rare cases. For example, the following +table contains a cell in row 2 spanning from column 2 to column 4: + +``` ++--------------+----------+-----------+-----------+ +| row 1, col 1 | column 2 | column 3 | column 4 | ++--------------+----------+-----------+-----------+ +| row 2 | | ++--------------+----------+-----------+-----------+ +| row 3 | | | | ++--------------+----------+-----------+-----------+ +``` + +If a vertical bar is used in the text of that cell, it could have +unintended effects if accidentally aligned with column boundaries: + +``` ++--------------+----------+-----------+-----------+ +| row 1, col 1 | column 2 | column 3 | column 4 | ++--------------+----------+-----------+-----------+ +| row 2 | Use the command ``ls | more``. | ++--------------+----------+-----------+-----------+ +| row 3 | | | | ++--------------+----------+-----------+-----------+ +``` + +Several solutions are possible. 
All that is needed is to break the +continuity of the cell outline rectangle. One possibility is to shift +the text by adding an extra space before: + +``` ++--------------+----------+-----------+-----------+ +| row 1, col 1 | column 2 | column 3 | column 4 | ++--------------+----------+-----------+-----------+ +| row 2 | Use the command ``ls | more``. | ++--------------+----------+-----------+-----------+ +| row 3 | | | | ++--------------+----------+-----------+-----------+ +``` + +Another possibility is to add an extra line to row 2: + +``` ++--------------+----------+-----------+-----------+ +| row 1, col 1 | column 2 | column 3 | column 4 | ++--------------+----------+-----------+-----------+ +| row 2 | Use the command ``ls | more``. | +| | | ++--------------+----------+-----------+-----------+ +| row 3 | | | | ++--------------+----------+-----------+-----------+ +``` + +##### Simple Tables + +Simple tables provide a compact and easy to type but limited +row-oriented table representation for simple data sets. Cell contents +are typically single paragraphs, although arbitrary body elements may +be represented in most cells. Simple tables allow multi-line rows (in +all but the first column) and column spans, but not row spans. See +[Grid Tables] above for a complete table representation. + +Simple tables are described with horizontal borders made up of "=" and +"-" characters. The equals sign ("=") is used for top and bottom +table borders, and to separate optional header rows from the table +body. The hyphen ("-") is used to indicate column spans in a single +row by underlining the joined columns, and may optionally be used to +explicitly and/or visually separate rows. + +A simple table begins with a top border of equals signs with one or +more spaces at each column boundary (two or more spaces recommended). +Regardless of spans, the top border *must* fully describe all table +columns. 
There must be at least two columns in the table (to +differentiate it from section headers). The top border may be +followed by header rows, and the last of the optional header rows is +underlined with '=', again with spaces at column boundaries. There +may not be a blank line below the header row separator; it would be +interpreted as the bottom border of the table. The bottom boundary of +the table consists of '=' underlines, also with spaces at column +boundaries. For example, here is a truth table, a three-column table +with one header row and four body rows: + +``` +===== ===== ======= + A B A and B +===== ===== ======= +False False False +True False False +False True False +True True True +===== ===== ======= +``` + +Underlines of '-' may be used to indicate column spans by "filling in" +column margins to join adjacent columns. Column span underlines must +be complete (they must cover all columns) and align with established +column boundaries. Text lines containing column span underlines may +not contain any other text. A column span underline applies only to +one row immediately above it. For example, here is a table with a +column span in the header: + +``` +===== ===== ====== + Inputs Output +------------ ------ + A B A or B +===== ===== ====== +False False False +True False True +False True True +True True True +===== ===== ====== +``` + +Each line of text must contain spaces at column boundaries, except +where cells have been joined by column spans. Each line of text +starts a new row, except when there is a blank cell in the first +column. In that case, that line of text is parsed as a continuation +line. For this reason, cells in the first column of new rows (*not* +continuation lines) *must* contain some text; blank cells would lead +to a misinterpretation (but see the tip below). Also, this mechanism +limits cells in the first column to only one line of text. Use [grid +tables][grid tables] if this limitation is unacceptable. 
+ +:::{Tip} +To start a new row in a simple table without text in the first +column in the processed output, use one of these: + +- an empty comment (".."), which may be omitted from the processed + output (see [Comments] below) +- a backslash escape ("`\`") followed by a space (see [Escaping + Mechanism][escaping mechanism] above) +::: + +Underlines of '-' may also be used to visually separate rows, even if +there are no column spans. This is especially useful in long tables, +where rows are many lines long. + +Blank lines are permitted within simple tables. Their interpretation +depends on the context. Blank lines *between* rows are ignored. +Blank lines *within* multi-line rows may separate paragraphs or other +body elements within cells. + +The rightmost column is unbounded; text may continue past the edge of +the table (as indicated by the table borders). However, it is +recommended that borders be made long enough to contain the entire +text. + +The following example illustrates continuation lines (row 2 consists +of two lines of text, and four lines for row 3), a blank line +separating paragraphs (row 3, column 2), text extending past the right +edge of the table, and a new row which will have no text in the first +column in the processed output (row 4): + +``` +===== ===== +col 1 col 2 +===== ===== +1 Second column of row 1. +2 Second column of row 2. + Second line of paragraph. +3 - Second column of row 3. + + - Second item in bullet + list (row 3, column 2). +\ Row 4; column 1 will be empty. +===== ===== +``` + +#### Explicit Markup Blocks + +An explicit markup block is a text block: + +- whose first line begins with ".." followed by whitespace (the + "explicit markup start"), +- whose second and subsequent lines (if any) are indented relative to + the first, and +- which ends before an unindented line. + +Explicit markup blocks are analogous to bullet list items, with ".." +as the bullet. 
The text on the lines immediately after the explicit +markup start determines the indentation of the block body. The +maximum common indentation is always removed from the second and +subsequent lines of the block body. Therefore if the first construct +fits in one line, and the indentation of the first and second +constructs should differ, the first construct should not begin on the +same line as the explicit markup start. + +Blank lines are required between explicit markup blocks and other +elements, but are optional between explicit markup blocks where +unambiguous. + +The explicit markup syntax is used for footnotes, citations, hyperlink +targets, directives, substitution definitions, and comments. + +##### Footnotes + +Doctree elements: footnote, label. + +Each footnote consists of an explicit markup start (".. "), a left +square bracket, the footnote label, a right square bracket, and +whitespace, followed by indented body elements. A footnote label can +be: + +- a whole decimal number consisting of one or more digits, +- a single "#" (denoting [auto-numbered footnotes]), +- a "#" followed by a simple reference name (an [autonumber label]), + or +- a single "\*" (denoting [auto-symbol footnotes]). + +The footnote content (body elements) must be consistently indented (by +at least 3 spaces) and left-aligned. The first body element within a +footnote may often begin on the same line as the footnote label. +However, if the first element fits on one line and the indentation of +the remaining elements differs, the first element must begin on the +line after the footnote label. Otherwise, the difference in +indentation will not be detected. + +Footnotes may occur anywhere in the document, not only at the end. +Where and how they appear in the processed output depends on the +processing system. + +Here is a manually numbered footnote: + +``` +.. [1] Body elements go here. +``` + +Each footnote automatically generates a hyperlink target pointing to +itself.
The text of the hyperlink target name is the same as that of +the footnote label. [Auto-numbered footnotes] generate a number as +their footnote label and reference name. See [Implicit Hyperlink +Targets][implicit hyperlink targets] for a complete description of the mechanism. + +Syntax diagram: + +``` ++-------+-------------------------+ +| ".. " | "[" label "]" footnote | ++-------+ | + | (body elements)+ | + +-------------------------+ +``` + +###### Auto-Numbered Footnotes + +A number sign ("#") may be used as the first character of a footnote +label to request automatic numbering of the footnote or footnote +reference. + +The first footnote to request automatic numbering is assigned the +label "1", the second is assigned the label "2", and so on (assuming +there are no manually numbered footnotes present; see [Mixed Manual +and Auto-Numbered Footnotes][mixed manual and auto-numbered footnotes] below). A footnote which has +automatically received a label "1" generates an implicit hyperlink +target with name "1", just as if the label was explicitly specified. + +(autonumber label)= + +A footnote may specify a label explicitly while at the same time +requesting automatic numbering: `[#label]`. These labels are called +`` _`autonumber labels` ``. Autonumber labels do two things: + +- On the footnote itself, they generate a hyperlink target whose name + is the autonumber label (doesn't include the "#"). + +- They allow an automatically numbered footnote to be referred to more + than once, as a footnote reference or hyperlink reference. For + example: + + ``` + If [#note]_ is the first footnote reference, it will show up as + "[1]". We can refer to it again as [#note]_ and again see + "[1]". We can also refer to it as note_ (an ordinary internal + hyperlink reference). + + .. [#note] This is the footnote labeled "note". + ``` + +The numbering is determined by the order of the footnotes, not by the +order of the references. 
For footnote references without autonumber +labels (`[#]_`), the footnotes and footnote references must be in +the same relative order but need not alternate in lock-step. For +example: + +``` +[#]_ is a reference to footnote 1, and [#]_ is a reference to +footnote 2. + +.. [#] This is footnote 1. +.. [#] This is footnote 2. +.. [#] This is footnote 3. + +[#]_ is a reference to footnote 3. +``` + +Special care must be taken if footnotes themselves contain +auto-numbered footnote references, or if multiple references are made +in close proximity. Footnotes and references are noted in the order +they are encountered in the document, which is not necessarily the +same as the order in which a person would read them. + +###### Auto-Symbol Footnotes + +An asterisk ("\*") may be used for footnote labels to request automatic +symbol generation for footnotes and footnote references. The asterisk +may be the only character in the label. For example: + +``` +Here is a symbolic footnote reference: [*]_. + +.. [*] This is the footnote. +``` + +A transform will insert symbols as labels into corresponding footnotes +and footnote references. The number of references must be equal to +the number of footnotes. One symbol footnote cannot have multiple +references. + +The standard Docutils system uses the following symbols for footnote +marks [^id12]: + +- asterisk/star ("\*") +- dagger (HTML character entity "\&dagger;", Unicode U+02020) +- double dagger ("\&Dagger;"/U+02021) +- section mark ("\&sect;"/U+000A7) +- pilcrow or paragraph mark ("\&para;"/U+000B6) +- number sign ("#") +- spade suit ("\&spades;"/U+02660) +- heart suit ("\&hearts;"/U+02665) +- diamond suit ("\&diams;"/U+02666) +- club suit ("\&clubs;"/U+02663) + +[^id12]: This list was inspired by the list of symbols for "Note + Reference Marks" in The Chicago Manual of Style, 14th edition, + section 12.51. "Parallels" ("||") were given in CMoS instead of + the pilcrow. The last four symbols (the card suits) were added + arbitrarily.
+ +If more than ten symbols are required, the same sequence will be +reused, doubled and then tripled, and so on ("\*\*" etc.). + +:::{Note} +When using auto-symbol footnotes, the choice of output +encoding is important. Many of the symbols used are not encodable +in certain common text encodings such as Latin-1 (ISO 8859-1). The +use of UTF-8 for the output encoding is recommended. An +alternative for HTML and XML output is to use the +"xmlcharrefreplace" [output encoding error handler](../../user/config.html#output-encoding-error-handler). +::: + +###### Mixed Manual and Auto-Numbered Footnotes + +Manual and automatic footnote numbering may both be used within a +single document, although the results may not be expected. Manual +numbering takes priority. Only unused footnote numbers are assigned +to auto-numbered footnotes. The following example should be +illustrative: + +``` +[2]_ will be "2" (manually numbered), +[#]_ will be "3" (anonymous auto-numbered), and +[#label]_ will be "1" (labeled auto-numbered). + +.. [2] This footnote is labeled manually, so its number is fixed. + +.. [#label] This autonumber-labeled footnote will be labeled "1". + It is the first auto-numbered footnote and no other footnote + with label "1" exists. The order of the footnotes is used to + determine numbering, not the order of the footnote references. + +.. [#] This footnote will be labeled "3". It is the second + auto-numbered footnote, but footnote label "2" is already used. +``` + +##### Citations + +Citations are identical to footnotes except that they use only +non-numeric labels such as `[note]` or `[GVR2001]`. Citation +labels are simple [reference names] (case-insensitive single words +consisting of alphanumerics plus internal hyphens, underscores, and +periods; no whitespace). Citations may be rendered separately and +differently from footnotes. For example: + +``` +Here is a citation reference: [CIT2002]_. + +.. [CIT2002] This is the citation. 
It's just like a footnote, + except the label is textual. +``` + +(hyperlinks)= + +##### Hyperlink Targets + +Doctree element: target. + +These are also called `` _`explicit hyperlink targets` ``, to differentiate +them from [implicit hyperlink targets] defined below. + +Hyperlink targets identify a location within or outside of a document, +which may be linked to by [hyperlink references]. + +Hyperlink targets may be named or anonymous. Named hyperlink targets +consist of an explicit markup start (".. "), an underscore, the +reference name (no trailing underscore), a colon, whitespace, and a +link block: + +``` +.. _hyperlink-name: link-block +``` + +Reference names are whitespace-neutral and case-insensitive. See +[Reference Names] for details and examples. + +Anonymous hyperlink targets consist of an explicit markup start +(".. "), two underscores, a colon, whitespace, and a link block; there +is no reference name: + +``` +.. __: anonymous-hyperlink-target-link-block +``` + +An alternate syntax for anonymous hyperlinks consists of two +underscores, a space, and a link block: + +``` +__ anonymous-hyperlink-target-link-block +``` + +See [Anonymous Hyperlinks] below. + +There are three types of hyperlink targets: internal, external, and +indirect. + +1. `` _`Internal hyperlink targets` `` have empty link blocks. They provide + an end point allowing a hyperlink to connect one place to another + within a document. An internal hyperlink target points to the + element following the target. For example: + + ``` + Clicking on this internal hyperlink will take us to the target_ + below. + + .. _target: + + The hyperlink target above points to this paragraph. + ``` + + Internal hyperlink targets may be "chained". Multiple adjacent + internal hyperlink targets all point to the same element: + + ``` + .. _target1: + .. _target2: + + The targets "target1" and "target2" are synonyms; they both + point to this paragraph. 
+ ``` + + If the element "pointed to" is an external hyperlink target (with a + URI in its link block; see #2 below) the URI from the external + hyperlink target is propagated to the internal hyperlink targets; + they will all "point to" the same URI. There is no need to + duplicate a URI. For example, all three of the following hyperlink + targets refer to the same URI: + + ``` + .. _Python DOC-SIG mailing list archive: + .. _archive: + .. _Doc-SIG: http://mail.python.org/pipermail/doc-sig/ + ``` + + An inline form of internal hyperlink target is available; see + [Inline Internal Targets]. + +2. `` _`External hyperlink targets` `` have an absolute or relative URI or + email address in their link blocks. For example, take the + following input: + + ``` + See the Python_ home page for info. + + `Write to me`_ with your questions. + + .. _Python: http://www.python.org + .. _Write to me: jdoe@example.com + ``` + + After processing into HTML, the hyperlinks might be expressed as: + + ``` + See the <a href="http://www.python.org">Python</a> home page + for info. + + <a href="mailto:jdoe@example.com">Write to me</a> with your + questions. + ``` + + An external hyperlink's URI may begin on the same line as the + explicit markup start and target name, or it may begin in an + indented text block immediately following, with no intervening + blank lines. If there are multiple lines in the link block, they + are concatenated. Any whitespace is removed (whitespace is + permitted to allow for line wrapping). The following external + hyperlink targets are equivalent: + + ``` + .. _one-liner: http://docutils.sourceforge.net/rst.html + + .. _starts-on-this-line: http:// + docutils.sourceforge.net/rst.html + + .. _entirely-below: + http://docutils. 
+ sourceforge.net/rst.html + ``` + + If an external hyperlink target's URI contains an underscore as its + last character, it must be escaped to avoid being mistaken for an + indirect hyperlink target: + + ``` + This link_ refers to a file called ``underscore_``. + + .. _link: underscore\_ + ``` + + It is possible (although not generally recommended) to include URIs + directly within hyperlink references. See [Embedded URIs and Aliases] + below. + +3. `` _`Indirect hyperlink targets` `` have a hyperlink reference in their + link blocks. In the following example, target "one" indirectly + references whatever target "two" references, and target "two" + references target "three", an internal hyperlink target. In + effect, all three reference the same thing: + + ``` + .. _one: two_ + .. _two: three_ + .. _three: + ``` + + Just as with [hyperlink references] anywhere else in a document, + if a phrase-reference is used in the link block it must be enclosed + in backquotes. As with [external hyperlink targets], the link + block of an indirect hyperlink target may begin on the same line as + the explicit markup start or the next line. It may also be split + over multiple lines, in which case the lines are joined with + whitespace before being normalized. + + For example, the following indirect hyperlink targets are + equivalent: + + ``` + .. _one-liner: `A HYPERLINK`_ + .. _entirely-below: + `a hyperlink`_ + .. _split: `A + Hyperlink`_ + ``` + + It is possible to include an alias directly within hyperlink + references. See [Embedded URIs and Aliases] below. + +If the reference name contains any colons, either: + +- the phrase must be enclosed in backquotes: + + ``` + .. _`FAQTS: Computers: Programming: Languages: Python`: + http://python.faqts.com/ + ``` + +- or the colon(s) must be backslash-escaped in the link target: + + ``` + .. _Chapter One\: "Tadpole Days": + + It's not easy being green... 
+ ``` + +See [Implicit Hyperlink Targets] below for the resolution of +duplicate reference names. + +Syntax diagram: + +``` ++-------+----------------------+ +| ".. " | "_" name ":" link | ++-------+ block | + | | + +----------------------+ +``` + +###### Anonymous Hyperlinks + +The [World Wide Web Consortium] recommends in its [HTML Techniques +for Web Content Accessibility Guidelines][html techniques for web content accessibility guidelines] that authors should +"clearly identify the target of each link." Hyperlink references +should be as verbose as possible, but duplicating a verbose hyperlink +name in the target is onerous and error-prone. Anonymous hyperlinks +are designed to allow convenient verbose hyperlink references, and are +analogous to [Auto-Numbered Footnotes]. They are particularly useful +in short or one-off documents. However, this feature is easily abused +and can result in unreadable plaintext and/or unmaintainable +documents. Caution is advised. + +Anonymous [hyperlink references] are specified with two underscores +instead of one: + +``` +See `the web site of my favorite programming language`__. +``` + +Anonymous targets begin with ".. \_\_:"; no reference name is required +or allowed: + +``` +.. __: http://www.python.org +``` + +As a convenient alternative, anonymous targets may begin with "\_\_" +only: + +``` +__ http://www.python.org +``` + +The reference name of the reference is not used to match the reference +to its target. Instead, the order of anonymous hyperlink references +and targets within the document is significant: the first anonymous +reference will link to the first anonymous target. The number of +anonymous hyperlink references in a document must match the number of +anonymous targets. For readability, it is recommended that targets be +kept close to references. Take care when editing text containing +anonymous references; adding, removing, and rearranging references +require attention to the order of corresponding targets. 
+ +##### Directives + +Doctree elements: depend on the directive. + +Directives are an extension mechanism for reStructuredText, a way of +adding support for new constructs without adding new primary syntax +(directives may support additional syntax locally). All standard +directives (those implemented and registered in the reference +reStructuredText parser) are described in the [reStructuredText +Directives][restructuredtext directives] document, and are always available. Any other directives +are domain-specific, and may require special action to make them +available when processing the document. + +For example, here's how an [image] may be placed: + +``` +.. image:: mylogo.jpeg +``` + +A [figure] (a graphic with a caption) may be placed like this: + +``` +.. figure:: larch.png + + The larch. +``` + +An [admonition] (note, caution, etc.) contains other body elements: + +``` +.. note:: This is a paragraph + + - Here is a bullet list. +``` + +Directives are indicated by an explicit markup start (".. ") followed +by the directive type, two colons, and whitespace (together called the +"directive marker"). Directive types are case-insensitive single +words (alphanumerics plus isolated internal hyphens, underscores, +plus signs, colons, and periods; no whitespace). Two colons are used +after the directive type for these reasons: + +- Two colons are distinctive, and unlikely to be used in common text. + +- Two colons avoid clashes with common comment text like: + + ``` + .. Danger: modify at your own risk! + ``` + +- If an implementation of reStructuredText does not recognize a + directive (i.e., the directive-handler is not installed), a level-3 + (error) system message is generated, and the entire directive block + (including the directive itself) will be included as a literal + block. Thus "::" is a natural choice. + +The directive block consists of any text on the first line of the +directive after the directive marker, and any subsequent indented +text.
The interpretation of the directive block is up to the +directive code. There are three logical parts to the directive block: + +1. Directive arguments. +2. Directive options. +3. Directive content. + +Individual directives can employ any combination of these parts. +Directive arguments can be filesystem paths, URLs, title text, etc. +Directive options are indicated using [field lists]; the field names +and contents are directive-specific. Arguments and options must form +a contiguous block beginning on the first or second line of the +directive; a blank line indicates the beginning of the directive +content block. If either arguments and/or options are employed by the +directive, a blank line must separate them from the directive content. +The "figure" directive employs all three parts: + +``` +.. figure:: larch.png + :scale: 50 + + The larch. +``` + +Simple directives may not require any content. If a directive that +does not employ a content block is followed by indented text anyway, +it is an error. If a block quote should immediately follow a +directive, use an empty comment in-between (see [Comments] below). + +Actions taken in response to directives and the interpretation of text +in the directive content block or subsequent text block(s) are +directive-dependent. See [reStructuredText Directives] for details. + +Directives are meant for the arbitrary processing of their contents, +which can be transformed into something possibly unrelated to the +original text. It may also be possible for directives to be used as +pragmas, to modify the behavior of the parser, such as to experiment +with alternate syntax. There is no parser support for this +functionality at present; if a reasonable need for pragma directives +is found, they may be supported. + +Directives do not generate "directive" elements; they are a *parser +construct* only, and have no intrinsic meaning outside of +reStructuredText. 
Instead, the parser will transform recognized +directives into (possibly specialized) document elements. Unknown +directives will trigger level-3 (error) system messages. + +Syntax diagram: + +``` ++-------+-------------------------------+ +| ".. " | directive type "::" directive | ++-------+ block | + | | + +-------------------------------+ +``` + +##### Substitution Definitions + +Doctree element: substitution_definition. + +Substitution definitions are indicated by an explicit markup start +(".. ") followed by a vertical bar, the substitution text, another +vertical bar, whitespace, and the definition block. Substitution text +may not begin or end with whitespace. A substitution definition block +contains an embedded inline-compatible directive (without the leading +".. "), such as "[image]" or "[replace]". For example: + +``` +The |biohazard| symbol must be used on containers used to +dispose of medical waste. + +.. |biohazard| image:: biohazard.png +``` + +It is an error for a substitution definition block to directly or +indirectly contain a circular substitution reference. + +[Substitution references] are replaced in-line by the processed +contents of the corresponding definition (linked by matching +substitution text). Matches are case-sensitive but forgiving; if no +exact match is found, a case-insensitive comparison is attempted. + +Substitution definitions allow the power and flexibility of +block-level [directives] to be shared by inline text. They are a way +to include arbitrarily complex inline structures within text, while +keeping the details out of the flow of text. They are the equivalent +of SGML/XML's named entities or programming language macros. + +Without the substitution mechanism, every time someone wants an +application-specific new inline structure, they would have to petition +for a syntax change. In combination with existing directive syntax, +any inline structure can be coded without new syntax (except possibly +a new directive). 
+ +Syntax diagram: + +``` ++-------+-----------------------------------------------------+ +| ".. " | "|" substitution text "| " directive type "::" data | ++-------+ directive block | + | | + +-----------------------------------------------------+ +``` + +Following are some use cases for the substitution mechanism. Please +note that most of the embedded directives shown are examples only and +have not been implemented. + +Objects + +: Substitution references may be used to associate ambiguous text + with a unique object identifier. + + For example, many sites may wish to implement an inline "user" + directive: + + ``` + |Michael| and |Jon| are our widget-wranglers. + + .. |Michael| user:: mjones + .. |Jon| user:: jhl + ``` + + Depending on the needs of the site, this may be used to index the + document for later searching, to hyperlink the inline text in + various ways (mailto, homepage, mouseover Javascript with profile + and contact information, etc.), or to customize presentation of + the text (include username in the inline text, include an icon + image with a link next to the text, make the text bold or a + different color, etc.). + + The same approach can be used in documents which frequently refer + to a particular type of objects with unique identifiers but + ambiguous common names. Movies, albums, books, photos, court + cases, and laws are possible. For example: + + ``` + |The Transparent Society| offers a fascinating alternate view + on privacy issues. + + .. |The Transparent Society| book:: isbn=0738201448 + ``` + + Classes or functions, in contexts where the module or class names + are unclear and/or interpreted text cannot be used, are another + possibility: + + ``` + 4XSLT has the convenience method |runString|, so you don't + have to mess with DOM objects if all you want is the + transformed output. + + .. 
|runString| function:: module=xml.xslt class=Processor + ``` + +Images + +: Images are a common use for substitution references: + + ``` + West led the |H| 3, covered by dummy's |H| Q, East's |H| K, + and trumped in hand with the |S| 2. + + .. |H| image:: /images/heart.png + :height: 11 + :width: 11 + .. |S| image:: /images/spade.png + :height: 11 + :width: 11 + + * |Red light| means stop. + * |Green light| means go. + * |Yellow light| means go really fast. + + .. |Red light| image:: red_light.png + .. |Green light| image:: green_light.png + .. |Yellow light| image:: yellow_light.png + + |-><-| is the official symbol of POEE_. + + .. |-><-| image:: discord.png + .. _POEE: http://www.poee.org/ + ``` + + The "[image]" directive has been implemented. + +Styles [^id15] + +: Substitution references may be used to associate inline text with + an externally defined presentation style: + + ``` + Even |the text in Texas| is big. + + .. |the text in Texas| style:: big + ``` + + The style name may be meaningful in the context of some particular + output format (CSS class name for HTML output, LaTeX style name + for LaTeX, etc), or may be ignored for other output formats (such + as plaintext). + + % @@@ This needs to be rethought & rewritten or removed: + % + % Interpreted text is unsuitable for this purpose because the set + % of style names cannot be predefined - it is the domain of the + % content author, not the author of the parser and output + % formatter - and there is no way to associate a style name + % argument with an interpreted text style role. Also, it may be + % desirable to use the same mechanism for styling blocks:: + % + % .. style:: motto + % At Bob's Underwear Shop, we'll do anything to get in + % your pants. + % + % .. style:: disclaimer + % All rights reversed. Reprint what you like. + + [^id15]: There may be sufficient need for a "style" mechanism to + warrant simpler syntax such as an extension to the interpreted + text role syntax. 
The substitution mechanism is cumbersome for + simple text styling. + +Templates + +: Inline markup may be used for later processing by a template + engine. For example, a [Zope] author might write: + + ``` + Welcome back, |name|! + + .. |name| tal:: replace user/getUserName + ``` + + After processing, this ZPT output would result: + + ``` + Welcome back, + <span tal:replace="user/getUserName">name</span>! + ``` + + Zope would then transform this to something like "Welcome back, + David!" during a session with an actual user. + +Replacement text + +: The substitution mechanism may be used for simple macro + substitution. This may be appropriate when the replacement text + is repeated many times throughout one or more documents, + especially if it may need to change later. A short example is + unavoidably contrived: + + ``` + |RST|_ is a little annoying to type over and over, especially + when writing about |RST| itself, and spelling out the + bicapitalized word |RST| every time isn't really necessary for + |RST| source readability. + + .. |RST| replace:: reStructuredText + .. _RST: http://docutils.sourceforge.net/rst.html + ``` + + Note the trailing underscore in the first use of a substitution + reference. This indicates a reference to the corresponding + hyperlink target. + + Substitution is also appropriate when the replacement text cannot + be represented using other inline constructs, or is obtrusively + long: + + ``` + But still, that's nothing compared to a name like + |j2ee-cas|__. + + .. |j2ee-cas| replace:: + the Java `TM`:super: 2 Platform, Enterprise Edition Client + Access Services + __ http://developer.java.sun.com/developer/earlyAccess/ + j2eecas/ + ``` + + The "[replace]" directive has been implemented. + +##### Comments + +Doctree element: comment. + +Arbitrary indented text may follow the explicit markup start and will +be processed as a comment element. 
No further processing is done on +the comment block text; a comment contains a single "text blob". +Depending on the output formatter, comments may be removed from the +processed output. The only restriction on comments is that they not +use the same syntax as any of the other explicit markup constructs: +substitution definitions, directives, footnotes, citations, or +hyperlink targets. To ensure that none of the other explicit markup +constructs is recognized, leave the ".." on a line by itself: + +``` +.. This is a comment +.. + _so: is this! +.. + [and] this! +.. + this:: too! +.. + |even| this:: ! +``` + +(empty-comments)= + +An explicit markup start followed by a blank line and nothing else +(apart from whitespace) is an "`` _`empty comment` ``". It serves to +terminate a preceding construct, and does **not** consume any indented +text following. To have a block quote follow a list or any indented +construct, insert an unindented empty comment in-between. + +Syntax diagram: + +``` ++-------+----------------------+ +| ".. " | comment | ++-------+ block | + | | + +----------------------+ +``` + +### Implicit Hyperlink Targets + +Implicit hyperlink targets are generated by section titles, footnotes, +and citations, and may also be generated by extension constructs. +Implicit hyperlink targets otherwise behave identically to explicit +[hyperlink targets]. + +Problems of ambiguity due to conflicting duplicate implicit and +explicit reference names are avoided by following this procedure: + +1. [Explicit hyperlink targets] override any implicit targets having + the same reference name. The implicit hyperlink targets are + removed, and level-1 (info) system messages are inserted. +2. Duplicate implicit hyperlink targets are removed, and level-1 + (info) system messages inserted. For example, if two or more + sections have the same title (such as "Introduction" subsections of + a rigidly-structured document), there will be duplicate implicit + hyperlink targets. +3. 
Duplicate explicit hyperlink targets are removed, and level-2 + (warning) system messages are inserted. Exception: duplicate + [external hyperlink targets] (identical hyperlink names and + referenced URIs) do not conflict, and are not removed. + +System messages are inserted where target links have been removed. +See "Error Handling" in [PEP 258]. + +The parser must return a set of *unique* hyperlink targets. The +calling software (such as the [Docutils]) can warn of unresolvable +links, giving reasons for the messages. + +### Inline Markup + +In reStructuredText, inline markup applies to words or phrases within +a text block. The same whitespace and punctuation that serves to +delimit words in written text is used to delimit the inline markup +syntax constructs. The text within inline markup may not begin or end +with whitespace. Arbitrary [character-level inline markup] is +supported although not encouraged. Inline markup cannot be nested. + +There are nine inline markup constructs. Five of the constructs use +identical start-strings and end-strings to indicate the markup: + +- [emphasis]: "\*" +- [strong emphasis]: "\*\*" +- [interpreted text]: "\`" +- [inline literals]: "\`\`" +- [substitution references]: "|" + +Three constructs use different start-strings and end-strings: + +- [inline internal targets]: "\_\`" and "\`" +- [footnote references]: "\[" and "\]\_" +- [hyperlink references]: "\`" and "\`\_" (phrases), or just a + trailing "\_" (single words) + +[Standalone hyperlinks] are recognized implicitly, and use no extra +markup. + +#### Inline markup recognition rules + +Inline markup start-strings and end-strings are only recognized if all of +the following conditions are met: + +1. 
Inline markup start-strings must start a text block or be + immediately preceded by + + - whitespace, + - one of the ASCII characters `- : / ' " < ( [ {` or + - a non-ASCII punctuation character with [Unicode category] + `Pd` (Dash), + `Po` (Other), + `Ps` (Open), + `Pi` (Initial quote), or + `Pf` (Final quote) [^pipf]. + +2. Inline markup start-strings must be immediately followed by + non-whitespace. + +3. Inline markup end-strings must be immediately preceded by + non-whitespace. + +4. Inline markup end-strings must end a text block or be immediately + followed by + + - whitespace, + - one of the ASCII characters `- . , : ; ! ? \ / ' " ) ] } >` or + - a non-ASCII punctuation character with [Unicode category] + `Pd` (Dash), + `Po` (Other), + `Pe` (Close), + `Pf` (Final quote), or + `Pi` (Initial quote) [^pipf]. + +5. If an inline markup start-string is immediately preceded by one of the + ASCII characters `' " < ( [ {`, or a character with Unicode character + category `Ps`, `Pi`, or `Pf`, it must not be followed by the + corresponding [^corresponding-quotes] closing character from + `' " ) ] } >` or the categories `Pe`, `Pf`, or `Pi`. + +6. An inline markup end-string must be separated by at least one + character from the start-string. + +7. An unescaped backslash preceding a start-string or end-string will + disable markup recognition, except for the end-string of [inline + literals][inline literals]. See [Escaping Mechanism] above for details. + +[^pipf]: `Pi` (Punctuation, Initial quote) characters are "usually + opening, sometimes closing". `Pf` (Punctuation, Final quote) + characters are "usually closing, sometimes opening". + +[^corresponding-quotes]: For quotes, corresponding characters can be + any of the [quotation marks in international usage]. + +The inline markup recognition rules were devised to allow 90% of non-markup +uses of "\*", "\`", "\_", and "|" without escaping.
For example, none of the +following terms are recognized as containing inline markup strings: + +- 2\*x a\*\*b O(N\*\*2) e\*\*(x\*y) f(x)\*f(y) a|b file\*.\* (breaks 1) +- 2 * x a \*\* b (\* BOM32\_\* \` \`\` _ \_\_ | (breaks 2) +- "\*" '|' (\*) \[\*\] {\*} \<\*> + ‘\*’ ‚\*‘ ‘\*‚ ’\*’ ‚\*’ + “\*” „\*“ “\*„ ”\*” „\*” + »\*« ›\*‹ «\*» »\*» ›\*› (breaks 5) +- || (breaks 6) +- \_\_init\_\_ \_\_init\_\_() + +No escaping is required inside the following inline markup examples: + +- *2 * x \*a \*\*b \*.txt* (breaks 3) +- *2\*x a\*\*b O(N\*\*2) e\*\*(x\*y) f(x)\*f(y) a\*(1+2)* (breaks 4) + +It may be desirable to use [inline literals] for some of these anyhow, +especially if they represent code snippets. It's a judgment call. + +These cases *do* require either literal-quoting or escaping to avoid +misinterpretation: + +> \*4, class\_, \*args, \*\*kwargs, \`TeX-quoted', \*ML, \*.txt + +In most use cases, [inline literals] or [literal blocks] are the best +choice (by default, this also selects a monospaced font): + +``` +*4, class_, *args, **kwargs, `TeX-quoted', *ML, *.txt +``` + +#### Recognition order + +Inline markup delimiter characters are used for multiple constructs, +so to avoid ambiguity there must be a specific recognition order for +each character. The inline markup recognition order is as follows: + +- Asterisks: [Strong emphasis] ("\*\*") is recognized before [emphasis] + ("\*"). +- Backquotes: [Inline literals] ("\`\`"), [inline internal targets] + (leading "\_\`", trailing "\`"), are mutually independent, and are + recognized before phrase [hyperlink references] (leading "\`", + trailing "\`\_") and [interpreted text] ("\`"). +- Trailing underscores: Footnote references ("\[" + label + "\]\_") and + simple [hyperlink references] (name + trailing "\_") are mutually + independent. +- Vertical bars: [Substitution references] ("|") are independently + recognized. +- [Standalone hyperlinks] are the last to be recognized. 
+ +#### Character-Level Inline Markup + +It is possible to mark up individual characters within a word with +backslash escapes (see [Escaping Mechanism] above). Backslash +escapes can be used to allow arbitrary text to immediately follow +inline markup: + +``` +Python ``list``\s use square bracket syntax. +``` + +The backslash will disappear from the processed document. The word +"list" will appear as inline literal text, and the letter "s" will +immediately follow it as normal text, with no space in-between. + +Arbitrary text may immediately precede inline markup using +backslash-escaped whitespace: + +``` +Possible in *re*\ ``Structured``\ *Text*, though not encouraged. +``` + +The backslashes and spaces separating "re", "Structured", and "Text" +above will disappear from the processed document. + +:::{CAUTION} +The use of backslash-escapes for character-level inline markup is +not encouraged. Such use is ugly and detrimental to the +unprocessed document's readability. Please use this feature +sparingly and only where absolutely necessary. +::: + +#### Emphasis + +Doctree element: emphasis. + +Start-string = end-string = "\*". + +Text enclosed by single asterisk characters is emphasized: + +``` +This is *emphasized text*. +``` + +Emphasized text is typically displayed in italics. + +#### Strong Emphasis + +Doctree element: strong. + +Start-string = end-string = "\*\*". + +Text enclosed by double-asterisks is emphasized strongly: + +``` +This is **strong text**. +``` + +Strongly emphasized text is typically displayed in boldface. + +#### Interpreted Text + +Doctree element: depends on the explicit or implicit role and +processing. + +Start-string = end-string = "\`". + +Interpreted text is text that is meant to be related, indexed, linked, +summarized, or otherwise processed, but the text itself is typically +left alone. Interpreted text is enclosed by single backquote +characters: + +``` +This is `interpreted text`. 
+``` + +The "role" of the interpreted text determines how the text is +interpreted. The role may be inferred implicitly (as above; the +"default role" is used) or indicated explicitly, using a role marker. +A role marker consists of a colon, the role name, and another colon. +A role name is a single word consisting of alphanumerics plus isolated +internal hyphens, underscores, plus signs, colons, and periods; +no whitespace or other characters are allowed. A role marker is +either a prefix or a suffix to the interpreted text, whichever reads +better; it's up to the author: + +``` +:role:`interpreted text` + +`interpreted text`:role: +``` + +Interpreted text allows extensions to the available inline descriptive +markup constructs. To [emphasis], [strong emphasis], [inline +literals][inline literals], and [hyperlink references], we can add "title reference", +"index entry", "acronym", "class", "red", "blinking" or anything else +we want. Only pre-determined roles are recognized; unknown roles will +generate errors. A core set of standard roles is implemented in the +reference parser; see [reStructuredText Interpreted Text Roles] for +individual descriptions. The [role] directive can be used to define +custom interpreted text roles. In addition, applications may support +specialized roles. + +#### Inline Literals + +Doctree element: literal. + +Start-string = end-string = "\`\`". + +Text enclosed by double-backquotes is treated as inline literals: + +``` +This text is an example of ``inline literals``. +``` + +Inline literals may contain any characters except two adjacent +backquotes in an end-string context (according to the recognition +rules above). No markup interpretation (including backslash-escape +interpretation) is done within inline literals. + +Line breaks are *not* preserved in inline literals. 
Although a +reStructuredText parser will preserve runs of spaces in its output, +the final representation of the processed document is dependent on the +output formatter, thus the preservation of whitespace cannot be +guaranteed. If the preservation of line breaks and/or other +whitespace is important, [literal blocks] should be used. + +Inline literals are useful for short code snippets. For example: + +``` +The regular expression ``[+-]?(\d+(\.\d*)?|\.\d+)`` matches +floating-point numbers (without exponents). +``` + +#### Hyperlink References + +Doctree element: reference. + +- Named hyperlink references: + + - Start-string = "" (empty string), end-string = "\_". + - Start-string = "\`", end-string = "\`\_". (Phrase references.) + +- Anonymous hyperlink references: + + - Start-string = "" (empty string), end-string = "\_\_". + - Start-string = "\`", end-string = "\`\_\_". (Phrase references.) + +Hyperlink references are indicated by a trailing underscore, "\_", +except for [standalone hyperlinks] which are recognized +independently. The underscore can be thought of as a right-pointing +arrow. The trailing underscores point away from hyperlink references, +and the leading underscores point toward [hyperlink targets]. + +Hyperlinks consist of two parts. In the text body, there is a source +link, a reference name with a trailing underscore (or two underscores +for [anonymous hyperlinks]): + +``` +See the Python_ home page for info. +``` + +A target link with a matching reference name must exist somewhere else +in the document. See [Hyperlink Targets] for a full description). + +[Anonymous hyperlinks] (which see) do not use reference names to +match references to targets, but otherwise behave similarly to named +hyperlinks. 
+ +##### Embedded URIs and Aliases + +A hyperlink reference may directly embed a target URI or (since +Docutils 0.11) a hyperlink reference within angle brackets ("\<...>") +as follows: + +``` +See the `Python home page <http://www.python.org>`_ for info. + +This `link <Python home page_>`_ is an alias to the link above. +``` + +This is exactly equivalent to: + +``` +See the `Python home page`_ for info. + +This link_ is an alias to the link above. + +.. _Python home page: http://www.python.org +.. _link: `Python home page`_ +``` + +The bracketed URI must be preceded by whitespace and be the last text +before the end string. + +With a single trailing underscore, the reference is named and the same +target URI may be referred to again. +With two trailing underscores, the reference and target are both +anonymous, and the target cannot be referred to again. These are +"one-off" hyperlinks. For example: + +``` +`RFC 2396 <http://www.rfc-editor.org/rfc/rfc2396.txt>`__ and `RFC +2732 <http://www.rfc-editor.org/rfc/rfc2732.txt>`__ together +define the syntax of URIs. +``` + +Equivalent to: + +``` +`RFC 2396`__ and `RFC 2732`__ together define the syntax of URIs. + +__ http://www.rfc-editor.org/rfc/rfc2396.txt +__ http://www.rfc-editor.org/rfc/rfc2732.txt +``` + +[Standalone hyperlinks] are treated as URIs, even if they end with an +underscore like in the example of a Python function documentation: + +``` +`__init__ <http:example.py.html#__init__>`__ +``` + +If a target URI that is not recognized as [standalone hyperlink] happens +to end with an underscore, this needs to be backslash-escaped to avoid +being parsed as hyperlink reference. For example + +``` +Use the `source <parrots.txt\_>`__. +``` + +creates an anonymous reference to the file `parrots.txt_`. + +If the reference text happens to end with angle-bracketed text that is +*not* a URI or hyperlink reference, at least one angle-bracket needs to +be backslash-escaped or an escaped space should follow. 
For example, here +are three references to titles describing a tag: + +``` +See `HTML Element: \<a>`_, `HTML Element: <b\> `_, and +`HTML Element: <c>\ `_. +``` + +The reference text may also be omitted, in which case the URI will be +duplicated for use as the reference text. This is useful for relative +URIs where the address or file name is also the desired reference +text: + +``` +See `<a_named_relative_link>`_ or `<an_anonymous_relative_link>`__ +for details. +``` + +:::{CAUTION} +This construct offers easy authoring and maintenance of hyperlinks +at the expense of general readability. Inline URIs, especially +long ones, inevitably interrupt the natural flow of text. For +documents meant to be read in source form, the use of independent +block-level [hyperlink targets] is **strongly recommended**. The +embedded URI construct is most suited to documents intended *only* +to be read in processed form. +::: + +#### Inline Internal Targets + +Doctree element: target. + +Start-string = "\_\`", end-string = "\`". + +Inline internal targets are the equivalent of explicit [internal +hyperlink targets][internal hyperlink targets], but may appear within running text. The syntax +begins with an underscore and a backquote, is followed by a hyperlink +name or phrase, and ends with a backquote. Inline internal targets +may not be anonymous. + +For example, the following paragraph contains a hyperlink target named +"Norwegian Blue": + +``` +Oh yes, the _`Norwegian Blue`. What's, um, what's wrong with it? +``` + +See [Implicit Hyperlink Targets] for the resolution of duplicate +reference names. + +#### Footnote References + +Doctree element: footnote_reference. + +Start-string = "\[", end-string = "\]\_". + +Each footnote reference consists of a square-bracketed label followed +by a trailing underscore. 
Footnote labels are one of: + +- one or more digits (i.e., a number), +- a single "#" (denoting [auto-numbered footnotes]), +- a "#" followed by a simple reference name (an [autonumber label]), + or +- a single "\*" (denoting [auto-symbol footnotes]). + +For example: + +``` +Please RTFM [1]_. + +.. [1] Read The Fine Manual +``` + +#### Citation References + +Doctree element: citation_reference. + +Start-string = "\[", end-string = "\]\_". + +Each citation reference consists of a square-bracketed label followed +by a trailing underscore. Citation labels are simple [reference +names][reference names] (case-insensitive single words, consisting of alphanumerics +plus internal hyphens, underscores, and periods; no whitespace). + +For example: + +``` +Here is a citation reference: [CIT2002]_. +``` + +See [Citations] for the citation itself. + +#### Substitution References + +Doctree element: substitution_reference, reference. + +Start-string = "|", end-string = "|" (optionally followed by "\_" or +"\_\_"). + +Vertical bars are used to bracket the substitution reference text. A +substitution reference may also be a hyperlink reference by appending +a "\_" (named) or "\_\_" (anonymous) suffix; the substitution text is +used for the reference text in the named case. + +The processing system replaces substitution references with the +processed contents of the corresponding [substitution definitions] +(which see for the definition of "correspond"). Substitution +definitions produce inline-compatible elements. + +Examples: + +``` +This is a simple |substitution reference|. It will be replaced by +the processing system. + +This is a combination |substitution and hyperlink reference|_. In +addition to being replaced, the replacement text or element will +refer to the "substitution and hyperlink reference" target. +``` + +(standalone-hyperlink)= + +#### Standalone Hyperlinks + +Doctree element: reference. + +Start-string = end-string = "" (empty string). 
+ +A URI (absolute URI [^uri] or standalone email address) within a text +block is treated as a general external hyperlink with the URI itself +as the link's text. For example: + +``` +See http://www.python.org for info. +``` + +would be marked up in HTML as: + +``` +See <a href="http://www.python.org">http://www.python.org</a> for +info. +``` + +Two forms of URI are recognized: + +1. Absolute URIs. These consist of a scheme, a colon (":"), and a + scheme-specific part whose interpretation depends on the scheme. + + The scheme is the name of the protocol, such as "http", "ftp", + "mailto", or "telnet". The scheme consists of an initial letter, + followed by letters, numbers, and/or "+", "-", ".". Recognition is + limited to known schemes, per the [Official IANA Registry of URI + Schemes][official iana registry of uri schemes] and the W3C's [Retired Index of WWW Addressing Schemes]. + + The scheme-specific part of the resource identifier may be either + hierarchical or opaque: + + - Hierarchical identifiers begin with one or two slashes and may + use slashes to separate hierarchical components of the path. + Examples are web pages and FTP sites: + + ``` + http://www.python.org + + ftp://ftp.python.org/pub/python + ``` + + - Opaque identifiers do not begin with slashes. Examples are + email addresses and newsgroups: + + ``` + mailto:someone@somewhere.com + + news:comp.lang.python + ``` + + With queries, fragments, and %-escape sequences, URIs can become + quite complicated. A reStructuredText parser must be able to + recognize any absolute URI, as defined in [RFC2396] and [RFC2732]. + +2. Standalone email addresses, which are treated as if they were + absolute URIs with a "mailto:" scheme. Example: + + ``` + someone@somewhere.com + ``` + +Punctuation at the end of a URI is not considered part of the URI, +unless the URI is terminated by a closing angle bracket (">"). 
+Backslashes may be used in URIs to escape markup characters, +specifically asterisks ("\*") and underscores ("\_") which are vaid URI +characters (see [Escaping Mechanism] above). + +[^uri]: Uniform Resource Identifier. URIs are a general form of + URLs (Uniform Resource Locators). For the syntax of URIs see + [RFC2396] and [RFC2732]. + +### Units + +(New in Docutils 0.3.10.) + +All measures consist of a positive floating point number in standard +(non-scientific) notation and a unit, possibly separated by one or +more spaces. + +Units are only supported where explicitly mentioned in the reference +manuals. + +#### Length Units + +The following length units are supported by the reStructuredText +parser: + +- em (ems, the height of the element's font) +- ex (x-height, the height of the letter "x") +- px (pixels, relative to the canvas resolution) +- in (inches; 1in=2.54cm) +- cm (centimeters; 1cm=10mm) +- mm (millimeters) +- pt (points; 1pt=1/72in) +- pc (picas; 1pc=12pt) + +This set corresponds to the [length units in CSS]. + +(List and explanations taken from +<http://www.htmlhelp.com/reference/css/units.html#length>.) + +The following are all valid length values: "1.5em", "20 mm", ".5in". + +Length values without unit are completed with a writer-dependent +default (e.g. px with `html4css1`, pt with `latex2e`). See the writer +specific documentation in the [user doc](../../user/) for details. + +#### Percentage Units + +Percentage values have a percent sign ("%") as unit. Percentage +values are relative to other values, depending on the context in which +they occur. + +## Error Handling + +Doctree element: system_message, problematic. + +Markup errors are handled according to the specification in [PEP +258][pep 258]. 
+ +(doctitle-transform)= + +% Local Variables: +% mode: indented-text +% indent-tabs-mode: nil +% sentence-end-double-space: t +% fill-column: 70 +% End: + +[a restructuredtext primer]: ../../user/rst/quickstart.html +[admonition]: directives.html#admonitions +[docinfo transform]: http://docutils.sourceforge.net/docutils/transforms/frontmatter.py +[doctest module]: http://www.python.org/doc/current/lib/module-doctest.html +[doctitle transform]: http://docutils.sourceforge.net/docutils/transforms/frontmatter.py +[docutils]: http://docutils.sourceforge.net/ +[docutils generic dtd]: ../docutils.dtd +[emacs table mode]: http://table.sourceforge.net/ +[figure]: directives.html#figure +[getopt.py]: http://www.python.org/doc/current/lib/module-getopt.html +[gnu libc getopt_long()]: http://www.gnu.org/software/libc/manual/html_node/Getopt-Long-Options.html +[grouch]: http://www.mems-exchange.org/software/grouch/ +[html techniques for web content accessibility guidelines]: http://www.w3.org/TR/WCAG10-HTML-TECHS/#link-text +[image]: directives.html#image +[length units in css]: http://www.w3.org/TR/CSS2/syndata.html#length-units +[meta]: directives.html#meta +[official iana registry of uri schemes]: http://www.iana.org/assignments/uri-schemes +[pep 258]: ../../peps/pep-0258.html +[quick restructuredtext]: ../../user/rst/quickref.html +[quotation marks in international usage]: http://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage +[replace]: directives.html#replace +[restructuredtext]: http://docutils.sourceforge.net/rst.html +[restructuredtext directives]: directives.html +[restructuredtext interpreted text roles]: roles.html +[retired index of www addressing schemes]: http://www.w3.org/Addressing/schemes.html +[rfc2396]: http://www.rfc-editor.org/rfc/rfc2396.txt +[rfc2732]: http://www.rfc-editor.org/rfc/rfc2732.txt +[rfc822]: http://www.rfc-editor.org/rfc/rfc822.txt +[role]: directives.html#custom-interpreted-text-roles +[the docutils document tree]: 
../doctree.html +[transforms]: http://docutils.sourceforge.net/docutils/transforms/ +[unicode category]: http://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values +[world wide web consortium]: http://www.w3.org/ +[zope]: http://www.zope.com/ diff --git a/tests/test_texts/test_texts_roles_.md b/tests/test_texts/test_texts_roles_.md new file mode 100644 index 0000000..14cc07f --- /dev/null +++ b/tests/test_texts/test_texts_roles_.md @@ -0,0 +1,409 @@ +--- +Author: David Goodger +Contact: <mailto:docutils-develop@lists.sourceforge.net> +Copyright: This document has been placed in the public domain. +Date: \$Date\$ +Revision: \$Revision\$ +--- + +# reStructuredText Interpreted Text Roles + +This document describes the interpreted text roles implemented in the +reference reStructuredText parser. + +Interpreted text uses backquotes (\`) around the text. An explicit +role marker may optionally appear before or after the text, delimited +with colons. For example: + +``` +This is `interpreted text` using the default role. + +This is :title:`interpreted text` using an explicit role. +``` + +A default role may be defined by applications of reStructuredText; it +is used if no explicit `:role:` prefix or suffix is given. The +"default default role" is [:title-reference:]. It can be changed +using the [default-role] directive. + +See the [Interpreted Text] section in the [reStructuredText Markup +Specification][restructuredtext markup specification] for syntax details. For details on the hierarchy of +elements, please see [The Docutils Document Tree] and the [Docutils +Generic DTD][docutils generic dtd] XML document type definition. For interpreted text role +implementation details, see [Creating reStructuredText Interpreted +Text Roles][creating restructuredtext interpreted text roles]. + +```{contents} +``` + +## Customization + +Custom interpreted text roles may be defined in a document with the +["role" directive]. 
Customization details are listed with each role. + +(class)= + +A `class` option is recognized by the "role" directive for most +interpreted text roles. A [description](directives.html#role-class) is provided in the ["role" +directive]["role" directive] documentation. + +## Standard Roles + +### `:emphasis:` + +```{eval-rst} + +:Aliases: None +:DTD Element: emphasis +:Customization: + :Options: class_. + :Content: None. +``` + +Implements emphasis. These are equivalent: + +``` +*text* +:emphasis:`text` +``` + +### `:literal:` + +```{eval-rst} + +:Aliases: None +:DTD Element: literal +:Customization: + :Options: class_. + :Content: None. +``` + +Implements inline literal text. These are equivalent: + +``` +``text`` +:literal:`text` +``` + +Care must be taken with backslash-escapes though. These are *not* +equivalent: + +``` +``text \ and \ backslashes`` +:literal:`text \ and \ backslashes` +``` + +The backslashes in the first line are preserved (and do nothing), +whereas the backslashes in the second line escape the following +spaces. + +### `:code:` + +```{eval-rst} + +:Aliases: None +:DTD Element: literal +:Customization: + :Options: class_, language + :Content: None. +``` + +(New in Docutils 0.9.) + +The `code` role marks its content as code in a formal language. + +For syntax highlight of inline code, the ["role" directive] can be used to +build custom roles with the code language specified in the "language" +option. + +For example, the following creates a LaTeX-specific "latex" role: + +``` +.. role:: latex(code) + :language: latex +``` + +Content of the new role is parsed and tagged by the [Pygments] syntax +highlighter. See the [code directive] for more info on parsing and display +of code in reStructuredText. + +In addition to "[class]", the following option is recognized: + +`language` + +: Name of the code's language. + See [supported languages and markup formats] for recognized values. 
+ +### `:math:` + +```{eval-rst} + +:Aliases: None +:DTD Element: math +:Customization: + :Options: class_ + :Content: None. +``` + +(New in Docutils 0.8.) + +The `math` role marks its content as mathematical notation (inline +formula). + +The input format is LaTeX math syntax without the “math delimiters“ +(`$ $`), for example: + +``` +The area of a circle is :math:`A_\text{c} = (\pi/4) d^2`. +``` + +See the [math directive] (producing display formulas) for more info +on mathematical notation in reStructuredText. + +### `:pep-reference:` + +```{eval-rst} + +:Aliases: ``:PEP:`` +:DTD Element: reference +:Customization: + :Options: class_. + :Content: None. +``` + +The `:pep-reference:` role is used to create an HTTP reference to a +PEP (Python Enhancement Proposal). The `:PEP:` alias is usually +used. For example: + +``` +See :PEP:`287` for more information about reStructuredText. +``` + +This is equivalent to: + +``` +See `PEP 287`__ for more information about reStructuredText. + +__ http://www.python.org/peps/pep-0287.html +``` + +### `:rfc-reference:` + +```{eval-rst} + +:Aliases: ``:RFC:`` +:DTD Element: reference +:Customization: + :Options: class_. + :Content: None. +``` + +The `:rfc-reference:` role is used to create an HTTP reference to an +RFC (Internet Request for Comments). The `:RFC:` alias is usually +used. For example: + +``` +See :RFC:`2822` for information about email headers. +``` + +This is equivalent to: + +``` +See `RFC 2822`__ for information about email headers. + +__ http://www.faqs.org/rfcs/rfc2822.html +``` + +### `:strong:` + +```{eval-rst} + +:Aliases: None +:DTD Element: strong +:Customization: + :Options: class_. + :Content: None. +``` + +Implements strong emphasis. These are equivalent: + +``` +**text** +:strong:`text` +``` + +### `:subscript:` + +```{eval-rst} + +:Aliases: ``:sub:`` +:DTD Element: subscript +:Customization: + :Options: class_. + :Content: None. +``` + +Implements subscripts. 
+ +:::{Tip} +Whitespace or punctuation is required around interpreted text, but +often not desired with subscripts & superscripts. +Backslash-escaped whitespace can be used; the whitespace will be +removed from the processed document: + +``` +H\ :sub:`2`\ O +E = mc\ :sup:`2` +``` + +In such cases, readability of the plain text can be greatly +improved with substitutions: + +``` +The chemical formula for pure water is |H2O|. + +.. |H2O| replace:: H\ :sub:`2`\ O +``` + +See [the reStructuredText spec](restructuredtext.html) for further information on +[character-level markup](restructuredtext.html#character-level-inline-markup) and [the substitution mechanism](restructuredtext.html#substitution-references). +::: + +### `:superscript:` + +```{eval-rst} + +:Aliases: ``:sup:`` +:DTD Element: superscript +:Customization: + :Options: class_. + :Content: None. +``` + +Implements superscripts. See the tip in [:subscript:] above. + +### `:title-reference:` + +```{eval-rst} + +:Aliases: ``:title:``, ``:t:``. +:DTD Element: title_reference +:Customization: + :Options: class_. + :Content: None. +``` + +The `:title-reference:` role is used to describe the titles of +books, periodicals, and other materials. It is the equivalent of the +HTML "cite" element, and it is expected that HTML writers will +typically render "title_reference" elements using "cite". + +Since title references are typically rendered with italics, they are +often marked up using `*emphasis*`, which is misleading and vague. +The "title_reference" element provides accurate and unambiguous +descriptive markup. + +Let's assume `:title-reference:` is the default interpreted text +role (see below) for this example: + +``` +`Design Patterns` [GoF95]_ is an excellent read. +``` + +The following document fragment ([pseudo-XML]) will result from +processing: + +``` +<paragraph> + <title_reference> + Design Patterns + + <citation_reference refname="gof95"> + GoF95 + is an excellent read. 
+``` + +`:title-reference:` is the default interpreted text role in the +standard reStructuredText parser. This means that no explicit role is +required. Applications of reStructuredText may designate a different +default role, in which case the explicit `:title-reference:` role +must be used to obtain a `title_reference` element. + +## Specialized Roles + +### `raw` + +```{eval-rst} + +:Aliases: None +:DTD Element: raw +:Customization: + :Options: class_, format + :Content: None +``` + +:::{WARNING} +The "raw" role is a stop-gap measure allowing the author to bypass +reStructuredText's markup. It is a "power-user" feature that +should not be overused or abused. The use of "raw" ties documents +to specific output formats and makes them less portable. + +If you often need to use "raw"-derived interpreted text roles or +the "raw" directive, that is a sign either of overuse/abuse or that +functionality may be missing from reStructuredText. Please +describe your situation in a message to the [Docutils-users] mailing +list. +::: + +The "raw" role indicates non-reStructuredText data that is to be +passed untouched to the Writer. It is the inline equivalent of the +["raw" directive]; see its documentation for details on the +semantics. + +The "raw" role cannot be used directly. The ["role" directive] must +first be used to build custom roles based on the "raw" role. One or +more formats (Writer names) must be provided in a "format" option. + +For example, the following creates an HTML-specific "raw-html" role: + +``` +.. role:: raw-html(raw) + :format: html +``` + +This role can now be used directly to pass data untouched to the HTML +Writer. For example: + +``` +If there just *has* to be a line break here, +:raw-html:`<br />` +it can be accomplished with a "raw"-derived role. +But the line block syntax should be considered first. +``` + +:::{Tip} +Roles based on "raw" should clearly indicate their origin, so +they are not mistaken for reStructuredText markup. 
Using a "raw-" +prefix for role names is recommended. +::: + +In addition to "[class]", the following option is recognized: + +`format` + +: One or more space-separated output format names (Writer names). + +["raw" directive]: directives.html#raw-directive +["role" directive]: directives.html#role +[code directive]: directives.html#code +[creating restructuredtext interpreted text roles]: ../../howto/rst-roles.html +[default-role]: directives.html#default-role +[docutils generic dtd]: ../docutils.dtd +[docutils-users]: ../../user/mailing-lists.html#docutils-user +[interpreted text]: restructuredtext.html#interpreted-text +[math directive]: directives.html#math +[pseudo-xml]: ../doctree.html#pseudo-xml +[pygments]: http://pygments.org/ +[restructuredtext markup specification]: restructuredtext.html +[supported languages and markup formats]: http://pygments.org/languages/ +[the docutils document tree]: ../doctree.html diff --git a/tests/texts/directives.rst b/tests/texts/directives.rst new file mode 100644 index 0000000..db85aed --- /dev/null +++ b/tests/texts/directives.rst @@ -0,0 +1,1909 @@ +============================= + reStructuredText Directives +============================= +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. contents:: + +This document describes the directives implemented in the reference +reStructuredText parser. + +Directives have the following syntax:: + + +-------+-------------------------------+ + | ".. " | directive type "::" directive | + +-------+ block | + | | + +-------------------------------+ + +Directives begin with an explicit markup start (two periods and a +space), followed by the directive type and two colons (collectively, +the "directive marker"). The directive block begins immediately after +the directive marker, and includes all subsequent indented lines. 
The +directive block is divided into arguments, options (a field list), and +content (in that order), any of which may appear. See the Directives_ +section in the `reStructuredText Markup Specification`_ for syntax +details. + +Descriptions below list "doctree elements" (document tree element +names; XML DTD generic identifiers) corresponding to individual +directives. For details on the hierarchy of elements, please see `The +Docutils Document Tree`_ and the `Docutils Generic DTD`_ XML document +type definition. For directive implementation details, see `Creating +reStructuredText Directives`_. + +.. _Directives: restructuredtext.html#directives +.. _reStructuredText Markup Specification: restructuredtext.html +.. _The Docutils Document Tree: ../doctree.html +.. _Docutils Generic DTD: ../docutils.dtd +.. _Creating reStructuredText Directives: + ../../howto/rst-directives.html + + +------------- + Admonitions +------------- + +.. _attention: +.. _caution: +.. _danger: +.. _error: +.. _hint: +.. _important: +.. _note: +.. _tip: +.. _warning: + +Specific Admonitions +==================== + +:Directive Types: "attention", "caution", "danger", "error", "hint", + "important", "note", "tip", "warning", "admonition" +:Doctree Elements: attention, caution, danger, error, hint, important, + note, tip, warning, admonition_, title +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as body elements. + +Admonitions are specially marked "topics" that can appear anywhere an +ordinary body element can. They contain arbitrary body elements. +Typically, an admonition is rendered as an offset block in a document, +sometimes outlined or shaded, with a title matching the admonition +type. For example:: + + .. DANGER:: + Beware killer rabbits! + +This directive might be rendered something like this:: + + +------------------------+ + | !DANGER! | + | | + | Beware killer rabbits! 
| + +------------------------+ + +The following admonition directives have been implemented: + +- attention +- caution +- danger +- error +- hint +- important +- note +- tip +- warning + +Any text immediately following the directive indicator (on the same +line and/or indented on following lines) is interpreted as a directive +block and is parsed for normal body elements. For example, the +following "note" admonition directive contains one paragraph and a +bullet list consisting of two list items:: + + .. note:: This is a note admonition. + This is the second line of the first paragraph. + + - The note contains all indented body elements + following. + - It includes this bullet list. + + +Generic Admonition +================== + +:Directive Type: "admonition" +:Doctree Elements: admonition_, title +:Directive Arguments: One, required (admonition title) +:Directive Options: Possible, see below. +:Directive Content: Interpreted as body elements. + +This is a generic, titled admonition. The title may be anything the +author desires. + +The author-supplied title is also used as a `"classes"`_ attribute value +after being converted into a valid identifier form (down-cased; +non-alphanumeric characters converted to single hyphens; "admonition-" +prefixed). For example, this admonition:: + + .. admonition:: And, by the way... + + You can make up your own admonition too. + +becomes the following document tree (pseudo-XML):: + + <document source="test data"> + <admonition classes="admonition-and-by-the-way"> + <title> + And, by the way... + <paragraph> + You can make up your own admonition too. + +The `common options`_ are recognized: + +``class`` : text + Overrides the computed `"classes"`_ attribute value. + +``name`` : text + Add `text` to the `"names"`_ attribute of the admonition element. + + +-------- + Images +-------- + +There are two image directives: "image" and "figure". 
+ + +Image +===== + +:Directive Type: "image" +:Doctree Element: image_ +:Directive Arguments: One, required (image URI). +:Directive Options: Possible. +:Directive Content: None. + +An "image" is a simple picture:: + + .. image:: picture.png + +Inline images can be defined with an "image" directive in a `substitution +definition`_ + +The URI for the image source file is specified in the directive +argument. As with hyperlink targets, the image URI may begin on the +same line as the explicit markup start and target name, or it may +begin in an indented text block immediately following, with no +intervening blank lines. If there are multiple lines in the link +block, they are stripped of leading and trailing whitespace and joined +together. + +Optionally, the image link block may contain a flat field list, the +_`image options`. For example:: + + .. image:: picture.jpeg + :height: 100px + :width: 200 px + :scale: 50 % + :alt: alternate text + :align: right + +The following options are recognized: + +``alt`` : text + Alternate text: a short description of the image, displayed by + applications that cannot display images, or spoken by applications + for visually impaired users. + +``height`` : `length`_ + The desired height of the image. + Used to reserve space or scale the image vertically. When the "scale" + option is also specified, they are combined. For example, a height of + 200px and a scale of 50 is equivalent to a height of 100px with no scale. + +``width`` : `length`_ or `percentage`_ of the current line width + The width of the image. + Used to reserve space or scale the image horizontally. As with "height" + above, when the "scale" option is also specified, they are combined. + + .. _length: restructuredtext.html#length-units + .. _percentage: restructuredtext.html#percentage-units + +``scale`` : integer percentage (the "%" symbol is optional) + The uniform scaling factor of the image. The default is "100 %", i.e. + no scaling. 
+ + If no "height" or "width" options are specified, the `Python Imaging + Library`_ (PIL) may be used to determine them, if it is installed and + the image file is available. + +``align`` : "top", "middle", "bottom", "left", "center", or "right" + The alignment of the image, equivalent to the HTML ``<img>`` tag's + "align" attribute. The values "top", "middle", and "bottom" + control an image's vertical alignment (relative to the text + baseline); they are only useful for inline images (substitutions). + The values "left", "center", and "right" control an image's + horizontal alignment, allowing the image to float and have the + text flow around it. The specific behavior depends upon the + browser or rendering software used. + +``target`` : text (URI or reference name) + Makes the image into a hyperlink reference ("clickable"). The + option argument may be a URI (relative or absolute), or a + `reference name`_ with underscore suffix (e.g. ```a name`_``). + +and the common options `:class:`_ and `:name:`_. + +.. _substitution definition: restructuredtext.html#substitution-definitions + + +Figure +====== + +:Directive Type: "figure" +:Doctree Elements: figure_, image_, caption_, legend_ +:Directive Arguments: One, required (image URI). +:Directive Options: Possible. +:Directive Content: Interpreted as the figure caption and an optional + legend. + +A "figure" consists of image_ data (including `image options`_), an optional +caption (a single paragraph), and an optional legend (arbitrary body +elements). For page-based output media, figures might float to a different +position if this helps the page layout. +:: + + .. figure:: picture.png + :scale: 50 % + :alt: map to buried treasure + + This is the caption of the figure (a simple paragraph). + + The legend consists of all elements after the caption. 
In this + case, the legend consists of this paragraph and the following + table: + + +-----------------------+-----------------------+ + | Symbol | Meaning | + +=======================+=======================+ + | .. image:: tent.png | Campground | + +-----------------------+-----------------------+ + | .. image:: waves.png | Lake | + +-----------------------+-----------------------+ + | .. image:: peak.png | Mountain | + +-----------------------+-----------------------+ + +There must be blank lines before the caption paragraph and before the +legend. To specify a legend without a caption, use an empty comment +("..") in place of the caption. + +The "figure" directive supports all of the options of the "image" +directive (see `image options`_ above). These options (except +"align") are passed on to the contained image. + +``align`` : "left", "center", or "right" + The horizontal alignment of the figure, allowing the image to + float and have the text flow around it. The specific behavior + depends upon the browser or rendering software used. + +In addition, the following options are recognized: + +``figwidth`` : "image", length_, or percentage_ of current line width + The width of the figure. + Limits the horizontal space used by the figure. + A special value of "image" is allowed, in which case the + included image's actual width is used (requires the `Python Imaging + Library`_). If the image file is not found or the required software is + unavailable, this option is ignored. + + Sets the "width" attribute of the "figure" doctree element. + + This option does not scale the included image; use the "width" + `image`_ option for that. :: + + +---------------------------+ + | figure | + | | + |<------ figwidth --------->| + | | + | +---------------------+ | + | | image | | + | | | | + | |<--- width --------->| | + | +---------------------+ | + | | + |The figure's caption should| + |wrap at this width. 
| + +---------------------------+ + +``figclass`` : text + Set a `"classes"`_ attribute value on the figure element. See the + class_ directive below. + +.. _Python Imaging Library: http://www.pythonware.com/products/pil/ + + +--------------- + Body Elements +--------------- + +Topic +===== + +:Directive Type: "topic" +:Doctree Element: topic_ +:Directive Arguments: 1, required (topic title). +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as the topic body. + +A topic is like a block quote with a title, or a self-contained +section with no subsections. Use the "topic" directive to indicate a +self-contained idea that is separate from the flow of the document. +Topics may occur anywhere a section or transition may occur. Body +elements and topics may not contain nested topics. + +The directive's sole argument is interpreted as the topic title; the +next line must be blank. All subsequent lines make up the topic body, +interpreted as body elements. For example:: + + .. topic:: Topic Title + + Subsequent indented lines comprise + the body of the topic, and are + interpreted as body elements. + + +Sidebar +======= + +:Directive Type: "sidebar" +:Doctree Element: sidebar_ +:Directive Arguments: One, required (sidebar title). +:Directive Options: Possible (see below). +:Directive Content: Interpreted as the sidebar body. + +Sidebars are like miniature, parallel documents that occur inside +other documents, providing related or reference material. A sidebar +is typically offset by a border and "floats" to the side of the page; +the document's main text may flow around it. Sidebars can also be +likened to super-footnotes; their content is outside of the flow of +the document's main text. + +Sidebars may occur anywhere a section or transition may occur. Body +elements (including sidebars) may not contain nested sidebars. 
+ +The directive's sole argument is interpreted as the sidebar title, +which may be followed by a subtitle option (see below); the next line +must be blank. All subsequent lines make up the sidebar body, +interpreted as body elements. For example:: + + .. sidebar:: Sidebar Title + :subtitle: Optional Sidebar Subtitle + + Subsequent indented lines comprise + the body of the sidebar, and are + interpreted as body elements. + +The following options are recognized: + +``subtitle`` : text + The sidebar's subtitle. + +and the common options `:class:`_ and `:name:`_. + + +Line Block +========== + +.. admonition:: Deprecated + + The "line-block" directive is deprecated. Use the `line block + syntax`_ instead. + + .. _line block syntax: restructuredtext.html#line-blocks + +:Directive Type: "line-block" +:Doctree Element: line_block_ +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Becomes the body of the line block. + +The "line-block" directive constructs an element where line breaks and +initial indentation is significant and inline markup is supported. It +is equivalent to a `parsed literal block`_ with different rendering: +typically in an ordinary serif typeface instead of a +typewriter/monospaced face, and not automatically indented. (Have the +line-block directive begin a block quote to get an indented line +block.) Line blocks are useful for address blocks and verse (poetry, +song lyrics), where the structure of lines is significant. For +example, here's a classic:: + + "To Ma Own Beloved Lassie: A Poem on her 17th Birthday", by + Ewan McTeagle (for Lassie O'Shea): + + .. line-block:: + + Lend us a couple of bob till Thursday. + I'm absolutely skint. + But I'm expecting a postal order and I can pay you back + as soon as it comes. + Love, Ewan. + + + +.. _parsed-literal: + +Parsed Literal Block +==================== + +:Directive Type: "parsed-literal" +:Doctree Element: literal_block_ +:Directive Arguments: None. 
+:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Becomes the body of the literal block. + +Unlike an ordinary literal block, the "parsed-literal" directive +constructs a literal block where the text is parsed for inline markup. +It is equivalent to a `line block`_ with different rendering: +typically in a typewriter/monospaced typeface, like an ordinary +literal block. Parsed literal blocks are useful for adding hyperlinks +to code examples. + +However, care must be taken with the text, because inline markup is +recognized and there is no protection from parsing. Backslash-escapes +may be necessary to prevent unintended parsing. And because the +markup characters are removed by the parser, care must also be taken +with vertical alignment. Parsed "ASCII art" is tricky, and extra +whitespace may be necessary. + +For example, all the element names in this content model are links:: + + .. parsed-literal:: + + ( (title_, subtitle_?)?, + decoration_?, + (docinfo_, transition_?)?, + `%structure.model;`_ ) + +Code +==== + +:Directive Type: "code" +:Doctree Element: literal_block_, `inline elements`_ +:Directive Arguments: One, optional (formal language). +:Directive Options: name, class, number-lines. +:Directive Content: Becomes the body of the literal block. +:Configuration Setting: syntax_highlight_. + +(New in Docutils 0.9) + +The "code" directive constructs a literal block. If the code language is +specified, the content is parsed by the Pygments_ syntax highlighter and +tokens are stored in nested `inline elements`_ with class arguments +according to their syntactic category. The actual highlighting requires +a style-sheet (e.g. one `generated by Pygments`__, see the +`sandbox/stylesheets`__ for examples). + +The parsing can be turned off with the syntax_highlight_ configuration +setting and command line option or by specifying the language as `:class:`_ +option instead of directive argument. 
This also avoids warnings +when Pygments_ is not installed or the language is not in the +`supported languages and markup formats`_. + +For inline code, use the `"code" role`_. + +__ http://pygments.org/docs/cmdline/#generating-styles +__ http://docutils.sourceforge.net/sandbox/stylesheets/ +.. _Pygments: http://pygments.org/ +.. _syntax_highlight: ../../user/config.html#syntax-highlight +.. _supported languages and markup formats: http://pygments.org/languages/ +.. _"code" role: roles.html#code + + +The following options are recognized: + +``number-lines`` : [start line number] + Precede every line with a line number. + The optional argument is the number of the first line (default 1). + +and the common options `:class:`_ and `:name:`_. + +Example:: + The content of the following directive :: + + .. code:: python + + def my_function(): + "just a test" + print 8/2 + + is parsed and marked up as Python source code. + + +Math +==== + +:Directive Type: "math" +:Doctree Element: math_block_ +:Directive Arguments: One, optional: prepended to content. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as math block(s). + Content blocks separated by a blank line are put in + separate math-block doctree elements. +:Configuration Setting: math_output_ + +(New in Docutils 0.8) + +The "math" directive inserts blocks with mathematical content +(display formulas, equations) into the document. The input format is +*LaTeX math syntax*\ [#math-syntax]_ with support for Unicode +symbols, for example:: + + .. math:: + + α_t(i) = P(O_1, O_2, … O_t, q_t = S_i λ) + +Support is limited to a subset of *LaTeX math* by the conversion +required for many output formats. For HTML, the `math_output`_ +configuration setting (or the corresponding ``--math-output`` +command line option) selects between alternative output formats with +different subsets of supported elements. If a writer does not +support math typesetting at all, the content is inserted verbatim. + +..
[#math-syntax] The supported LaTeX commands include AMS extensions + (see, e.g., the `Short Math Guide`_). + + +For inline math, use the `"math" role`_. + +.. _Short Math Guide: ftp://ftp.ams.org/ams/doc/amsmath/short-math-guide.pdf +.. _"math" role: roles.html#math +.. _math_output: ../../user/config.html#math-output + +Rubric +====== + +:Directive Type: "rubric" +:Doctree Element: rubric_ +:Directive Arguments: 1, required (rubric text). +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: None. + +.. + + rubric n. 1. a title, heading, or the like, in a manuscript, + book, statute, etc., written or printed in red or otherwise + distinguished from the rest of the text. ... + + -- Random House Webster's College Dictionary, 1991 + +The "rubric" directive inserts a "rubric" element into the document +tree. A rubric is like an informal heading that doesn't correspond to +the document's structure. + + +Epigraph +======== + +:Directive Type: "epigraph" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. + +An epigraph is an apposite (suitable, apt, or pertinent) short +inscription, often a quotation or poem, at the beginning of a document +or section. + +The "epigraph" directive produces an "epigraph"-class block quote. +For example, this input:: + + .. epigraph:: + + No matter where you go, there you are. + + -- Buckaroo Banzai + +becomes this document tree fragment:: + + <block_quote classes="epigraph"> + <paragraph> + No matter where you go, there you are. + <attribution> + Buckaroo Banzai + + +Highlights +========== + +:Directive Type: "highlights" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. + +Highlights summarize the main points of a document or section, often +consisting of a list. 
+ +The "highlights" directive produces a "highlights"-class block quote. +See Epigraph_ above for an analogous example. + + +Pull-Quote +========== + +:Directive Type: "pull-quote" +:Doctree Element: block_quote_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as the body of the block quote. + +A pull-quote is a small selection of text "pulled out and quoted", +typically in a larger typeface. Pull-quotes are used to attract +attention, especially in long articles. + +The "pull-quote" directive produces a "pull-quote"-class block quote. +See Epigraph_ above for an analogous example. + + +Compound Paragraph +================== + +:Directive Type: "compound" +:Doctree Element: compound_ +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: Interpreted as body elements. + +(New in Docutils 0.3.6) + +The "compound" directive is used to create a compound paragraph, which +is a single logical paragraph containing multiple physical body +elements such as simple paragraphs, literal blocks, tables, lists, +etc., instead of directly containing text and inline elements. For +example:: + + .. compound:: + + The 'rm' command is very dangerous. If you are logged + in as root and enter :: + + cd / + rm -rf * + + you will erase the entire contents of your file system. + +In the example above, a literal block is "embedded" within a sentence +that begins in one physical paragraph and ends in another. + +.. note:: + + The "compound" directive is *not* a generic block-level container + like HTML's ``<div>`` element. Do not use it only to group a + sequence of elements, or you may get unexpected results. + + If you need a generic block-level container, please use the + container_ directive, described below. 
+ +Compound paragraphs are typically rendered as multiple distinct text +blocks, with the possibility of variations to emphasize their logical +unity: + +* If paragraphs are rendered with a first-line indent, only the first + physical paragraph of a compound paragraph should have that indent + -- second and further physical paragraphs should omit the indents; +* vertical spacing between physical elements may be reduced; +* and so on. + + +Container +========= + +:Directive Type: "container" +:Doctree Element: container_ +:Directive Arguments: One or more, optional (class names). +:Directive Options: `:name:`_ +:Directive Content: Interpreted as body elements. + +(New in Docutils 0.3.10) + +The "container" directive surrounds its contents (arbitrary body +elements) with a generic block-level "container" element. Combined +with the optional "classes_" attribute argument(s), this is an +extension mechanism for users & applications. For example:: + + .. container:: custom + + This paragraph might be rendered in a custom way. + +Parsing the above results in the following pseudo-XML:: + + <container classes="custom"> + <paragraph> + This paragraph might be rendered in a custom way. + +The "container" directive is the equivalent of HTML's ``<div>`` +element. It may be used to group a sequence of elements for user- or +application-specific purposes. + + + +-------- + Tables +-------- + +Formal tables need more structure than the reStructuredText syntax +supplies. Tables may be given titles with the table_ directive. +Sometimes reStructuredText tables are inconvenient to write, or table +data in a standard format is readily available. The csv-table_ +directive supports CSV data. + + +Table +===== + +:Directive Type: "table" +:Doctree Element: table_ +:Directive Arguments: 1, optional (table title). +:Directive Options: `:class:`_, `:name:`_ +:Directive Content: A normal reStructuredText table. 
+ +(New in Docutils 0.3.1) + +The "table" directive is used to create a titled table, to associate a +title with a table:: + + .. table:: Truth table for "not" + + ===== ===== + A not A + ===== ===== + False True + True False + ===== ===== + + +.. _csv-table: + +CSV Table +========= + +:Directive Type: "csv-table" +:Doctree Element: table_ +:Directive Arguments: 1, optional (table title). +:Directive Options: Possible (see below). +:Directive Content: A CSV (comma-separated values) table. + +.. WARNING:: + + The "csv-table" directive's ":file:" and ":url:" options represent + potential security holes. They can be disabled with the + "file_insertion_enabled_" runtime setting. + +(New in Docutils 0.3.4) + +The "csv-table" directive is used to create a table from CSV +(comma-separated values) data. CSV is a common data format generated +by spreadsheet applications and commercial databases. The data may be +internal (an integral part of the document) or external (a separate +file). + +Example:: + + .. csv-table:: Frozen Delights! + :header: "Treat", "Quantity", "Description" + :widths: 15, 10, 30 + + "Albatross", 2.99, "On a stick!" + "Crunchy Frog", 1.49, "If we took the bones out, it wouldn't be + crunchy, now would it?" + "Gannet Ripple", 1.99, "On a stick!" + +Block markup and inline markup within cells is supported. Line ends +are recognized within cells. + +Working limitations: + +* There is no support for checking that the number of columns in each + row is the same. However, this directive supports CSV generators + that do not insert "empty" entries at the end of short rows, by + automatically adding empty entries. + + .. Add "strict" option to verify input? + +.. [#whitespace-delim] Whitespace delimiters are supported only for external + CSV files. + +.. [#ASCII-char] With Python 2, the values for the ``delimiter``, + ``quote``, and ``escape`` options must be ASCII characters.
(The csv + module does not support Unicode and all non-ASCII characters are + encoded as multi-byte utf-8 string). This limitation does not exist + under Python 3. + +The following options are recognized: + +``widths`` : integer [, integer...] + A comma- or space-separated list of relative column widths. The + default is equal-width columns (100%/#columns). + +``header-rows`` : integer + The number of rows of CSV data to use in the table header. + Defaults to 0. + +``stub-columns`` : integer + The number of table columns to use as stubs (row titles, on the + left). Defaults to 0. + +``header`` : CSV data + Supplemental data for the table header, added independently of and + before any ``header-rows`` from the main CSV data. Must use the + same CSV format as the main CSV data. + +``file`` : string (newlines removed) + The local filesystem path to a CSV data file. + +``url`` : string (whitespace removed) + An Internet URL reference to a CSV data file. + +``encoding`` : name of text encoding + The text encoding of the external CSV data (file or URL). + Defaults to the document's encoding (if specified). + +``delim`` : char | "tab" | "space" [#whitespace-delim]_ + A one-character string\ [#ASCII-char]_ used to separate fields. + Defaults to ``,`` (comma). May be specified as a Unicode code + point; see the unicode_ directive for syntax details. + +``quote`` : char + A one-character string\ [#ASCII-char]_ used to quote elements + containing the delimiter or which start with the quote + character. Defaults to ``"`` (quote). May be specified as a + Unicode code point; see the unicode_ directive for syntax + details. + +``keepspace`` : flag + Treat whitespace immediately following the delimiter as + significant. The default is to ignore such whitespace. + +``escape`` : char + A one-character\ [#ASCII-char]_ string used to escape the + delimiter or quote characters. May be specified as a Unicode + code point; see the unicode_ directive for syntax details. 
Used + when the delimiter is used in an unquoted field, or when quote + characters are used within a field. The default is to double-up + the character, e.g. "He said, ""Hi!""" + + .. Add another possible value, "double", to explicitly indicate + the default case? + +and the common options `:class:`_ and `:name:`_. + + +List Table +========== + +:Directive Type: "list-table" +:Doctree Element: table_ +:Directive Arguments: 1, optional (table title). +:Directive Options: Possible (see below). +:Directive Content: A uniform two-level bullet list. + +(New in Docutils 0.3.8. This is an initial implementation; `further +ideas`__ may be implemented in the future.) + +__ ../../dev/rst/alternatives.html#list-driven-tables + +The "list-table" directive is used to create a table from data in a +uniform two-level bullet list. "Uniform" means that each sublist +(second-level list) must contain the same number of list items. + +Example:: + + .. list-table:: Frozen Delights! + :widths: 15 10 30 + :header-rows: 1 + + * - Treat + - Quantity + - Description + * - Albatross + - 2.99 + - On a stick! + * - Crunchy Frog + - 1.49 + - If we took the bones out, it wouldn't be + crunchy, now would it? + * - Gannet Ripple + - 1.99 + - On a stick! + +The following options are recognized: + +``widths`` : integer [integer...] + A comma- or space-separated list of relative column widths. The + default is equal-width columns (100%/#columns). + +``header-rows`` : integer + The number of rows of list data to use in the table header. + Defaults to 0. + +``stub-columns`` : integer + The number of table columns to use as stubs (row titles, on the + left). Defaults to 0. + +and the common options `:class:`_ and `:name:`_. + + +---------------- + Document Parts +---------------- + +.. _contents: + +Table of Contents +================= + +:Directive Type: "contents" +:Doctree Elements: pending_, topic_ +:Directive Arguments: One, optional: title. +:Directive Options: Possible. +:Directive Content: None. 
+ +The "contents" directive generates a table of contents (TOC) in a +topic_. Topics, and therefore tables of contents, may occur anywhere +a section or transition may occur. Body elements and topics may not +contain tables of contents. + +Here's the directive in its simplest form:: + + .. contents:: + +Language-dependent boilerplate text will be used for the title. The +English default title text is "Contents". + +An explicit title may be specified:: + + .. contents:: Table of Contents + +The title may span lines, although it is not recommended:: + + .. contents:: Here's a very long Table of + Contents title + +Options may be specified for the directive, using a field list:: + + .. contents:: Table of Contents + :depth: 2 + +If the default title is to be used, the options field list may begin +on the same line as the directive marker:: + + .. contents:: :depth: 2 + +The following options are recognized: + +``depth`` : integer + The number of section levels that are collected in the table of + contents. The default is unlimited depth. + +``local`` : flag (empty) + Generate a local table of contents. Entries will only include + subsections of the section in which the directive is given. If no + explicit title is given, the table of contents will not be titled. + +``backlinks`` : "entry" or "top" or "none" + Generate links from section headers back to the table of contents + entries, the table of contents itself, or generate no backlinks. + +``class`` : text + Set a `"classes"`_ attribute value on the topic element. See the + class_ directive below. + + +.. _sectnum: +.. _section-numbering: + +Automatic Section Numbering +=========================== + +:Directive Type: "sectnum" or "section-numbering" (synonyms) +:Doctree Elements: pending_, generated_ +:Directive Arguments: None. +:Directive Options: Possible. +:Directive Content: None. 
+:Configuration Setting: sectnum_xform_ + +The "sectnum" (or "section-numbering") directive automatically numbers +sections and subsections in a document (if not disabled by the +``--no-section-numbering`` command line option or the `sectnum_xform`_ +configuration setting). + +Section numbers are of the "multiple enumeration" form, where each +level has a number, separated by periods. For example, the title of section +1, subsection 2, subsubsection 3 would have "1.2.3" prefixed. + +The "sectnum" directive does its work in two passes: the initial parse +and a transform. During the initial parse, a "pending" element is +generated which acts as a placeholder, storing any options internally. +At a later stage in the processing, the "pending" element triggers a +transform, which adds section numbers to titles. Section numbers are +enclosed in a "generated" element, and titles have their "auto" +attribute set to "1". + +The following options are recognized: + +``depth`` : integer + The number of section levels that are numbered by this directive. + The default is unlimited depth. + +``prefix`` : string + An arbitrary string that is prefixed to the automatically + generated section numbers. It may be something like "3.2.", which + will produce "3.2.1", "3.2.2", "3.2.2.1", and so on. Note that + any separating punctuation (in the example, a period, ".") must be + explicitly provided. The default is no prefix. + +``suffix`` : string + An arbitrary string that is appended to the automatically + generated section numbers. The default is no suffix. + +``start`` : integer + The value that will be used for the first section number. + Combined with ``prefix``, this may be used to force the right + numbering for a document split over several source files. The + default is 1. + +.. _sectnum_xform: ../../user/config.html#sectnum-xform + + +.. _header: +.. 
_footer: + +Document Header & Footer +======================== + +:Directive Types: "header" and "footer" +:Doctree Elements: decoration_, header, footer +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Interpreted as body elements. + +(New in Docutils 0.3.8) + +The "header" and "footer" directives create document decorations, +useful for page navigation, notes, time/datestamp, etc. For example:: + + .. header:: This space for rent. + +This will add a paragraph to the document header, which will appear at +the top of the generated web page or at the top of every printed page. + +These directives may be used multiple times, cumulatively. There is +currently support for only one header and footer. + +.. note:: + + While it is possible to use the "header" and "footer" directives to + create navigational elements for web pages, you should be aware + that Docutils is meant to be used for *document* processing, and + that a navigation bar is not typically part of a document. + + Thus, you may soon find Docutils' abilities to be insufficient for + these purposes. At that time, you should consider using a + documentation generator like Sphinx_ rather than the "header" and + "footer" directives. + + .. _Sphinx: http://sphinx-doc.org/ + +In addition to the use of these directives to populate header and +footer content, content may also be added automatically by the +processing system. For example, if certain runtime settings are +enabled, the document footer is populated with processing information +such as a datestamp, a link to `the Docutils website`_, etc. + +.. _the Docutils website: http://docutils.sourceforge.net + + +------------ + References +------------ + +.. _target-notes: + +Target Footnotes +================ + +:Directive Type: "target-notes" +:Doctree Elements: pending_, footnote_, footnote_reference_ +:Directive Arguments: None. +:Directive Options: `:class:`_, `:name:`_ +:Directive Options: Possible. +:Directive Content: None. 
+ +The "target-notes" directive creates a footnote for each external +target in the text, and corresponding footnote references after each +reference. For every explicit target (of the form, ``.. _target name: +URL``) in the text, a footnote will be generated containing the +visible URL as content. + + +Footnotes +========= + +**NOT IMPLEMENTED YET** + +:Directive Type: "footnotes" +:Doctree Elements: pending_, topic_ +:Directive Arguments: None? +:Directive Options: Possible? +:Directive Content: None. + +@@@ + + +Citations +========= + +**NOT IMPLEMENTED YET** + +:Directive Type: "citations" +:Doctree Elements: pending_, topic_ +:Directive Arguments: None? +:Directive Options: Possible? +:Directive Content: None. + +@@@ + + +--------------- + HTML-Specific +--------------- + +Meta +==== + +:Directive Type: "meta" +:Doctree Element: meta (non-standard) +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: Must contain a flat field list. + +The "meta" directive is used to specify HTML metadata stored in HTML +META tags. "Metadata" is data about data, in this case data about web +pages. Metadata is used to describe and classify web pages in the +World Wide Web, in a form that is easy for search engines to extract +and collate. + +Within the directive block, a flat field list provides the syntax for +metadata. The field name becomes the contents of the "name" attribute +of the META tag, and the field body (interpreted as a single string +without inline markup) becomes the contents of the "content" +attribute. For example:: + + .. 
meta:: + :description: The reStructuredText plaintext markup language + :keywords: plaintext, markup language + +This would be converted to the following HTML:: + + <meta name="description" + content="The reStructuredText plaintext markup language"> + <meta name="keywords" content="plaintext, markup language"> + +Support for other META attributes ("http-equiv", "scheme", "lang", +"dir") are provided through field arguments, which must be of the form +"attr=value":: + + .. meta:: + :description lang=en: An amusing story + :description lang=fr: Une histoire amusante + +And their HTML equivalents:: + + <meta name="description" lang="en" content="An amusing story"> + <meta name="description" lang="fr" content="Une histoire amusante"> + +Some META tags use an "http-equiv" attribute instead of the "name" +attribute. To specify "http-equiv" META tags, simply omit the name:: + + .. meta:: + :http-equiv=Content-Type: text/html; charset=ISO-8859-1 + +HTML equivalent:: + + <meta http-equiv="Content-Type" + content="text/html; charset=ISO-8859-1"> + + +Imagemap +======== + +**NOT IMPLEMENTED YET** + +Non-standard element: imagemap. + + +----------------------------------------- + Directives for Substitution Definitions +----------------------------------------- + +The directives in this section may only be used in substitution +definitions. They may not be used directly, in standalone context. +The `image`_ directive may be used both in substitution definitions +and in the standalone context. + + +.. _replace: + +Replacement Text +================ + +:Directive Type: "replace" +:Doctree Element: Text & `inline elements`_ +:Directive Arguments: None. +:Directive Options: None. +:Directive Content: A single paragraph; may contain inline markup. + +The "replace" directive is used to indicate replacement text for a +substitution reference. It may be used within substitution +definitions only. For example, this directive can be used to expand +abbreviations:: + + .. 
|reST| replace:: reStructuredText + + Yes, |reST| is a long word, so I can't blame anyone for wanting to + abbreviate it. + +As reStructuredText doesn't support nested inline markup, the only way +to create a reference with styled text is to use substitutions with +the "replace" directive:: + + I recommend you try |Python|_. + + .. |Python| replace:: Python, *the* best language around + .. _Python: http://www.python.org/ + + +.. _unicode: + +Unicode Character Codes +======================= + +:Directive Type: "unicode" +:Doctree Element: Text +:Directive Arguments: One or more, required (Unicode character codes, + optional text, and comments). +:Directive Options: Possible. +:Directive Content: None. + +The "unicode" directive converts Unicode character codes (numerical +values) to characters, and may be used in substitution definitions +only. + +The arguments, separated by spaces, can be: + +* **character codes** as + + - decimal numbers or + + - hexadecimal numbers, prefixed by ``0x``, ``x``, ``\x``, ``U+``, + ``u``, or ``\u`` or as XML-style hexadecimal character entities, + e.g. ``&#x1a2b;`` + +* **text**, which is used as-is. + +Text following " .. " is a comment and is ignored. The spaces between +the arguments are ignored and thus do not appear in the output. +Hexadecimal codes are case-insensitive. + +For example, the following text:: + + Copyright |copy| 2003, |BogusMegaCorp (TM)| |---| + all rights reserved. + + .. |copy| unicode:: 0xA9 .. copyright sign + .. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122 + .. with trademark sign + .. |---| unicode:: U+02014 .. em dash + :trim: + +results in: + + Copyright |copy| 2003, |BogusMegaCorp (TM)| |---| + all rights reserved. + + .. |copy| unicode:: 0xA9 .. copyright sign + .. |BogusMegaCorp (TM)| unicode:: BogusMegaCorp U+2122 + .. with trademark sign + .. |---| unicode:: U+02014 .. 
em dash + :trim: + +The following options are recognized: + +``ltrim`` : flag + Whitespace to the left of the substitution reference is removed. + +``rtrim`` : flag + Whitespace to the right of the substitution reference is removed. + +``trim`` : flag + Equivalent to ``ltrim`` plus ``rtrim``; whitespace on both sides + of the substitution reference is removed. + + +Date +==== + +:Directive Type: "date" +:Doctree Element: Text +:Directive Arguments: One, optional (date format). +:Directive Options: None. +:Directive Content: None. + +The "date" directive generates the current local date and inserts it +into the document as text. This directive may be used in substitution +definitions only. + +The optional directive content is interpreted as the desired date +format, using the same codes as Python's time.strftime function. The +default format is "%Y-%m-%d" (ISO 8601 date), but time fields can also +be used. Examples:: + + .. |date| date:: + .. |time| date:: %H:%M + + Today's date is |date|. + + This document was generated on |date| at |time|. + + +--------------- + Miscellaneous +--------------- + +.. _include: + +Including an External Document Fragment +======================================= + +:Directive Type: "include" +:Doctree Elements: Depend on data being included + (literal_block_ with ``code`` or ``literal`` option). +:Directive Arguments: One, required (path to the file to include). +:Directive Options: Possible. +:Directive Content: None. +:Configuration Setting: file_insertion_enabled_ + +.. WARNING:: + + The "include" directive represents a potential security hole. It + can be disabled with the "file_insertion_enabled_" runtime setting. + + .. _file_insertion_enabled: ../../user/config.html#file-insertion-enabled + +The "include" directive reads a text file. The directive argument is +the path to the file to be included, relative to the document +containing the directive. 
Unless the options ``literal`` or ``code`` +are given, the file is parsed in the current document's context at the +point of the directive. For example:: + + This first example will be parsed at the document level, and can + thus contain any construct, including section headers. + + .. include:: inclusion.txt + + Back in the main document. + + This second example will be parsed in a block quote context. + Therefore it may only contain body elements. It may not + contain section headers. + + .. include:: inclusion.txt + +If an included document fragment contains section structure, the title +adornments must match those of the master document. + +Standard data files intended for inclusion in reStructuredText +documents are distributed with the Docutils source code, located in +the "docutils" package in the ``docutils/parsers/rst/include`` +directory. To access these files, use the special syntax for standard +"include" data files, angle brackets around the file name:: + + .. include:: <isonum.txt> + +The current set of standard "include" data files consists of sets of +substitution definitions. See `reStructuredText Standard Definition +Files`__ for details. + +__ definitions.html + +The following options are recognized: + +``start-line`` : integer + Only the content starting from this line will be included. + (As usual in Python, the first line has index 0 and negative values + count from the end.) + +``end-line`` : integer + Only the content up to (but excluding) this line will be included. + +``start-after`` : text to find in the external data file + Only the content after the first occurrence of the specified text + will be included. + +``end-before`` : text to find in the external data file + Only the content before the first occurrence of the specified text + (but after any ``after`` text) will be included. + +``literal`` : flag (empty) + The entire included text is inserted into the document as a single + literal block. 
+ +``code`` : formal language (optional) + The argument and the content of the included file are passed to + the code_ directive (useful for program listings). + (New in Docutils 0.9) + +``number-lines`` : [start line number] + Precede every code line with a line number. + The optional argument is the number of the first line (default 1). + Works only with ``code`` or ``literal``. + (New in Docutils 0.9) + +``encoding`` : name of text encoding + The text encoding of the external data file. Defaults to the + document's input_encoding_. + + .. _input_encoding: ../../user/config.html#input-encoding + +``tab-width`` : integer + Number of spaces for hard tab expansion. + A negative value prevents expansion of hard tabs. Defaults to the + tab_width_ configuration setting. + + .. _tab_width: ../../user/config.html#tab-width + + +With ``code`` or ``literal`` the common options `:class:`_ and +`:name:`_ are recognized as well. + +Combining ``start/end-line`` and ``start-after/end-before`` is possible. The +text markers will be searched in the specified lines (further limiting the +included content). + +.. _raw-directive: + +Raw Data Pass-Through +===================== + +:Directive Type: "raw" +:Doctree Element: raw_ +:Directive Arguments: One or more, required (output format types). +:Directive Options: Possible. +:Directive Content: Stored verbatim, uninterpreted. None (empty) if a + "file" or "url" option given. +:Configuration Setting: raw_enabled_ + +.. WARNING:: + + The "raw" directive represents a potential security hole. It can + be disabled with the "raw_enabled_" or "file_insertion_enabled_" + runtime settings. + + .. _raw_enabled: ../../user/config.html#raw-enabled + +.. Caution:: + + The "raw" directive is a stop-gap measure allowing the author to + bypass reStructuredText's markup. It is a "power-user" feature + that should not be overused or abused. The use of "raw" ties + documents to specific output formats and makes them less portable. 
+ + If you often need to use the "raw" directive or a "raw"-derived + interpreted text role, that is a sign either of overuse/abuse or + that functionality may be missing from reStructuredText. Please + describe your situation in a message to the Docutils-users_ mailing + list. + +.. _Docutils-users: ../../user/mailing-lists.html#docutils-users + +The "raw" directive indicates non-reStructuredText data that is to be +passed untouched to the Writer. The names of the output formats are +given in the directive arguments. The interpretation of the raw data +is up to the Writer. A Writer may ignore any raw output not matching +its format. + +For example, the following input would be passed untouched by an HTML +Writer:: + + .. raw:: html + + <hr width=50 size=10> + +A LaTeX Writer could insert the following raw content into its +output stream:: + + .. raw:: latex + + \setlength{\parindent}{0pt} + +Raw data can also be read from an external file, specified in a +directive option. In this case, the content block must be empty. For +example:: + + .. raw:: html + :file: inclusion.html + +Inline equivalents of the "raw" directive can be defined via +`custom interpreted text roles`_ derived from the `"raw" role`_. + +The following options are recognized: + +``file`` : string (newlines removed) + The local filesystem path of a raw data file to be included. + +``url`` : string (whitespace removed) + An Internet URL reference to a raw data file to be included. + +``encoding`` : name of text encoding + The text encoding of the external raw data (file or URL). + Defaults to the document's encoding (if specified). + +.. _"raw" role: roles.html#raw + + +.. _classes: + +Class +===== + +:Directive Type: "class" +:Doctree Element: pending_ +:Directive Arguments: One or more, required (class names / attribute + values). +:Directive Options: None. +:Directive Content: Optional. If present, it is interpreted as body + elements. 
+ +The "class" directive sets the `"classes"`_ attribute value on its content +or on the first immediately following non-comment element [#]_. For +details of the "classes" attribute, see `its entry`__ in `The Docutils +Document Tree`_. + +The directive argument consists of one or more space-separated class +names. The names are transformed to conform to the regular expression +``[a-z](-?[a-z0-9]+)*`` by converting + +* alphabetic characters to lowercase, +* accented characters to the base character, +* non-alphanumeric characters to hyphens, +* consecutive hyphens into one hyphen. + +For example "Rot-Gelb.Blau Grün:+2008" becomes "rot-gelb-blau grun-2008". +(For the rationale_, see below.) + +__ ../doctree.html#classes + +Examples:: + + .. class:: special + + This is a "special" paragraph. + + .. class:: exceptional remarkable + + An Exceptional Section + ====================== + + This is an ordinary paragraph. + + .. class:: multiple + + First paragraph. + + Second paragraph. + +The text above is parsed and transformed into this doctree fragment:: + + <paragraph classes="special"> + This is a "special" paragraph. + <section classes="exceptional remarkable"> + <title> + An Exceptional Section + <paragraph> + This is an ordinary paragraph. + <paragraph classes="multiple"> + First paragraph. + <paragraph classes="multiple"> + Second paragraph. + +.. [#] To set a "classes" attribute value on a block quote, the + "class" directive must be followed by an empty comment:: + + .. class:: highlights + .. + + Block quote text. + + Without the empty comment, the indented text would be interpreted as the + "class" directive's content, and the classes would be applied to each + element (paragraph, in this case) individually, instead of to the block + quote as a whole. + +.. _rationale: + +.. topic:: Rationale for "classes" Attribute Value Conversion + + + Docutils identifiers are converted to conform to the regular + expression ``[a-z](-?[a-z0-9]+)*``. 
For HTML + CSS compatibility, + identifiers (the "classes" and "id" attributes) should have no + underscores, colons, or periods. Hyphens may be used. + + - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens: + + ID and NAME tokens must begin with a letter ([A-Za-z]) and + may be followed by any number of letters, digits ([0-9]), + hyphens ("-"), underscores ("_"), colons (":"), and periods + ("."). + + - The `CSS1 spec`_ defines identifiers based on the "name" token + ("flex" tokenizer notation below; "latin1" and "escape" 8-bit + characters have been replaced with XML entities):: + + unicode \\[0-9a-f]{1,4} + latin1 [&iexcl;-&yuml;] + escape {unicode}|\\[ -~&iexcl;-&yuml;] + nmchar [-A-Za-z0-9]|{latin1}|{escape} + name {nmchar}+ + + The CSS rule does not include underscores ("_"), colons (":"), or + periods ("."), therefore "classes" and "id" attributes should not + contain these characters. Combined with HTML's requirements (the + first character must be a letter; no "unicode", "latin1", or + "escape" characters), this results in the regular expression + ``[A-Za-z][-A-Za-z0-9]*``. Docutils adds a normalisation by + downcasing and merge of consecutive hyphens. + + .. _HTML 4.01 spec: http://www.w3.org/TR/html401/ + .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1 + + +.. _role: + +Custom Interpreted Text Roles +============================= + +:Directive Type: "role" +:Doctree Element: None; affects subsequent parsing. +:Directive Arguments: Two; one required (new role name), one optional + (base role name, in parentheses). +:Directive Options: Possible (depends on base role). +:Directive Content: depends on base role. + +(New in Docutils 0.3.2) + +The "role" directive dynamically creates a custom interpreted text +role and registers it with the parser. This means that after +declaring a role like this:: + + .. 
role:: custom + +the document may use the new "custom" role:: + + An example of using :custom:`interpreted text` + +This will be parsed into the following document tree fragment:: + + <paragraph> + An example of using + <inline classes="custom"> + interpreted text + +The role must be declared in a document before it can be used. + +The new role may be based on an existing role, specified as a second +argument in parentheses (whitespace optional):: + + .. role:: custom(emphasis) + + :custom:`text` + +The parsed result is as follows:: + + <paragraph> + <emphasis classes="custom"> + text + +A special case is the `"raw" role`_: derived roles enable +inline `raw data pass-through`_, e.g.:: + + .. role:: raw-role(raw) + :format: html latex + + :raw-role:`raw text` + +If no base role is explicitly specified, a generic custom role is +automatically used. Subsequent interpreted text will produce an +"inline" element with a `"classes"`_ attribute, as in the first example +above. + +With most roles, the ":class:" option can be used to set a "classes" +attribute that is different from the role name. For example:: + + .. role:: custom + :class: special + + :custom:`interpreted text` + +This is the parsed result:: + + <paragraph> + <inline classes="special"> + interpreted text + +.. _role class: + +The following option is recognized by the "role" directive for most +base roles: + +``class`` : text + Set the `"classes"`_ attribute value on the element produced + (``inline``, or element associated with a base class) when the + custom interpreted text role is used. If no directive options are + specified, a "class" option with the directive argument (role + name) as the value is implied. See the class_ directive above. + +Specific base roles may support other options and/or directive +content. See the `reStructuredText Interpreted Text Roles`_ document +for details. + +.. _reStructuredText Interpreted Text Roles: roles.html + + +.. 
_default-role: + +Setting the Default Interpreted Text Role +========================================= + +:Directive Type: "default-role" +:Doctree Element: None; affects subsequent parsing. +:Directive Arguments: One, optional (new default role name). +:Directive Options: None. +:Directive Content: None. + +(New in Docutils 0.3.10) + +The "default-role" directive sets the default interpreted text role, +the role that is used for interpreted text without an explicit role. +For example, after setting the default role like this:: + + .. default-role:: subscript + +any subsequent use of implicit-role interpreted text in the document +will use the "subscript" role:: + + An example of a `default` role. + +This will be parsed into the following document tree fragment:: + + <paragraph> + An example of a + <subscript> + default + role. + +Custom roles may be used (see the "role_" directive above), but it +must have been declared in a document before it can be set as the +default role. See the `reStructuredText Interpreted Text Roles`_ +document for details of built-in roles. + +The directive may be used without an argument to restore the initial +default interpreted text role, which is application-dependent. The +initial default interpreted text role of the standard reStructuredText +parser is "title-reference". + + +Metadata Document Title +======================= + +:Directive Type: "title" +:Doctree Element: None. +:Directive Arguments: 1, required (the title text). +:Directive Options: None. +:Directive Content: None. + +The "title" directive specifies the document title as metadata, which +does not become part of the document body. It overrides a +document-supplied title. For example, in HTML output the metadata +document title appears in the title bar of the browser window. + + +Restructuredtext-Test-Directive +=============================== + +:Directive Type: "restructuredtext-test-directive" +:Doctree Element: system_warning +:Directive Arguments: None. 
+:Directive Options: None. +:Directive Content: Interpreted as a literal block. + +This directive is provided for test purposes only. (Nobody is +expected to type in a name *that* long!) It is converted into a +level-1 (info) system message showing the directive data, possibly +followed by a literal block containing the rest of the directive +block. + +-------------- +Common Options +-------------- + +Most of the directives that generate doctree elements support the following +options: + +_`:class:` : text + Set a `"classes"`_ attribute value on the doctree element generated by + the directive. See also the class_ directive. + +_`:name:` : text + Add `text` to the `"names"`_ attribute of the doctree element generated + by the directive. This allows `hyperlink references`_ to the element + using `text` as `reference name`_. + + Specifying the `name` option of a directive, e.g., :: + + .. image:: bild.png + :name: my picture + + is a concise syntax alternative to preceding it with a `hyperlink + target`_ :: + + .. _my picture: + + .. image:: bild.png + + New in Docutils 0.8. + + +.. _reference name: restructuredtext.html#reference-names +.. _hyperlink target: restructuredtext.html#hyperlink-targets +.. _hyperlink references: restructuredtext.html#hyperlink-references +.. _"classes": ../doctree.html#classes +.. _"names": ../doctree.html#names +.. _admonition: ../doctree.html#admonition +.. _block_quote: ../doctree.html#block-quote +.. _caption: ../doctree.html#caption +.. _compound: ../doctree.html#compound +.. _container: ../doctree.html#container +.. _decoration: ../doctree.html#decoration +.. _figure: ../doctree.html#figure +.. _footnote: ../doctree.html#footnote +.. _footnote_reference: ../doctree.html#footnote-reference +.. _generated: ../doctree.html#generated +.. _image: ../doctree.html#image +.. _inline elements: ../doctree.html#inline-elements +.. _literal_block: ../doctree.html#literal-block +.. _legend: ../doctree.html#legend +.. 
_line_block: ../doctree.html#line-block +.. _math_block: ../doctree.html#math-block +.. _pending: ../doctree.html#pending +.. _raw: ../doctree.html#raw +.. _rubric: ../doctree.html#rubric +.. _sidebar: ../doctree.html#sidebar +.. _table: ../doctree.html#table +.. _title: ../doctree.html#title +.. _topic: ../doctree.html#topic + + + +.. + Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/tests/texts/restructuredtext.rst b/tests/texts/restructuredtext.rst new file mode 100644 index 0000000..6de9669 --- /dev/null +++ b/tests/texts/restructuredtext.rst @@ -0,0 +1,3006 @@ +.. -*- coding: utf-8 -*- + +======================================= + reStructuredText Markup Specification +======================================= + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +.. Note:: + + This document is a detailed technical specification; it is not a + tutorial or a primer. If this is your first exposure to + reStructuredText, please read `A ReStructuredText Primer`_ and the + `Quick reStructuredText`_ user reference first. + +.. _A ReStructuredText Primer: ../../user/rst/quickstart.html +.. _Quick reStructuredText: ../../user/rst/quickref.html + + +reStructuredText_ is plaintext that uses simple and intuitive +constructs to indicate the structure of a document. These constructs +are equally easy to read in raw and processed forms. This document is +itself an example of reStructuredText (raw, if you are reading the +text file, or processed, if you are reading an HTML document, for +example). The reStructuredText parser is a component of Docutils_. + +Simple, implicit markup is used to indicate special constructs, such +as section headings, bullet lists, and emphasis. The markup used is +as minimal and unobtrusive as possible. 
Less often-used constructs +and extensions to the basic reStructuredText syntax may have more +elaborate or explicit markup. + +reStructuredText is applicable to documents of any length, from the +very small (such as inline program documentation fragments, e.g. +Python docstrings) to the quite large (this document). + +The first section gives a quick overview of the syntax of the +reStructuredText markup by example. A complete specification is given +in the `Syntax Details`_ section. + +`Literal blocks`_ (in which no markup processing is done) are used for +examples throughout this document, to illustrate the plaintext markup. + + +.. contents:: + + +----------------------- + Quick Syntax Overview +----------------------- + +A reStructuredText document is made up of body or block-level +elements, and may be structured into sections. Sections_ are +indicated through title style (underlines & optional overlines). +Sections contain body elements and/or subsections. Some body elements +contain further elements, such as lists containing list items, which +in turn may contain paragraphs and other body elements. Others, such +as paragraphs, contain text and `inline markup`_ elements. + +Here are examples of `body elements`_: + +- Paragraphs_ (and `inline markup`_):: + + Paragraphs contain text and may contain inline markup: + *emphasis*, **strong emphasis**, `interpreted text`, ``inline + literals``, standalone hyperlinks (http://www.python.org), + external hyperlinks (Python_), internal cross-references + (example_), footnote references ([1]_), citation references + ([CIT2002]_), substitution references (|example|), and _`inline + internal targets`. + + Paragraphs are separated by blank lines and are left-aligned. + +- Five types of lists: + + 1. `Bullet lists`_:: + + - This is a bullet list. + + - Bullets can be "*", "+", or "-". + + 2. `Enumerated lists`_:: + + 1. This is an enumerated list. + + 2. Enumerators may be arabic numbers, letters, or roman + numerals. + + 3. 
`Definition lists`_:: + + what + Definition lists associate a term with a definition. + + how + The term is a one-line phrase, and the definition is one + or more paragraphs or body elements, indented relative to + the term. + + 4. `Field lists`_:: + + :what: Field lists map field names to field bodies, like + database records. They are often part of an extension + syntax. + + :how: The field marker is a colon, the field name, and a + colon. + + The field body may contain one or more body elements, + indented relative to the field marker. + + 5. `Option lists`_, for listing command-line options:: + + -a command-line option "a" + -b file options can have arguments + and long descriptions + --long options can be long also + --input=file long options can also have + arguments + /V DOS/VMS-style options too + + There must be at least two spaces between the option and the + description. + +- `Literal blocks`_:: + + Literal blocks are either indented or line-prefix-quoted blocks, + and indicated with a double-colon ("::") at the end of the + preceding paragraph (right here -->):: + + if literal_block: + text = 'is left as-is' + spaces_and_linebreaks = 'are preserved' + markup_processing = None + +- `Block quotes`_:: + + Block quotes consist of indented body elements: + + This theory, that is mine, is mine. + + -- Anne Elk (Miss) + +- `Doctest blocks`_:: + + >>> print 'Python-specific usage examples; begun with ">>>"' + Python-specific usage examples; begun with ">>>" + >>> print '(cut and pasted from interactive Python sessions)' + (cut and pasted from interactive Python sessions) + +- Two syntaxes for tables_: + + 1. 
`Grid tables`_; complete, but complex and verbose:: + + +------------------------+------------+----------+ + | Header row, column 1 | Header 2 | Header 3 | + +========================+============+==========+ + | body row 1, column 1 | column 2 | column 3 | + +------------------------+------------+----------+ + | body row 2 | Cells may span | + +------------------------+-----------------------+ + + 2. `Simple tables`_; easy and compact, but limited:: + + ==================== ========== ========== + Header row, column 1 Header 2 Header 3 + ==================== ========== ========== + body row 1, column 1 column 2 column 3 + body row 2 Cells may span columns + ==================== ====================== + +- `Explicit markup blocks`_ all begin with an explicit block marker, + two periods and a space: + + - Footnotes_:: + + .. [1] A footnote contains body elements, consistently + indented by at least 3 spaces. + + - Citations_:: + + .. [CIT2002] Just like a footnote, except the label is + textual. + + - `Hyperlink targets`_:: + + .. _Python: http://www.python.org + + .. _example: + + The "_example" target above points to this paragraph. + + - Directives_:: + + .. image:: mylogo.png + + - `Substitution definitions`_:: + + .. |symbol here| image:: symbol.png + + - Comments_:: + + .. Comments begin with two dots and a space. Anything may + follow, except for the syntax of footnotes/citations, + hyperlink targets, directives, or substitution definitions. + + +---------------- + Syntax Details +---------------- + +Descriptions below list "doctree elements" (document tree element +names; XML DTD generic identifiers) corresponding to syntax +constructs. For details on the hierarchy of elements, please see `The +Docutils Document Tree`_ and the `Docutils Generic DTD`_ XML document +type definition. + + +Whitespace +========== + +Spaces are recommended for indentation_, but tabs may also be used. +Tabs will be converted to spaces. Tab stops are at every 8th column. 
+ +Other whitespace characters (form feeds [chr(12)] and vertical tabs +[chr(11)]) are converted to single spaces before processing. + + +Blank Lines +----------- + +Blank lines are used to separate paragraphs and other elements. +Multiple successive blank lines are equivalent to a single blank line, +except within literal blocks (where all whitespace is preserved). +Blank lines may be omitted when the markup makes element separation +unambiguous, in conjunction with indentation. The first line of a +document is treated as if it is preceded by a blank line, and the last +line of a document is treated as if it is followed by a blank line. + + +Indentation +----------- + +Indentation is used to indicate -- and is only significant in +indicating -- block quotes, definitions (in definition list items), +and local nested content: + +- list item content (multi-line contents of list items, and multiple + body elements within a list item, including nested lists), +- the content of literal blocks, and +- the content of explicit markup blocks. + +Any text whose indentation is less than that of the current level +(i.e., unindented text or "dedents") ends the current level of +indentation. + +Since all indentation is significant, the level of indentation must be +consistent. For example, indentation is the sole markup indicator for +`block quotes`_:: + + This is a top-level paragraph. + + This paragraph belongs to a first-level block quote. + + Paragraph 2 of the first-level block quote. + +Multiple levels of indentation within a block quote will result in +more complex structures:: + + This is a top-level paragraph. + + This paragraph belongs to a first-level block quote. + + This paragraph belongs to a second-level block quote. + + Another top-level paragraph. + + This paragraph belongs to a second-level block quote. + + This paragraph belongs to a first-level block quote. The + second-level block quote above is inside this first-level + block quote. 
+ +When a paragraph or other construct consists of more than one line of +text, the lines must be left-aligned:: + + This is a paragraph. The lines of + this paragraph are aligned at the left. + + This paragraph has problems. The + lines are not left-aligned. In addition + to potential misinterpretation, warning + and/or error messages will be generated + by the parser. + +Several constructs begin with a marker, and the body of the construct +must be indented relative to the marker. For constructs using simple +markers (`bullet lists`_, `enumerated lists`_, footnotes_, citations_, +`hyperlink targets`_, directives_, and comments_), the level of +indentation of the body is determined by the position of the first +line of text, which begins on the same line as the marker. For +example, bullet list bodies must be indented by at least two columns +relative to the left edge of the bullet:: + + - This is the first line of a bullet list + item's paragraph. All lines must align + relative to the first line. [1]_ + + This indented paragraph is interpreted + as a block quote. + + Because it is not sufficiently indented, + this paragraph does not belong to the list + item. + + .. [1] Here's a footnote. The second line is aligned + with the beginning of the footnote label. The ".." + marker is what determines the indentation. + +For constructs using complex markers (`field lists`_ and `option +lists`_), where the marker may contain arbitrary text, the indentation +of the first line *after* the marker determines the left edge of the +body. For example, field lists may have very long markers (containing +the field names):: + + :Hello: This field has a short field name, so aligning the field + body with the first line is feasible. + + :Number-of-African-swallows-required-to-carry-a-coconut: It would + be very difficult to align the field body with the left edge + of the first line. It may even be preferable not to begin the + body on the same line as the marker. 
+ + +Escaping Mechanism +================== + +The character set universally available to plaintext documents, 7-bit +ASCII, is limited. No matter what characters are used for markup, +they will already have multiple meanings in written text. Therefore +markup characters *will* sometimes appear in text **without being +intended as markup**. Any serious markup system requires an escaping +mechanism to override the default meaning of the characters used for +the markup. In reStructuredText we use the backslash, commonly used +as an escaping character in other domains. + +A backslash followed by any character (except whitespace characters) +escapes that character. The escaped character represents the +character itself, and is prevented from playing a role in any markup +interpretation. The backslash is removed from the output. A literal +backslash is represented by two backslashes in a row (the first +backslash "escapes" the second, preventing it being interpreted in an +"escaping" role). + +Backslash-escaped whitespace characters are removed from the document. +This allows for character-level `inline markup`_. + +There are two contexts in which backslashes have no special meaning: +literal blocks and inline literals. In these contexts, a single +backslash represents a literal backslash, without having to double up. + +Please note that the reStructuredText specification and parser do not +address the issue of the representation or extraction of text input +(how and in what form the text actually *reaches* the parser). +Backslashes and other characters may serve a character-escaping +purpose in certain contexts and must be dealt with appropriately. For +example, Python uses backslashes in strings to escape certain +characters, but not others. The simplest solution when backslashes +appear in Python docstrings is to use raw docstrings:: + + r"""This is a raw docstring. 
Backslashes (\) are not touched.""" + + +Reference Names +=============== + +Simple reference names are single words consisting of alphanumerics +plus isolated (no two adjacent) internal hyphens, underscores, +periods, colons and plus signs; no whitespace or other characters are +allowed. Footnote labels (Footnotes_ & `Footnote References`_), citation +labels (Citations_ & `Citation References`_), `interpreted text`_ roles, +and some `hyperlink references`_ use the simple reference name syntax. + +Reference names using punctuation or whose names are phrases (two or +more space-separated words) are called "phrase-references". +Phrase-references are expressed by enclosing the phrase in backquotes +and treating the backquoted text as a reference name:: + + Want to learn about `my favorite programming language`_? + + .. _my favorite programming language: http://www.python.org + +Simple reference names may also optionally use backquotes. + +Reference names are whitespace-neutral and case-insensitive. When +resolving reference names internally: + +- whitespace is normalized (one or more spaces, horizontal or vertical + tabs, newlines, carriage returns, or form feeds, are interpreted as + a single space), and + +- case is normalized (all alphabetic characters are converted to + lowercase). + +For example, the following `hyperlink references`_ are equivalent:: + + - `A HYPERLINK`_ + - `a hyperlink`_ + - `A + Hyperlink`_ + +Hyperlinks_, footnotes_, and citations_ all share the same namespace +for reference names. The labels of citations (simple reference names) +and manually-numbered footnotes (numbers) are entered into the same +database as other hyperlink names. This means that a footnote +(defined as "``.. [1]``") which can be referred to by a footnote +reference (``[1]_``), can also be referred to by a plain hyperlink +reference (1_). Of course, each type of reference (hyperlink, +footnote, citation) may be processed and rendered differently. 
Some +care should be taken to avoid reference name conflicts. + + +Document Structure +================== + +Document +-------- + +Doctree element: document. + +The top-level element of a parsed reStructuredText document is the +"document" element. After initial parsing, the document element is a +simple container for a document fragment, consisting of `body +elements`_, transitions_, and sections_, but lacking a document title +or other bibliographic elements. The code that calls the parser may +choose to run one or more optional post-parse transforms_, +rearranging the document fragment into a complete document with a +title and possibly other metadata elements (author, date, etc.; see +`Bibliographic Fields`_). + +Specifically, there is no way to indicate a document title and +subtitle explicitly in reStructuredText. Instead, a lone top-level +section title (see Sections_ below) can be treated as the document +title. Similarly, a lone second-level section title immediately after +the "document title" can become the document subtitle. The rest of +the sections are then lifted up a level or two. See the `DocTitle +transform`_ for details. + + +Sections +-------- + +Doctree elements: section, title. + +Sections are identified through their titles, which are marked up with +adornment: "underlines" below the title text, or underlines and +matching "overlines" above the title. An underline/overline is a +single repeated punctuation character that begins in column 1 and +forms a line extending at least as far as the right edge of the title +text. Specifically, an underline/overline character may be any +non-alphanumeric printable 7-bit ASCII character [#]_. When an +overline is used, the length and character used must match the +underline. Underline-only adornment styles are distinct from +overline-and-underline styles that use the same character. There may +be any number of levels of section titles, although some output +formats may have limits (HTML has 6 levels). 
+ +.. [#] The following are all valid section title adornment + characters:: + + ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ + + Some characters are more suitable than others. The following are + recommended:: + + = - ` : . ' " ~ ^ _ * + # + +Rather than imposing a fixed number and order of section title +adornment styles, the order enforced will be the order as encountered. +The first style encountered will be an outermost title (like HTML H1), +the second style will be a subtitle, the third will be a subsubtitle, +and so on. + +Below are examples of section title styles:: + + =============== + Section Title + =============== + + --------------- + Section Title + --------------- + + Section Title + ============= + + Section Title + ------------- + + Section Title + ````````````` + + Section Title + ''''''''''''' + + Section Title + ............. + + Section Title + ~~~~~~~~~~~~~ + + Section Title + ************* + + Section Title + +++++++++++++ + + Section Title + ^^^^^^^^^^^^^ + +When a title has both an underline and an overline, the title text may +be inset, as in the first two examples above. This is merely +aesthetic and not significant. Underline-only title text may *not* be +inset. + +A blank line after a title is optional. All text blocks up to the +next title of the same or higher level are included in a section (or +subsection, etc.). + +All section title styles need not be used, nor need any specific +section title style be used. However, a document must be consistent +in its use of section titles: once a hierarchy of title styles is +established, sections must use that hierarchy. + +Each section title automatically generates a hyperlink target pointing +to the section. The text of the hyperlink target (the "reference +name") is the same as that of the section title. See `Implicit +Hyperlink Targets`_ for a complete description. + +Sections may contain `body elements`_, transitions_, and nested +sections. 
+ + +Transitions +----------- + +Doctree element: transition. + + Instead of subheads, extra space or a type ornament between + paragraphs may be used to mark text divisions or to signal + changes in subject or emphasis. + + (The Chicago Manual of Style, 14th edition, section 1.80) + +Transitions are commonly seen in novels and short fiction, as a gap +spanning one or more lines, with or without a type ornament such as a +row of asterisks. Transitions separate other body elements. A +transition should not begin or end a section or document, nor should +two transitions be immediately adjacent. + +The syntax for a transition marker is a horizontal line of 4 or more +repeated punctuation characters. The syntax is the same as section +title underlines without title text. Transition markers require blank +lines before and after:: + + Para. + + ---------- + + Para. + +Unlike section title underlines, no hierarchy of transition markers is +enforced, nor do differences in transition markers accomplish +anything. It is recommended that a single consistent style be used. + +The processing system is free to render transitions in output in any +way it likes. For example, horizontal rules (``<hr>``) in HTML output +would be an obvious choice. + + +Body Elements +============= + +Paragraphs +---------- + +Doctree element: paragraph. + +Paragraphs consist of blocks of left-aligned text with no markup +indicating any other body element. Blank lines separate paragraphs +from each other and from other body elements. Paragraphs may contain +`inline markup`_. + +Syntax diagram:: + + +------------------------------+ + | paragraph | + | | + +------------------------------+ + + +------------------------------+ + | paragraph | + | | + +------------------------------+ + + +Bullet Lists +------------ + +Doctree elements: bullet_list, list_item. + +A text block which begins with a "*", "+", "-", "•", "‣", or "⁃", +followed by whitespace, is a bullet list item (a.k.a. "unordered" list +item). 
List item bodies must be left-aligned and indented relative to +the bullet; the text immediately after the bullet determines the +indentation. For example:: + + - This is the first bullet list item. The blank line above the + first list item is required; blank lines between list items + (such as below this paragraph) are optional. + + - This is the first paragraph in the second item in the list. + + This is the second paragraph in the second item in the list. + The blank line above this paragraph is required. The left edge + of this paragraph lines up with the paragraph above, both + indented relative to the bullet. + + - This is a sublist. The bullet lines up with the left edge of + the text blocks above. A sublist is a new list so requires a + blank line above and below. + + - This is the third item of the main list. + + This paragraph is not part of the list. + +Here are examples of **incorrectly** formatted bullet lists:: + + - This first line is fine. + A blank line is required between list items and paragraphs. + (Warning) + + - The following line appears to be a new sublist, but it is not: + - This is a paragraph continuation, not a sublist (since there's + no blank line). This line is also incorrectly indented. + - Warnings may be issued by the implementation. + +Syntax diagram:: + + +------+-----------------------+ + | "- " | list item | + +------| (body elements)+ | + +-----------------------+ + + +Enumerated Lists +---------------- + +Doctree elements: enumerated_list, list_item. + +Enumerated lists (a.k.a. "ordered" lists) are similar to bullet lists, +but use enumerators instead of bullets. An enumerator consists of an +enumeration sequence member and formatting, followed by whitespace. +The following enumeration sequences are recognized: + +- arabic numerals: 1, 2, 3, ... (no upper limit). +- uppercase alphabet characters: A, B, C, ..., Z. +- lowercase alphabet characters: a, b, c, ..., z.
+- uppercase Roman numerals: I, II, III, IV, ..., MMMMCMXCIX (4999). +- lowercase Roman numerals: i, ii, iii, iv, ..., mmmmcmxcix (4999). + +In addition, the auto-enumerator, "#", may be used to automatically +enumerate a list. Auto-enumerated lists may begin with explicit +enumeration, which sets the sequence. Fully auto-enumerated lists use +arabic numerals and begin with 1. (Auto-enumerated lists are new in +Docutils 0.3.8.) + +The following formatting types are recognized: + +- suffixed with a period: "1.", "A.", "a.", "I.", "i.". +- surrounded by parentheses: "(1)", "(A)", "(a)", "(I)", "(i)". +- suffixed with a right-parenthesis: "1)", "A)", "a)", "I)", "i)". + +While parsing an enumerated list, a new list will be started whenever: + +- An enumerator is encountered which does not have the same format and + sequence type as the current list (e.g. "1.", "(a)" produces two + separate lists). + +- The enumerators are not in sequence (e.g., "1.", "3." produces two + separate lists). + +It is recommended that the enumerator of the first list item be +ordinal-1 ("1", "A", "a", "I", or "i"). Although other start-values +will be recognized, they may not be supported by the output format. A +level-1 [info] system message will be generated for any list beginning +with a non-ordinal-1 enumerator. + +Lists using Roman numerals must begin with "I"/"i" or a +multi-character value, such as "II" or "XV". Any other +single-character Roman numeral ("V", "X", "L", "C", "D", "M") will be +interpreted as a letter of the alphabet, not as a Roman numeral. +Likewise, lists using letters of the alphabet may not begin with +"I"/"i", since these are recognized as Roman numeral 1. + +The second line of each enumerated list item is checked for validity. +This is to prevent ordinary paragraphs from being mistakenly +interpreted as list items, when they happen to begin with text +identical to enumerators. For example, this text is parsed as an +ordinary paragraph:: + + A. 
Einstein was a really + smart dude. + +However, ambiguity cannot be avoided if the paragraph consists of only +one line. This text is parsed as an enumerated list item:: + + A. Einstein was a really smart dude. + +If a single-line paragraph begins with text identical to an enumerator +("A.", "1.", "(b)", "I)", etc.), the first character will have to be +escaped in order to have the line parsed as an ordinary paragraph:: + + \A. Einstein was a really smart dude. + +Examples of nested enumerated lists:: + + 1. Item 1 initial text. + + a) Item 1a. + b) Item 1b. + + 2. a) Item 2a. + b) Item 2b. + +Example syntax diagram:: + + +-------+----------------------+ + | "1. " | list item | + +-------| (body elements)+ | + +----------------------+ + + +Definition Lists +---------------- + +Doctree elements: definition_list, definition_list_item, term, +classifier, definition. + +Each definition list item contains a term, optional classifiers, and a +definition. A term is a simple one-line word or phrase. Optional +classifiers may follow the term on the same line, each after an inline +" : " (space, colon, space). A definition is a block indented +relative to the term, and may contain multiple paragraphs and other +body elements. There may be no blank line between a term line and a +definition block (this distinguishes definition lists from `block +quotes`_). Blank lines are required before the first and after the +last definition list item, but are optional in-between. For example:: + + term 1 + Definition 1. + + term 2 + Definition 2, paragraph 1. + + Definition 2, paragraph 2. + + term 3 : classifier + Definition 3. + + term 4 : classifier one : classifier two + Definition 4. + +Inline markup is parsed in the term line before the classifier +delimiter (" : ") is recognized. The delimiter will only be +recognized if it appears outside of any inline markup. + +A definition list may be used in various ways, including: + +- As a dictionary or glossary. 
The term is the word itself, a + classifier may be used to indicate the usage of the term (noun, + verb, etc.), and the definition follows. + +- To describe program variables. The term is the variable name, a + classifier may be used to indicate the type of the variable (string, + integer, etc.), and the definition describes the variable's use in + the program. This usage of definition lists supports the classifier + syntax of Grouch_, a system for describing and enforcing a Python + object schema. + +Syntax diagram:: + + +----------------------------+ + | term [ " : " classifier ]* | + +--+-------------------------+--+ + | definition | + | (body elements)+ | + +----------------------------+ + + +Field Lists +----------- + +Doctree elements: field_list, field, field_name, field_body. + +Field lists are used as part of an extension syntax, such as options +for directives_, or database-like records meant for further +processing. They may also be used for two-column table-like +structures resembling database records (label & data pairs). +Applications of reStructuredText may recognize field names and +transform fields or field bodies in certain contexts. For examples, +see `Bibliographic Fields`_ below, or the "image_" and "meta_" +directives in `reStructuredText Directives`_. + +Field lists are mappings from field names to field bodies, modeled on +RFC822_ headers. A field name may consist of any characters, but +colons (":") inside of field names must be escaped with a backslash. +Inline markup is parsed in field names. Field names are +case-insensitive when further processed or transformed. The field +name, along with a single colon prefix and suffix, together form the +field marker. The field marker is followed by whitespace and the +field body. The field body may contain multiple body elements, +indented relative to the field marker. The first line after the field +name marker determines the indentation of the field body. 
For +example:: + + :Date: 2001-08-16 + :Version: 1 + :Authors: - Me + - Myself + - I + :Indentation: Since the field marker may be quite long, the second + and subsequent lines of the field body do not have to line up + with the first line, but they must be indented relative to the + field name marker, and they must line up with each other. + :Parameter i: integer + +The interpretation of individual words in a multi-word field name is +up to the application. The application may specify a syntax for the +field name. For example, second and subsequent words may be treated +as "arguments", quoted phrases may be treated as a single argument, +and direct support for the "name=value" syntax may be added. + +Standard RFC822_ headers cannot be used for this construct because +they are ambiguous. A word followed by a colon at the beginning of a +line is common in written text. However, in well-defined contexts +such as when a field list invariably occurs at the beginning of a +document (PEPs and email messages), standard RFC822 headers could be +used. + +Syntax diagram (simplified):: + + +--------------------+----------------------+ + | ":" field name ":" | field body | + +-------+------------+ | + | (body elements)+ | + +-----------------------------------+ + + +Bibliographic Fields +```````````````````` + +Doctree elements: docinfo, author, authors, organization, contact, +version, status, date, copyright, field, topic. + +When a field list is the first non-comment element in a document +(after the document title, if there is one), it may have its fields +transformed to document bibliographic data. This bibliographic data +corresponds to the front matter of a book, such as the title page and +copyright page. + +Certain registered field names (listed below) are recognized and +transformed to the corresponding doctree elements, most becoming child +elements of the "docinfo" element. 
No ordering is required of these +fields, although they may be rearranged to fit the document structure, +as noted. Unless otherwise indicated below, each of the bibliographic +elements' field bodies may contain a single paragraph only. Field +bodies may be checked for `RCS keywords`_ and cleaned up. Any +unrecognized fields will remain as generic fields in the docinfo +element. + +The registered bibliographic field names and their corresponding +doctree elements are as follows: + +- Field name "Author": author element. +- "Authors": authors. +- "Organization": organization. +- "Contact": contact. +- "Address": address. +- "Version": version. +- "Status": status. +- "Date": date. +- "Copyright": copyright. +- "Dedication": topic. +- "Abstract": topic. + +The "Authors" field may contain either: a single paragraph consisting +of a list of authors, separated by ";" or ","; or a bullet list whose +elements each contain a single paragraph per author. ";" is checked +first, so "Doe, Jane; Doe, John" will work. In some languages +(e.g. Swedish), there is no singular/plural distinction between +"Author" and "Authors", so only an "Authors" field is provided, and a +single name is interpreted as an "Author". If a single name contains +a comma, end it with a semicolon to disambiguate: ":Authors: Doe, +Jane;". + +The "Address" field is for a multi-line surface mailing address. +Newlines and whitespace will be preserved. + +The "Dedication" and "Abstract" fields may contain arbitrary body +elements. Only one of each is allowed. They become topic elements +with "Dedication" or "Abstract" titles (or language equivalents) +immediately following the docinfo element. + +This field-name-to-element mapping can be replaced for other +languages. See the `DocInfo transform`_ implementation documentation +for details. + +Unregistered/generic fields may contain one or more paragraphs or +arbitrary body elements. 
+ + +RCS Keywords +```````````` + +`Bibliographic fields`_ recognized by the parser are normally checked +for RCS [#]_ keywords and cleaned up [#]_. RCS keywords may be +entered into source files as "$keyword$", and once stored under RCS or +CVS [#]_, they are expanded to "$keyword: expansion text $". For +example, a "Status" field will be transformed to a "status" element:: + + :Status: $keyword: expansion text $ + +.. [#] Revision Control System. +.. [#] RCS keyword processing can be turned off (unimplemented). +.. [#] Concurrent Versions System. CVS uses the same keywords as RCS. + +Processed, the "status" element's text will become simply "expansion +text". The dollar sign delimiters and leading RCS keyword name are +removed. + +The RCS keyword processing only kicks in when the field list is in +bibliographic context (first non-comment construct in the document, +after a document title if there is one). + + +Option Lists +------------ + +Doctree elements: option_list, option_list_item, option_group, option, +option_string, option_argument, description. + +Option lists are two-column lists of command-line options and +descriptions, documenting a program's options. For example:: + + -a Output all. + -b Output both (this description is + quite long). + -c arg Output just arg. + --long Output all day long. + + -p This option has two paragraphs in the description. + This is the first. + + This is the second. Blank lines may be omitted between + options (as above) or left in (as here and below). + + --very-long-option A VMS-style option. Note the adjustment for + the required two spaces. + + --an-even-longer-option + The description can also start on the next line. + + -2, --two This option has two variants. + + -f FILE, --file=FILE These two options are synonyms; both have + arguments. + + /V A VMS/DOS-style option. + +There are several types of options recognized by reStructuredText: + +- Short POSIX options consist of one dash and an option letter. 
+- Long POSIX options consist of two dashes and an option word; some + systems use a single dash. +- Old GNU-style "plus" options consist of one plus and an option + letter ("plus" options are deprecated now, their use discouraged). +- DOS/VMS options consist of a slash and an option letter or word. + +Please note that both POSIX-style and DOS/VMS-style options may be +used by DOS or Windows software. These and other variations are +sometimes used mixed together. The names above have been chosen for +convenience only. + +The syntax for short and long POSIX options is based on the syntax +supported by Python's getopt.py_ module, which implements an option +parser similar to the `GNU libc getopt_long()`_ function but with some +restrictions. There are many variant option systems, and +reStructuredText option lists do not support all of them. + +Although long POSIX and DOS/VMS option words may be allowed to be +truncated by the operating system or the application when used on the +command line, reStructuredText option lists do not show or support +this with any special syntax. The complete option word should be +given, supported by notes about truncation if and when applicable. + +Options may be followed by an argument placeholder, whose role and +syntax should be explained in the description text. Either a space or +an equals sign may be used as a delimiter between options and option +argument placeholders; short options ("-" or "+" prefix only) may omit +the delimiter. Option arguments may take one of two forms: + +- Begins with a letter (``[a-zA-Z]``) and subsequently consists of + letters, numbers, underscores and hyphens (``[a-zA-Z0-9_-]``). +- Begins with an open-angle-bracket (``<``) and ends with a + close-angle-bracket (``>``); any characters except angle brackets + are allowed internally. + +Multiple option "synonyms" may be listed, sharing a single +description. They must be separated by comma-space. 
+ +There must be at least two spaces between the option(s) and the +description. The description may contain multiple body elements. The +first line after the option marker determines the indentation of the +description. As with other types of lists, blank lines are required +before the first option list item and after the last, but are optional +between option entries. + +Syntax diagram (simplified):: + + +----------------------------+-------------+ + | option [" " argument] " " | description | + +-------+--------------------+ | + | (body elements)+ | + +----------------------------------+ + + +Literal Blocks +-------------- + +Doctree element: literal_block. + +A paragraph consisting of two colons ("::") signifies that the +following text block(s) comprise a literal block. The literal block +must either be indented or quoted (see below). No markup processing +is done within a literal block. It is left as-is, and is typically +rendered in a monospaced typeface:: + + This is a typical paragraph. An indented literal block follows. + + :: + + for a in [5,4,3,2,1]: # this is program code, shown as-is + print a + print "it's..." + # a literal block continues until the indentation ends + + This text has returned to the indentation of the first paragraph, + is outside of the literal block, and is therefore treated as an + ordinary paragraph. + +The paragraph containing only "::" will be completely removed from the +output; no empty paragraph will remain. + +As a convenience, the "::" is recognized at the end of any paragraph. +If immediately preceded by whitespace, both colons will be removed +from the output (this is the "partially minimized" form). When text +immediately precedes the "::", *one* colon will be removed from the +output, leaving only one colon visible (i.e., "::" will be replaced by +":"; this is the "fully minimized" form). + +In other words, these are all equivalent (please pay attention to the +colons after "Paragraph"): + +1. 
Expanded form:: + + Paragraph: + + :: + + Literal block + +2. Partially minimized form:: + + Paragraph: :: + + Literal block + +3. Fully minimized form:: + + Paragraph:: + + Literal block + +All whitespace (including line breaks, but excluding minimum +indentation for indented literal blocks) is preserved. Blank lines +are required before and after a literal block, but these blank lines +are not included as part of the literal block. + + +Indented Literal Blocks +``````````````````````` + +Indented literal blocks are indicated by indentation relative to the +surrounding text (leading whitespace on each line). The minimum +indentation will be removed from each line of an indented literal +block. The literal block need not be contiguous; blank lines are +allowed between sections of indented text. The literal block ends +with the end of the indentation. + +Syntax diagram:: + + +------------------------------+ + | paragraph | + | (ends with "::") | + +------------------------------+ + +---------------------------+ + | indented literal block | + +---------------------------+ + + +Quoted Literal Blocks +````````````````````` + +Quoted literal blocks are unindented contiguous blocks of text where +each line begins with the same non-alphanumeric printable 7-bit ASCII +character [#]_. A blank line ends a quoted literal block. The +quoting characters are preserved in the processed document. + +.. [#] + The following are all valid quoting characters:: + + ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ + + Note that these are the same characters as are valid for title + adornment of sections_. + +Possible uses include literate programming in Haskell and email +quoting:: + + John Doe wrote:: + + >> Great idea! + > + > Why didn't I think of that? + + You just did! 
;-) + +Syntax diagram:: + + +------------------------------+ + | paragraph | + | (ends with "::") | + +------------------------------+ + +------------------------------+ + | ">" per-line-quoted | + | ">" contiguous literal block | + +------------------------------+ + + +Line Blocks +----------- + +Doctree elements: line_block, line. (New in Docutils 0.3.5.) + +Line blocks are useful for address blocks, verse (poetry, song +lyrics), and unadorned lists, where the structure of lines is +significant. Line blocks are groups of lines beginning with vertical +bar ("|") prefixes. Each vertical bar prefix indicates a new line, so +line breaks are preserved. Initial indents are also significant, +resulting in a nested structure. Inline markup is supported. +Continuation lines are wrapped portions of long lines; they begin with +a space in place of the vertical bar. The left edge of a continuation +line must be indented, but need not be aligned with the left edge of +the text above it. A line block ends with a blank line. + +This example illustrates continuation lines:: + + | Lend us a couple of bob till Thursday. + | I'm absolutely skint. + | But I'm expecting a postal order and I can pay you back + as soon as it comes. + | Love, Ewan. + +This example illustrates the nesting of line blocks, indicated by the +initial indentation of new lines:: + + Take it away, Eric the Orchestra Leader! + + | A one, two, a one two three four + | + | Half a bee, philosophically, + | must, *ipso facto*, half not be. + | But half the bee has got to be, + | *vis a vis* its entity. D'you see? + | + | But can a bee be said to be + | or not to be an entire bee, + | when half the bee is not a bee, + | due to some ancient injury? + | + | Singing... + +Syntax diagram:: + + +------+-----------------------+ + | "| " | line | + +------| continuation line | + +-----------------------+ + + +Block Quotes +------------ + +Doctree elements: block_quote, attribution.
+ +A text block that is indented relative to the preceding text, without +preceding markup indicating it to be a literal block or other content, +is a block quote. All markup processing (for body elements and inline +markup) continues within the block quote:: + + This is an ordinary paragraph, introducing a block quote. + + "It is my business to know things. That is my trade." + + -- Sherlock Holmes + +A block quote may end with an attribution: a text block beginning with +"--", "---", or a true em-dash, flush left within the block quote. If +the attribution consists of multiple lines, the left edges of the +second and subsequent lines must align. + +Multiple block quotes may occur consecutively if terminated with +attributions. + + Unindented paragraph. + + Block quote 1. + + -- Attribution 1 + + Block quote 2. + +`Empty comments`_ may be used to explicitly terminate preceding +constructs that would otherwise consume a block quote:: + + * List item. + + .. + + Block quote 3. + +Empty comments may also be used to separate block quotes:: + + Block quote 4. + + .. + + Block quote 5. + +Blank lines are required before and after a block quote, but these +blank lines are not included as part of the block quote. + +Syntax diagram:: + + +------------------------------+ + | (current level of | + | indentation) | + +------------------------------+ + +---------------------------+ + | block quote | + | (body elements)+ | + | | + | -- attribution text | + | (optional) | + +---------------------------+ + + +Doctest Blocks +-------------- + +Doctree element: doctest_block. + +Doctest blocks are interactive Python sessions cut-and-pasted into +docstrings. They are meant to illustrate usage by example, and +provide an elegant and powerful testing environment via the `doctest +module`_ in the Python standard library. + +Doctest blocks are text blocks which begin with ``">>> "``, the Python +interactive interpreter main prompt, and end with a blank line. 
+Doctest blocks are treated as a special case of literal blocks, +without requiring the literal block syntax. If both are present, the +literal block syntax takes priority over Doctest block syntax:: + + This is an ordinary paragraph. + + >>> print 'this is a Doctest block' + this is a Doctest block + + The following is a literal block:: + + >>> This is not recognized as a doctest block by + reStructuredText. It *will* be recognized by the doctest + module, though! + +Indentation is not required for doctest blocks. + + +Tables +------ + +Doctree elements: table, tgroup, colspec, thead, tbody, row, entry. + +ReStructuredText provides two syntaxes for delineating table cells: +`Grid Tables`_ and `Simple Tables`_. + +As with other body elements, blank lines are required before and after +tables. Tables' left edges should align with the left edge of +preceding text blocks; if indented, the table is considered to be part +of a block quote. + +Once isolated, each table cell is treated as a miniature document; the +top and bottom cell boundaries act as delimiting blank lines. Each +cell contains zero or more body elements. Cell contents may include +left and/or right margins, which are removed before processing. + + +Grid Tables +``````````` + +Grid tables provide a complete table representation via grid-like +"ASCII art". Grid tables allow arbitrary cell contents (body +elements), and both row and column spans. However, grid tables can be +cumbersome to produce, especially for simple data sets. The `Emacs +table mode`_ is a tool that allows easy editing of grid tables, in +Emacs. See `Simple Tables`_ for a simpler (but limited) +representation. + +Grid tables are described with a visual grid made up of the characters +"-", "=", "|", and "+". The hyphen ("-") is used for horizontal lines +(row separators). The equals sign ("=") may be used to separate +optional header rows from the table body (not supported by the `Emacs +table mode`_). 
The vertical bar ("|") is used for vertical lines +(column separators). The plus sign ("+") is used for intersections of +horizontal and vertical lines. Example:: + + +------------------------+------------+----------+----------+ + | Header row, column 1 | Header 2 | Header 3 | Header 4 | + | (header rows optional) | | | | + +========================+============+==========+==========+ + | body row 1, column 1 | column 2 | column 3 | column 4 | + +------------------------+------------+----------+----------+ + | body row 2 | Cells may span columns. | + +------------------------+------------+---------------------+ + | body row 3 | Cells may | - Table cells | + +------------------------+ span rows. | - contain | + | body row 4 | | - body elements. | + +------------------------+------------+---------------------+ + +Some care must be taken with grid tables to avoid undesired +interactions with cell text in rare cases. For example, the following +table contains a cell in row 2 spanning from column 2 to column 4:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + +If a vertical bar is used in the text of that cell, it could have +unintended effects if accidentally aligned with column boundaries:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | Use the command ``ls | more``. | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + +Several solutions are possible. All that is needed is to break the +continuity of the cell outline rectangle. 
One possibility is to shift +the text by adding an extra space before:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | Use the command ``ls | more``. | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + +Another possibility is to add an extra line to row 2:: + + +--------------+----------+-----------+-----------+ + | row 1, col 1 | column 2 | column 3 | column 4 | + +--------------+----------+-----------+-----------+ + | row 2 | Use the command ``ls | more``. | + | | | + +--------------+----------+-----------+-----------+ + | row 3 | | | | + +--------------+----------+-----------+-----------+ + + +Simple Tables +````````````` + +Simple tables provide a compact and easy to type but limited +row-oriented table representation for simple data sets. Cell contents +are typically single paragraphs, although arbitrary body elements may +be represented in most cells. Simple tables allow multi-line rows (in +all but the first column) and column spans, but not row spans. See +`Grid Tables`_ above for a complete table representation. + +Simple tables are described with horizontal borders made up of "=" and +"-" characters. The equals sign ("=") is used for top and bottom +table borders, and to separate optional header rows from the table +body. The hyphen ("-") is used to indicate column spans in a single +row by underlining the joined columns, and may optionally be used to +explicitly and/or visually separate rows. + +A simple table begins with a top border of equals signs with one or +more spaces at each column boundary (two or more spaces recommended). +Regardless of spans, the top border *must* fully describe all table +columns. There must be at least two columns in the table (to +differentiate it from section headers). 
The top border may be +followed by header rows, and the last of the optional header rows is +underlined with '=', again with spaces at column boundaries. There +may not be a blank line below the header row separator; it would be +interpreted as the bottom border of the table. The bottom boundary of +the table consists of '=' underlines, also with spaces at column +boundaries. For example, here is a truth table, a three-column table +with one header row and four body rows:: + + ===== ===== ======= + A B A and B + ===== ===== ======= + False False False + True False False + False True False + True True True + ===== ===== ======= + +Underlines of '-' may be used to indicate column spans by "filling in" +column margins to join adjacent columns. Column span underlines must +be complete (they must cover all columns) and align with established +column boundaries. Text lines containing column span underlines may +not contain any other text. A column span underline applies only to +one row immediately above it. For example, here is a table with a +column span in the header:: + + ===== ===== ====== + Inputs Output + ------------ ------ + A B A or B + ===== ===== ====== + False False False + True False True + False True True + True True True + ===== ===== ====== + +Each line of text must contain spaces at column boundaries, except +where cells have been joined by column spans. Each line of text +starts a new row, except when there is a blank cell in the first +column. In that case, that line of text is parsed as a continuation +line. For this reason, cells in the first column of new rows (*not* +continuation lines) *must* contain some text; blank cells would lead +to a misinterpretation (but see the tip below). Also, this mechanism +limits cells in the first column to only one line of text. Use `grid +tables`_ if this limitation is unacceptable. + +.. 
Tip:: + + To start a new row in a simple table without text in the first + column in the processed output, use one of these: + + * an empty comment (".."), which may be omitted from the processed + output (see Comments_ below) + + * a backslash escape ("``\``") followed by a space (see `Escaping + Mechanism`_ above) + +Underlines of '-' may also be used to visually separate rows, even if +there are no column spans. This is especially useful in long tables, +where rows are many lines long. + +Blank lines are permitted within simple tables. Their interpretation +depends on the context. Blank lines *between* rows are ignored. +Blank lines *within* multi-line rows may separate paragraphs or other +body elements within cells. + +The rightmost column is unbounded; text may continue past the edge of +the table (as indicated by the table borders). However, it is +recommended that borders be made long enough to contain the entire +text. + +The following example illustrates continuation lines (row 2 consists +of two lines of text, and four lines for row 3), a blank line +separating paragraphs (row 3, column 2), text extending past the right +edge of the table, and a new row which will have no text in the first +column in the processed output (row 4):: + + ===== ===== + col 1 col 2 + ===== ===== + 1 Second column of row 1. + 2 Second column of row 2. + Second line of paragraph. + 3 - Second column of row 3. + + - Second item in bullet + list (row 3, column 2). + \ Row 4; column 1 will be empty. + ===== ===== + + +Explicit Markup Blocks +---------------------- + +An explicit markup block is a text block: + +- whose first line begins with ".." followed by whitespace (the + "explicit markup start"), +- whose second and subsequent lines (if any) are indented relative to + the first, and +- which ends before an unindented line. + +Explicit markup blocks are analogous to bullet list items, with ".." +as the bullet. 
The text on the lines immediately after the explicit +markup start determines the indentation of the block body. The +maximum common indentation is always removed from the second and +subsequent lines of the block body. Therefore if the first construct +fits in one line, and the indentation of the first and second +constructs should differ, the first construct should not begin on the +same line as the explicit markup start. + +Blank lines are required between explicit markup blocks and other +elements, but are optional between explicit markup blocks where +unambiguous. + +The explicit markup syntax is used for footnotes, citations, hyperlink +targets, directives, substitution definitions, and comments. + + +Footnotes +````````` + +Doctree elements: footnote, label. + +Each footnote consists of an explicit markup start (".. "), a left +square bracket, the footnote label, a right square bracket, and +whitespace, followed by indented body elements. A footnote label can +be: + +- a whole decimal number consisting of one or more digits, + +- a single "#" (denoting `auto-numbered footnotes`_), + +- a "#" followed by a simple reference name (an `autonumber label`_), + or + +- a single "*" (denoting `auto-symbol footnotes`_). + +The footnote content (body elements) must be consistently indented (by +at least 3 spaces) and left-aligned. The first body element within a +footnote may often begin on the same line as the footnote label. +However, if the first element fits on one line and the indentation of +the remaining elements differs, the first element must begin on the +line after the footnote label. Otherwise, the difference in +indentation will not be detected. + +Footnotes may occur anywhere in the document, not only at the end. +Where and how they appear in the processed output depends on the +processing system. + +Here is a manually numbered footnote:: + + .. [1] Body elements go here. + +Each footnote automatically generates a hyperlink target pointing to +itself.
The text of the hyperlink target name is the same as that of +the footnote label. `Auto-numbered footnotes`_ generate a number as +their footnote label and reference name. See `Implicit Hyperlink +Targets`_ for a complete description of the mechanism. + +Syntax diagram:: + + +-------+-------------------------+ + | ".. " | "[" label "]" footnote | + +-------+ | + | (body elements)+ | + +-------------------------+ + + +Auto-Numbered Footnotes +....................... + +A number sign ("#") may be used as the first character of a footnote +label to request automatic numbering of the footnote or footnote +reference. + +The first footnote to request automatic numbering is assigned the +label "1", the second is assigned the label "2", and so on (assuming +there are no manually numbered footnotes present; see `Mixed Manual +and Auto-Numbered Footnotes`_ below). A footnote which has +automatically received a label "1" generates an implicit hyperlink +target with name "1", just as if the label was explicitly specified. + +.. _autonumber label: `autonumber labels`_ + +A footnote may specify a label explicitly while at the same time +requesting automatic numbering: ``[#label]``. These labels are called +_`autonumber labels`. Autonumber labels do two things: + +- On the footnote itself, they generate a hyperlink target whose name + is the autonumber label (doesn't include the "#"). + +- They allow an automatically numbered footnote to be referred to more + than once, as a footnote reference or hyperlink reference. For + example:: + + If [#note]_ is the first footnote reference, it will show up as + "[1]". We can refer to it again as [#note]_ and again see + "[1]". We can also refer to it as note_ (an ordinary internal + hyperlink reference). + + .. [#note] This is the footnote labeled "note". + +The numbering is determined by the order of the footnotes, not by the +order of the references. 
For footnote references without autonumber +labels (``[#]_``), the footnotes and footnote references must be in +the same relative order but need not alternate in lock-step. For +example:: + + [#]_ is a reference to footnote 1, and [#]_ is a reference to + footnote 2. + + .. [#] This is footnote 1. + .. [#] This is footnote 2. + .. [#] This is footnote 3. + + [#]_ is a reference to footnote 3. + +Special care must be taken if footnotes themselves contain +auto-numbered footnote references, or if multiple references are made +in close proximity. Footnotes and references are noted in the order +they are encountered in the document, which is not necessarily the +same as the order in which a person would read them. + + +Auto-Symbol Footnotes +..................... + +An asterisk ("*") may be used for footnote labels to request automatic +symbol generation for footnotes and footnote references. The asterisk +may be the only character in the label. For example:: + + Here is a symbolic footnote reference: [*]_. + + .. [*] This is the footnote. + +A transform will insert symbols as labels into corresponding footnotes +and footnote references. The number of references must be equal to +the number of footnotes. One symbol footnote cannot have multiple +references. + +The standard Docutils system uses the following symbols for footnote +marks [#]_: + +- asterisk/star ("*") +- dagger (HTML character entity "&dagger;", Unicode U+02020) +- double dagger ("‡"/U+02021) +- section mark ("§"/U+000A7) +- pilcrow or paragraph mark ("¶"/U+000B6) +- number sign ("#") +- spade suit ("♠"/U+02660) +- heart suit ("♥"/U+02665) +- diamond suit ("♦"/U+02666) +- club suit ("♣"/U+02663) + +.. [#] This list was inspired by the list of symbols for "Note + Reference Marks" in The Chicago Manual of Style, 14th edition, + section 12.51. "Parallels" ("||") were given in CMoS instead of + the pilcrow. The last four symbols (the card suits) were added + arbitrarily.
+ +If more than ten symbols are required, the same sequence will be +reused, doubled and then tripled, and so on ("**" etc.). + +.. Note:: When using auto-symbol footnotes, the choice of output + encoding is important. Many of the symbols used are not encodable + in certain common text encodings such as Latin-1 (ISO 8859-1). The + use of UTF-8 for the output encoding is recommended. An + alternative for HTML and XML output is to use the + "xmlcharrefreplace" `output encoding error handler`__. + +__ ../../user/config.html#output-encoding-error-handler + + +Mixed Manual and Auto-Numbered Footnotes +........................................ + +Manual and automatic footnote numbering may both be used within a +single document, although the results may not be expected. Manual +numbering takes priority. Only unused footnote numbers are assigned +to auto-numbered footnotes. The following example should be +illustrative:: + + [2]_ will be "2" (manually numbered), + [#]_ will be "3" (anonymous auto-numbered), and + [#label]_ will be "1" (labeled auto-numbered). + + .. [2] This footnote is labeled manually, so its number is fixed. + + .. [#label] This autonumber-labeled footnote will be labeled "1". + It is the first auto-numbered footnote and no other footnote + with label "1" exists. The order of the footnotes is used to + determine numbering, not the order of the footnote references. + + .. [#] This footnote will be labeled "3". It is the second + auto-numbered footnote, but footnote label "2" is already used. + + +Citations +````````` + +Citations are identical to footnotes except that they use only +non-numeric labels such as ``[note]`` or ``[GVR2001]``. Citation +labels are simple `reference names`_ (case-insensitive single words +consisting of alphanumerics plus internal hyphens, underscores, and +periods; no whitespace). Citations may be rendered separately and +differently from footnotes. For example:: + + Here is a citation reference: [CIT2002]_. + + .. 
[CIT2002] This is the citation. It's just like a footnote, + except the label is textual. + + +.. _hyperlinks: + +Hyperlink Targets +````````````````` + +Doctree element: target. + +These are also called _`explicit hyperlink targets`, to differentiate +them from `implicit hyperlink targets`_ defined below. + +Hyperlink targets identify a location within or outside of a document, +which may be linked to by `hyperlink references`_. + +Hyperlink targets may be named or anonymous. Named hyperlink targets +consist of an explicit markup start (".. "), an underscore, the +reference name (no trailing underscore), a colon, whitespace, and a +link block:: + + .. _hyperlink-name: link-block + +Reference names are whitespace-neutral and case-insensitive. See +`Reference Names`_ for details and examples. + +Anonymous hyperlink targets consist of an explicit markup start +(".. "), two underscores, a colon, whitespace, and a link block; there +is no reference name:: + + .. __: anonymous-hyperlink-target-link-block + +An alternate syntax for anonymous hyperlinks consists of two +underscores, a space, and a link block:: + + __ anonymous-hyperlink-target-link-block + +See `Anonymous Hyperlinks`_ below. + +There are three types of hyperlink targets: internal, external, and +indirect. + +1. _`Internal hyperlink targets` have empty link blocks. They provide + an end point allowing a hyperlink to connect one place to another + within a document. An internal hyperlink target points to the + element following the target. For example:: + + Clicking on this internal hyperlink will take us to the target_ + below. + + .. _target: + + The hyperlink target above points to this paragraph. + + Internal hyperlink targets may be "chained". Multiple adjacent + internal hyperlink targets all point to the same element:: + + .. _target1: + .. _target2: + + The targets "target1" and "target2" are synonyms; they both + point to this paragraph. 
+ + If the element "pointed to" is an external hyperlink target (with a + URI in its link block; see #2 below) the URI from the external + hyperlink target is propagated to the internal hyperlink targets; + they will all "point to" the same URI. There is no need to + duplicate a URI. For example, all three of the following hyperlink + targets refer to the same URI:: + + .. _Python DOC-SIG mailing list archive: + .. _archive: + .. _Doc-SIG: http://mail.python.org/pipermail/doc-sig/ + + An inline form of internal hyperlink target is available; see + `Inline Internal Targets`_. + +2. _`External hyperlink targets` have an absolute or relative URI or + email address in their link blocks. For example, take the + following input:: + + See the Python_ home page for info. + + `Write to me`_ with your questions. + + .. _Python: http://www.python.org + .. _Write to me: jdoe@example.com + + After processing into HTML, the hyperlinks might be expressed as:: + + See the <a href="http://www.python.org">Python</a> home page + for info. + + <a href="mailto:jdoe@example.com">Write to me</a> with your + questions. + + An external hyperlink's URI may begin on the same line as the + explicit markup start and target name, or it may begin in an + indented text block immediately following, with no intervening + blank lines. If there are multiple lines in the link block, they + are concatenated. Any whitespace is removed (whitespace is + permitted to allow for line wrapping). The following external + hyperlink targets are equivalent:: + + .. _one-liner: http://docutils.sourceforge.net/rst.html + + .. _starts-on-this-line: http:// + docutils.sourceforge.net/rst.html + + .. _entirely-below: + http://docutils. + sourceforge.net/rst.html + + If an external hyperlink target's URI contains an underscore as its + last character, it must be escaped to avoid being mistaken for an + indirect hyperlink target:: + + This link_ refers to a file called ``underscore_``. + + .. 
_link: underscore\_ + + It is possible (although not generally recommended) to include URIs + directly within hyperlink references. See `Embedded URIs and Aliases`_ + below. + +3. _`Indirect hyperlink targets` have a hyperlink reference in their + link blocks. In the following example, target "one" indirectly + references whatever target "two" references, and target "two" + references target "three", an internal hyperlink target. In + effect, all three reference the same thing:: + + .. _one: two_ + .. _two: three_ + .. _three: + + Just as with `hyperlink references`_ anywhere else in a document, + if a phrase-reference is used in the link block it must be enclosed + in backquotes. As with `external hyperlink targets`_, the link + block of an indirect hyperlink target may begin on the same line as + the explicit markup start or the next line. It may also be split + over multiple lines, in which case the lines are joined with + whitespace before being normalized. + + For example, the following indirect hyperlink targets are + equivalent:: + + .. _one-liner: `A HYPERLINK`_ + .. _entirely-below: + `a hyperlink`_ + .. _split: `A + Hyperlink`_ + + It is possible to include an alias directly within hyperlink + references. See `Embedded URIs and Aliases`_ below. + +If the reference name contains any colons, either: + +- the phrase must be enclosed in backquotes:: + + .. _`FAQTS: Computers: Programming: Languages: Python`: + http://python.faqts.com/ + +- or the colon(s) must be backslash-escaped in the link target:: + + .. _Chapter One\: "Tadpole Days": + + It's not easy being green... + +See `Implicit Hyperlink Targets`_ below for the resolution of +duplicate reference names. + +Syntax diagram:: + + +-------+----------------------+ + | ".. " | "_" name ":" link | + +-------+ block | + | | + +----------------------+ + + +Anonymous Hyperlinks +.................... 
+ +The `World Wide Web Consortium`_ recommends in its `HTML Techniques +for Web Content Accessibility Guidelines`_ that authors should +"clearly identify the target of each link." Hyperlink references +should be as verbose as possible, but duplicating a verbose hyperlink +name in the target is onerous and error-prone. Anonymous hyperlinks +are designed to allow convenient verbose hyperlink references, and are +analogous to `Auto-Numbered Footnotes`_. They are particularly useful +in short or one-off documents. However, this feature is easily abused +and can result in unreadable plaintext and/or unmaintainable +documents. Caution is advised. + +Anonymous `hyperlink references`_ are specified with two underscores +instead of one:: + + See `the web site of my favorite programming language`__. + +Anonymous targets begin with ".. __:"; no reference name is required +or allowed:: + + .. __: http://www.python.org + +As a convenient alternative, anonymous targets may begin with "__" +only:: + + __ http://www.python.org + +The reference name of the reference is not used to match the reference +to its target. Instead, the order of anonymous hyperlink references +and targets within the document is significant: the first anonymous +reference will link to the first anonymous target. The number of +anonymous hyperlink references in a document must match the number of +anonymous targets. For readability, it is recommended that targets be +kept close to references. Take care when editing text containing +anonymous references; adding, removing, and rearranging references +require attention to the order of corresponding targets. + + +Directives +`````````` + +Doctree elements: depend on the directive. + +Directives are an extension mechanism for reStructuredText, a way of +adding support for new constructs without adding new primary syntax +(directives may support additional syntax locally). 
All standard +directives (those implemented and registered in the reference +reStructuredText parser) are described in the `reStructuredText +Directives`_ document, and are always available. Any other directives +are domain-specific, and may require special action to make them +available when processing the document. + +For example, here's how an image_ may be placed:: + + .. image:: mylogo.jpeg + +A figure_ (a graphic with a caption) may be placed like this:: + + .. figure:: larch.png + + The larch. + +An admonition_ (note, caution, etc.) contains other body elements:: + + .. note:: This is a paragraph + + - Here is a bullet list. + +Directives are indicated by an explicit markup start (".. ") followed +by the directive type, two colons, and whitespace (together called the +"directive marker"). Directive types are case-insensitive single +words (alphanumerics plus isolated internal hyphens, underscores, +plus signs, colons, and periods; no whitespace). Two colons are used +after the directive type for these reasons: + +- Two colons are distinctive, and unlikely to be used in common text. + +- Two colons avoid clashes with common comment text like:: + + .. Danger: modify at your own risk! + +- If an implementation of reStructuredText does not recognize a + directive (i.e., the directive-handler is not installed), a level-3 + (error) system message is generated, and the entire directive block + (including the directive itself) will be included as a literal + block. Thus "::" is a natural choice. + +The directive block consists of any text on the first line of the +directive after the directive marker, and any subsequent indented +text. The interpretation of the directive block is up to the +directive code. There are three logical parts to the directive block: + +1. Directive arguments. +2. Directive options. +3. Directive content. + +Individual directives can employ any combination of these parts. +Directive arguments can be filesystem paths, URLs, title text, etc.
+Directive options are indicated using `field lists`_; the field names +and contents are directive-specific. Arguments and options must form +a contiguous block beginning on the first or second line of the +directive; a blank line indicates the beginning of the directive +content block. If either arguments and/or options are employed by the +directive, a blank line must separate them from the directive content. +The "figure" directive employs all three parts:: + + .. figure:: larch.png + :scale: 50 + + The larch. + +Simple directives may not require any content. If a directive that +does not employ a content block is followed by indented text anyway, +it is an error. If a block quote should immediately follow a +directive, use an empty comment in-between (see Comments_ below). + +Actions taken in response to directives and the interpretation of text +in the directive content block or subsequent text block(s) are +directive-dependent. See `reStructuredText Directives`_ for details. + +Directives are meant for the arbitrary processing of their contents, +which can be transformed into something possibly unrelated to the +original text. It may also be possible for directives to be used as +pragmas, to modify the behavior of the parser, such as to experiment +with alternate syntax. There is no parser support for this +functionality at present; if a reasonable need for pragma directives +is found, they may be supported. + +Directives do not generate "directive" elements; they are a *parser +construct* only, and have no intrinsic meaning outside of +reStructuredText. Instead, the parser will transform recognized +directives into (possibly specialized) document elements. Unknown +directives will trigger level-3 (error) system messages. + +Syntax diagram:: + + +-------+-------------------------------+ + | ".. 
" | directive type "::" directive | + +-------+ block | + | | + +-------------------------------+ + + +Substitution Definitions +```````````````````````` + +Doctree element: substitution_definition. + +Substitution definitions are indicated by an explicit markup start +(".. ") followed by a vertical bar, the substitution text, another +vertical bar, whitespace, and the definition block. Substitution text +may not begin or end with whitespace. A substitution definition block +contains an embedded inline-compatible directive (without the leading +".. "), such as "image_" or "replace_". For example:: + + The |biohazard| symbol must be used on containers used to + dispose of medical waste. + + .. |biohazard| image:: biohazard.png + +It is an error for a substitution definition block to directly or +indirectly contain a circular substitution reference. + +`Substitution references`_ are replaced in-line by the processed +contents of the corresponding definition (linked by matching +substitution text). Matches are case-sensitive but forgiving; if no +exact match is found, a case-insensitive comparison is attempted. + +Substitution definitions allow the power and flexibility of +block-level directives_ to be shared by inline text. They are a way +to include arbitrarily complex inline structures within text, while +keeping the details out of the flow of text. They are the equivalent +of SGML/XML's named entities or programming language macros. + +Without the substitution mechanism, every time someone wants an +application-specific new inline structure, they would have to petition +for a syntax change. In combination with existing directive syntax, +any inline structure can be coded without new syntax (except possibly +a new directive). + +Syntax diagram:: + + +-------+-----------------------------------------------------+ + | ".. 
" | "|" substitution text "| " directive type "::" data | + +-------+ directive block | + | | + +-----------------------------------------------------+ + +Following are some use cases for the substitution mechanism. Please +note that most of the embedded directives shown are examples only and +have not been implemented. + +Objects + Substitution references may be used to associate ambiguous text + with a unique object identifier. + + For example, many sites may wish to implement an inline "user" + directive:: + + |Michael| and |Jon| are our widget-wranglers. + + .. |Michael| user:: mjones + .. |Jon| user:: jhl + + Depending on the needs of the site, this may be used to index the + document for later searching, to hyperlink the inline text in + various ways (mailto, homepage, mouseover Javascript with profile + and contact information, etc.), or to customize presentation of + the text (include username in the inline text, include an icon + image with a link next to the text, make the text bold or a + different color, etc.). + + The same approach can be used in documents which frequently refer + to a particular type of objects with unique identifiers but + ambiguous common names. Movies, albums, books, photos, court + cases, and laws are possible. For example:: + + |The Transparent Society| offers a fascinating alternate view + on privacy issues. + + .. |The Transparent Society| book:: isbn=0738201448 + + Classes or functions, in contexts where the module or class names + are unclear and/or interpreted text cannot be used, are another + possibility:: + + 4XSLT has the convenience method |runString|, so you don't + have to mess with DOM objects if all you want is the + transformed output. + + .. |runString| function:: module=xml.xslt class=Processor + +Images + Images are a common use for substitution references:: + + West led the |H| 3, covered by dummy's |H| Q, East's |H| K, + and trumped in hand with the |S| 2. + + .. 
|H| image:: /images/heart.png + :height: 11 + :width: 11 + .. |S| image:: /images/spade.png + :height: 11 + :width: 11 + + * |Red light| means stop. + * |Green light| means go. + * |Yellow light| means go really fast. + + .. |Red light| image:: red_light.png + .. |Green light| image:: green_light.png + .. |Yellow light| image:: yellow_light.png + + |-><-| is the official symbol of POEE_. + + .. |-><-| image:: discord.png + .. _POEE: http://www.poee.org/ + + The "image_" directive has been implemented. + +Styles [#]_ + Substitution references may be used to associate inline text with + an externally defined presentation style:: + + Even |the text in Texas| is big. + + .. |the text in Texas| style:: big + + The style name may be meaningful in the context of some particular + output format (CSS class name for HTML output, LaTeX style name + for LaTeX, etc), or may be ignored for other output formats (such + as plaintext). + + .. @@@ This needs to be rethought & rewritten or removed: + + Interpreted text is unsuitable for this purpose because the set + of style names cannot be predefined - it is the domain of the + content author, not the author of the parser and output + formatter - and there is no way to associate a style name + argument with an interpreted text style role. Also, it may be + desirable to use the same mechanism for styling blocks:: + + .. style:: motto + At Bob's Underwear Shop, we'll do anything to get in + your pants. + + .. style:: disclaimer + All rights reversed. Reprint what you like. + + .. [#] There may be sufficient need for a "style" mechanism to + warrant simpler syntax such as an extension to the interpreted + text role syntax. The substitution mechanism is cumbersome for + simple text styling. + +Templates + Inline markup may be used for later processing by a template + engine. For example, a Zope_ author might write:: + + Welcome back, |name|! + + .. 
|name| tal:: replace user/getUserName + + After processing, this ZPT output would result:: + + Welcome back, + <span tal:replace="user/getUserName">name</span>! + + Zope would then transform this to something like "Welcome back, + David!" during a session with an actual user. + +Replacement text + The substitution mechanism may be used for simple macro + substitution. This may be appropriate when the replacement text + is repeated many times throughout one or more documents, + especially if it may need to change later. A short example is + unavoidably contrived:: + + |RST|_ is a little annoying to type over and over, especially + when writing about |RST| itself, and spelling out the + bicapitalized word |RST| every time isn't really necessary for + |RST| source readability. + + .. |RST| replace:: reStructuredText + .. _RST: http://docutils.sourceforge.net/rst.html + + Note the trailing underscore in the first use of a substitution + reference. This indicates a reference to the corresponding + hyperlink target. + + Substitution is also appropriate when the replacement text cannot + be represented using other inline constructs, or is obtrusively + long:: + + But still, that's nothing compared to a name like + |j2ee-cas|__. + + .. |j2ee-cas| replace:: + the Java `TM`:super: 2 Platform, Enterprise Edition Client + Access Services + __ http://developer.java.sun.com/developer/earlyAccess/ + j2eecas/ + + The "replace_" directive has been implemented. + + +Comments +```````` + +Doctree element: comment. + +Arbitrary indented text may follow the explicit markup start and will +be processed as a comment element. No further processing is done on +the comment block text; a comment contains a single "text blob". +Depending on the output formatter, comments may be removed from the +processed output. 
The only restriction on comments is that they not +use the same syntax as any of the other explicit markup constructs: +substitution definitions, directives, footnotes, citations, or +hyperlink targets. To ensure that none of the other explicit markup +constructs is recognized, leave the ".." on a line by itself:: + + .. This is a comment + .. + _so: is this! + .. + [and] this! + .. + this:: too! + .. + |even| this:: ! + +.. _empty comments: + +An explicit markup start followed by a blank line and nothing else +(apart from whitespace) is an "_`empty comment`". It serves to +terminate a preceding construct, and does **not** consume any indented +text following. To have a block quote follow a list or any indented +construct, insert an unindented empty comment in-between. + +Syntax diagram:: + + +-------+----------------------+ + | ".. " | comment | + +-------+ block | + | | + +----------------------+ + + +Implicit Hyperlink Targets +========================== + +Implicit hyperlink targets are generated by section titles, footnotes, +and citations, and may also be generated by extension constructs. +Implicit hyperlink targets otherwise behave identically to explicit +`hyperlink targets`_. + +Problems of ambiguity due to conflicting duplicate implicit and +explicit reference names are avoided by following this procedure: + +1. `Explicit hyperlink targets`_ override any implicit targets having + the same reference name. The implicit hyperlink targets are + removed, and level-1 (info) system messages are inserted. + +2. Duplicate implicit hyperlink targets are removed, and level-1 + (info) system messages inserted. For example, if two or more + sections have the same title (such as "Introduction" subsections of + a rigidly-structured document), there will be duplicate implicit + hyperlink targets. + +3. Duplicate explicit hyperlink targets are removed, and level-2 + (warning) system messages are inserted. 
Exception: duplicate + `external hyperlink targets`_ (identical hyperlink names and + referenced URIs) do not conflict, and are not removed. + +System messages are inserted where target links have been removed. +See "Error Handling" in `PEP 258`_. + +The parser must return a set of *unique* hyperlink targets. The +calling software (such as the Docutils_) can warn of unresolvable +links, giving reasons for the messages. + + +Inline Markup +============= + +In reStructuredText, inline markup applies to words or phrases within +a text block. The same whitespace and punctuation that serves to +delimit words in written text is used to delimit the inline markup +syntax constructs. The text within inline markup may not begin or end +with whitespace. Arbitrary `character-level inline markup`_ is +supported although not encouraged. Inline markup cannot be nested. + +There are nine inline markup constructs. Five of the constructs use +identical start-strings and end-strings to indicate the markup: + +- emphasis_: "*" +- `strong emphasis`_: "**" +- `interpreted text`_: "`" +- `inline literals`_: "``" +- `substitution references`_: "|" + +Three constructs use different start-strings and end-strings: + +- `inline internal targets`_: "_`" and "`" +- `footnote references`_: "[" and "]_" +- `hyperlink references`_: "`" and "\`_" (phrases), or just a + trailing "_" (single words) + +`Standalone hyperlinks`_ are recognized implicitly, and use no extra +markup. + +Inline markup recognition rules +------------------------------- + +Inline markup start-strings and end-strings are only recognized if all of +the following conditions are met: + +1. Inline markup start-strings must start a text block or be + immediately preceded by + + * whitespace, + * one of the ASCII characters ``- : / ' " < ( [ {`` or + * a non-ASCII punctuation character with `Unicode category`_ + `Pd` (Dash), + `Po` (Other), + `Ps` (Open), + `Pi` (Initial quote), or + `Pf` (Final quote) [#PiPf]_. + +2. 
Inline markup start-strings must be immediately followed by + non-whitespace. + +3. Inline markup end-strings must be immediately preceded by + non-whitespace. + +4. Inline markup end-strings must end a text block or be immediately + followed by + + * whitespace, + * one of the ASCII characters ``- . , : ; ! ? \ / ' " ) ] } >`` or + * a non-ASCII punctuation character with `Unicode category`_ + `Pd` (Dash), + `Po` (Other), + `Pe` (Close), + `Pf` (Final quote), or + `Pi` (Initial quote) [#PiPf]_. + +5. If an inline markup start-string is immediately preceded by one of the + ASCII characters ``' " < ( [ {``, or a character with Unicode character + category `Ps`, `Pi`, or `Pf`, it must not be followed by the + corresponding [#corresponding-quotes]_ closing character from + ``' " ) ] } >`` or the categories `Pe`, `Pf`, or `Pi`. + +6. An inline markup end-string must be separated by at least one + character from the start-string. + +7. An unescaped backslash preceding a start-string or end-string will + disable markup recognition, except for the end-string of `inline + literals`_. See `Escaping Mechanism`_ above for details. + +.. [#PiPf] `Pi` (Punctuation, Initial quote) characters are "usually + opening, sometimes closing". `Pf` (Punctuation, Final quote) + characters are "usually closing, sometimes opening". + +.. [#corresponding-quotes] For quotes, corresponding characters can be + any of the `quotation marks in international usage`_ + +.. _Unicode category: + http://www.unicode.org/Public/5.1.0/ucd/UCD.html#General_Category_Values + +.. _quotation marks in international usage: + http://en.wikipedia.org/wiki/Quotation_mark,_non-English_usage + +The inline markup recognition rules were devised to allow 90% of non-markup +uses of "*", "`", "_", and "|" without escaping.
For example, none of the +following terms are recognized as containing inline markup strings: + +- 2*x a**b O(N**2) e**(x*y) f(x)*f(y) a|b file*.* (breaks 1) +- 2 * x a ** b (* BOM32_* ` `` _ __ | (breaks 2) +- "*" '|' (*) [*] {*} <*> + ‘*’ ‚*‘ ‘*‚ ’*’ ‚*’ + “*” „*“ “*„ ”*” „*” + »*« ›*‹ «*» »*» ›*› (breaks 5) +- || (breaks 6) +- __init__ __init__() + +No escaping is required inside the following inline markup examples: + +- *2 * x *a **b *.txt* (breaks 3) +- *2*x a**b O(N**2) e**(x*y) f(x)*f(y) a*(1+2)* (breaks 4) + +It may be desirable to use `inline literals`_ for some of these anyhow, +especially if they represent code snippets. It's a judgment call. + +These cases *do* require either literal-quoting or escaping to avoid +misinterpretation: + + \*4, class\_, \*args, \**kwargs, \`TeX-quoted', \*ML, \*.txt + +In most use cases, `inline literals`_ or `literal blocks`_ are the best +choice (by default, this also selects a monospaced font):: + + *4, class_, *args, **kwargs, `TeX-quoted', *ML, *.txt + +Recognition order +----------------- + +Inline markup delimiter characters are used for multiple constructs, +so to avoid ambiguity there must be a specific recognition order for +each character. The inline markup recognition order is as follows: + +- Asterisks: `Strong emphasis`_ ("**") is recognized before emphasis_ + ("*"). + +- Backquotes: `Inline literals`_ ("``"), `inline internal targets`_ + (leading "_`", trailing "`"), are mutually independent, and are + recognized before phrase `hyperlink references`_ (leading "`", + trailing "\`_") and `interpreted text`_ ("`"). + +- Trailing underscores: Footnote references ("[" + label + "]_") and + simple `hyperlink references`_ (name + trailing "_") are mutually + independent. + +- Vertical bars: `Substitution references`_ ("|") are independently + recognized. + +- `Standalone hyperlinks`_ are the last to be recognized. 
+ + +Character-Level Inline Markup +----------------------------- + +It is possible to mark up individual characters within a word with +backslash escapes (see `Escaping Mechanism`_ above). Backslash +escapes can be used to allow arbitrary text to immediately follow +inline markup:: + + Python ``list``\s use square bracket syntax. + +The backslash will disappear from the processed document. The word +"list" will appear as inline literal text, and the letter "s" will +immediately follow it as normal text, with no space in-between. + +Arbitrary text may immediately precede inline markup using +backslash-escaped whitespace:: + + Possible in *re*\ ``Structured``\ *Text*, though not encouraged. + +The backslashes and spaces separating "re", "Structured", and "Text" +above will disappear from the processed document. + +.. CAUTION:: + + The use of backslash-escapes for character-level inline markup is + not encouraged. Such use is ugly and detrimental to the + unprocessed document's readability. Please use this feature + sparingly and only where absolutely necessary. + + +Emphasis +-------- + +Doctree element: emphasis. + +Start-string = end-string = "*". + +Text enclosed by single asterisk characters is emphasized:: + + This is *emphasized text*. + +Emphasized text is typically displayed in italics. + + +Strong Emphasis +--------------- + +Doctree element: strong. + +Start-string = end-string = "**". + +Text enclosed by double-asterisks is emphasized strongly:: + + This is **strong text**. + +Strongly emphasized text is typically displayed in boldface. + + +Interpreted Text +---------------- + +Doctree element: depends on the explicit or implicit role and +processing. + +Start-string = end-string = "`". + +Interpreted text is text that is meant to be related, indexed, linked, +summarized, or otherwise processed, but the text itself is typically +left alone. Interpreted text is enclosed by single backquote +characters:: + + This is `interpreted text`. 
+ +The "role" of the interpreted text determines how the text is +interpreted. The role may be inferred implicitly (as above; the +"default role" is used) or indicated explicitly, using a role marker. +A role marker consists of a colon, the role name, and another colon. +A role name is a single word consisting of alphanumerics plus isolated +internal hyphens, underscores, plus signs, colons, and periods; +no whitespace or other characters are allowed. A role marker is +either a prefix or a suffix to the interpreted text, whichever reads +better; it's up to the author:: + + :role:`interpreted text` + + `interpreted text`:role: + +Interpreted text allows extensions to the available inline descriptive +markup constructs. To emphasis_, `strong emphasis`_, `inline +literals`_, and `hyperlink references`_, we can add "title reference", +"index entry", "acronym", "class", "red", "blinking" or anything else +we want. Only pre-determined roles are recognized; unknown roles will +generate errors. A core set of standard roles is implemented in the +reference parser; see `reStructuredText Interpreted Text Roles`_ for +individual descriptions. The role_ directive can be used to define +custom interpreted text roles. In addition, applications may support +specialized roles. + + +Inline Literals +--------------- + +Doctree element: literal. + +Start-string = end-string = "``". + +Text enclosed by double-backquotes is treated as inline literals:: + + This text is an example of ``inline literals``. + +Inline literals may contain any characters except two adjacent +backquotes in an end-string context (according to the recognition +rules above). No markup interpretation (including backslash-escape +interpretation) is done within inline literals. + +Line breaks are *not* preserved in inline literals. 
Although a +reStructuredText parser will preserve runs of spaces in its output, +the final representation of the processed document is dependent on the +output formatter, thus the preservation of whitespace cannot be +guaranteed. If the preservation of line breaks and/or other +whitespace is important, `literal blocks`_ should be used. + +Inline literals are useful for short code snippets. For example:: + + The regular expression ``[+-]?(\d+(\.\d*)?|\.\d+)`` matches + floating-point numbers (without exponents). + + +Hyperlink References +-------------------- + +Doctree element: reference. + +- Named hyperlink references: + + - Start-string = "" (empty string), end-string = "_". + - Start-string = "`", end-string = "\`_". (Phrase references.) + +- Anonymous hyperlink references: + + - Start-string = "" (empty string), end-string = "__". + - Start-string = "`", end-string = "\`__". (Phrase references.) + +Hyperlink references are indicated by a trailing underscore, "_", +except for `standalone hyperlinks`_ which are recognized +independently. The underscore can be thought of as a right-pointing +arrow. The trailing underscores point away from hyperlink references, +and the leading underscores point toward `hyperlink targets`_. + +Hyperlinks consist of two parts. In the text body, there is a source +link, a reference name with a trailing underscore (or two underscores +for `anonymous hyperlinks`_):: + + See the Python_ home page for info. + +A target link with a matching reference name must exist somewhere else +in the document (see `Hyperlink Targets`_ for a full description). + +`Anonymous hyperlinks`_ (which see) do not use reference names to +match references to targets, but otherwise behave similarly to named +hyperlinks.
+ + +Embedded URIs and Aliases +````````````````````````` + +A hyperlink reference may directly embed a target URI or (since +Docutils 0.11) a hyperlink reference within angle brackets ("<...>") +as follows:: + + See the `Python home page <http://www.python.org>`_ for info. + + This `link <Python home page_>`_ is an alias to the link above. + +This is exactly equivalent to:: + + See the `Python home page`_ for info. + + This link_ is an alias to the link above. + + .. _Python home page: http://www.python.org + .. _link: `Python home page`_ + +The bracketed URI must be preceded by whitespace and be the last text +before the end string. + +With a single trailing underscore, the reference is named and the same +target URI may be referred to again. +With two trailing underscores, the reference and target are both +anonymous, and the target cannot be referred to again. These are +"one-off" hyperlinks. For example:: + + `RFC 2396 <http://www.rfc-editor.org/rfc/rfc2396.txt>`__ and `RFC + 2732 <http://www.rfc-editor.org/rfc/rfc2732.txt>`__ together + define the syntax of URIs. + +Equivalent to:: + + `RFC 2396`__ and `RFC 2732`__ together define the syntax of URIs. + + __ http://www.rfc-editor.org/rfc/rfc2396.txt + __ http://www.rfc-editor.org/rfc/rfc2732.txt + +`Standalone hyperlinks`_ are treated as URIs, even if they end with an +underscore like in the example of a Python function documentation:: + + `__init__ <http:example.py.html#__init__>`__ + +If a target URI that is not recognized as `standalone hyperlink`_ happens +to end with an underscore, this needs to be backslash-escaped to avoid +being parsed as hyperlink reference. For example :: + + Use the `source <parrots.txt\_>`__. + +creates an anonymous reference to the file ``parrots.txt_``. + +If the reference text happens to end with angle-bracketed text that is +*not* a URI or hyperlink reference, at least one angle-bracket needs to +be backslash-escaped or an escaped space should follow. 
For example, here +are three references to titles describing a tag:: + + See `HTML Element: \<a>`_, `HTML Element: <b\> `_, and + `HTML Element: <c>\ `_. + +The reference text may also be omitted, in which case the URI will be +duplicated for use as the reference text. This is useful for relative +URIs where the address or file name is also the desired reference +text:: + + See `<a_named_relative_link>`_ or `<an_anonymous_relative_link>`__ + for details. + +.. CAUTION:: + + This construct offers easy authoring and maintenance of hyperlinks + at the expense of general readability. Inline URIs, especially + long ones, inevitably interrupt the natural flow of text. For + documents meant to be read in source form, the use of independent + block-level `hyperlink targets`_ is **strongly recommended**. The + embedded URI construct is most suited to documents intended *only* + to be read in processed form. + + +Inline Internal Targets +------------------------ + +Doctree element: target. + +Start-string = "_`", end-string = "`". + +Inline internal targets are the equivalent of explicit `internal +hyperlink targets`_, but may appear within running text. The syntax +begins with an underscore and a backquote, is followed by a hyperlink +name or phrase, and ends with a backquote. Inline internal targets +may not be anonymous. + +For example, the following paragraph contains a hyperlink target named +"Norwegian Blue":: + + Oh yes, the _`Norwegian Blue`. What's, um, what's wrong with it? + +See `Implicit Hyperlink Targets`_ for the resolution of duplicate +reference names. + + +Footnote References +------------------- + +Doctree element: footnote_reference. + +Start-string = "[", end-string = "]_". + +Each footnote reference consists of a square-bracketed label followed +by a trailing underscore. 
Footnote labels are one of: + +- one or more digits (i.e., a number), + +- a single "#" (denoting `auto-numbered footnotes`_), + +- a "#" followed by a simple reference name (an `autonumber label`_), + or + +- a single "*" (denoting `auto-symbol footnotes`_). + +For example:: + + Please RTFM [1]_. + + .. [1] Read The Fine Manual + + +Citation References +------------------- + +Doctree element: citation_reference. + +Start-string = "[", end-string = "]_". + +Each citation reference consists of a square-bracketed label followed +by a trailing underscore. Citation labels are simple `reference +names`_ (case-insensitive single words, consisting of alphanumerics +plus internal hyphens, underscores, and periods; no whitespace). + +For example:: + + Here is a citation reference: [CIT2002]_. + +See Citations_ for the citation itself. + + +Substitution References +----------------------- + +Doctree element: substitution_reference, reference. + +Start-string = "|", end-string = "|" (optionally followed by "_" or +"__"). + +Vertical bars are used to bracket the substitution reference text. A +substitution reference may also be a hyperlink reference by appending +a "_" (named) or "__" (anonymous) suffix; the substitution text is +used for the reference text in the named case. + +The processing system replaces substitution references with the +processed contents of the corresponding `substitution definitions`_ +(which see for the definition of "correspond"). Substitution +definitions produce inline-compatible elements. + +Examples:: + + This is a simple |substitution reference|. It will be replaced by + the processing system. + + This is a combination |substitution and hyperlink reference|_. In + addition to being replaced, the replacement text or element will + refer to the "substitution and hyperlink reference" target. + +.. _standalone hyperlink: + +Standalone Hyperlinks +--------------------- + +Doctree element: reference. + +Start-string = end-string = "" (empty string). 
+ +A URI (absolute URI [#URI]_ or standalone email address) within a text +block is treated as a general external hyperlink with the URI itself +as the link's text. For example:: + + See http://www.python.org for info. + +would be marked up in HTML as:: + + See <a href="http://www.python.org">http://www.python.org</a> for + info. + +Two forms of URI are recognized: + +1. Absolute URIs. These consist of a scheme, a colon (":"), and a + scheme-specific part whose interpretation depends on the scheme. + + The scheme is the name of the protocol, such as "http", "ftp", + "mailto", or "telnet". The scheme consists of an initial letter, + followed by letters, numbers, and/or "+", "-", ".". Recognition is + limited to known schemes, per the `Official IANA Registry of URI + Schemes`_ and the W3C's `Retired Index of WWW Addressing Schemes`_. + + The scheme-specific part of the resource identifier may be either + hierarchical or opaque: + + - Hierarchical identifiers begin with one or two slashes and may + use slashes to separate hierarchical components of the path. + Examples are web pages and FTP sites:: + + http://www.python.org + + ftp://ftp.python.org/pub/python + + - Opaque identifiers do not begin with slashes. Examples are + email addresses and newsgroups:: + + mailto:someone@somewhere.com + + news:comp.lang.python + + With queries, fragments, and %-escape sequences, URIs can become + quite complicated. A reStructuredText parser must be able to + recognize any absolute URI, as defined in RFC2396_ and RFC2732_. + +2. Standalone email addresses, which are treated as if they were + absolute URIs with a "mailto:" scheme. Example:: + + someone@somewhere.com + +Punctuation at the end of a URI is not considered part of the URI, +unless the URI is terminated by a closing angle bracket (">"). +Backslashes may be used in URIs to escape markup characters, +specifically asterisks ("*") and underscores ("_") which are valid URI +characters (see `Escaping Mechanism`_ above). + +..
[#URI] Uniform Resource Identifier. URIs are a general form of + URLs (Uniform Resource Locators). For the syntax of URIs see + RFC2396_ and RFC2732_. + + +Units +===== + +(New in Docutils 0.3.10.) + +All measures consist of a positive floating point number in standard +(non-scientific) notation and a unit, possibly separated by one or +more spaces. + +Units are only supported where explicitly mentioned in the reference +manuals. + + +Length Units +------------ + +The following length units are supported by the reStructuredText +parser: + +* em (ems, the height of the element's font) +* ex (x-height, the height of the letter "x") +* px (pixels, relative to the canvas resolution) +* in (inches; 1in=2.54cm) +* cm (centimeters; 1cm=10mm) +* mm (millimeters) +* pt (points; 1pt=1/72in) +* pc (picas; 1pc=12pt) + +This set corresponds to the `length units in CSS`_. + +(List and explanations taken from +http://www.htmlhelp.com/reference/css/units.html#length.) + +The following are all valid length values: "1.5em", "20 mm", ".5in". + +Length values without unit are completed with a writer-dependent +default (e.g. px with `html4css1`, pt with `latex2e`). See the writer +specific documentation in the `user doc`__ for details. + +.. _length units in CSS: + http://www.w3.org/TR/CSS2/syndata.html#length-units + +__ ../../user/ + +Percentage Units +---------------- + +Percentage values have a percent sign ("%") as unit. Percentage +values are relative to other values, depending on the context in which +they occur. + + +---------------- + Error Handling +---------------- + +Doctree element: system_message, problematic. + +Markup errors are handled according to the specification in `PEP +258`_. + + +.. _reStructuredText: http://docutils.sourceforge.net/rst.html +.. _Docutils: http://docutils.sourceforge.net/ +.. _The Docutils Document Tree: ../doctree.html +.. _Docutils Generic DTD: ../docutils.dtd +.. _transforms: + http://docutils.sourceforge.net/docutils/transforms/ +.. 
_Grouch: http://www.mems-exchange.org/software/grouch/ +.. _RFC822: http://www.rfc-editor.org/rfc/rfc822.txt +.. _DocTitle transform: +.. _DocInfo transform: + http://docutils.sourceforge.net/docutils/transforms/frontmatter.py +.. _getopt.py: + http://www.python.org/doc/current/lib/module-getopt.html +.. _GNU libc getopt_long(): + http://www.gnu.org/software/libc/manual/html_node/Getopt-Long-Options.html +.. _doctest module: + http://www.python.org/doc/current/lib/module-doctest.html +.. _Emacs table mode: http://table.sourceforge.net/ +.. _Official IANA Registry of URI Schemes: + http://www.iana.org/assignments/uri-schemes +.. _Retired Index of WWW Addressing Schemes: + http://www.w3.org/Addressing/schemes.html +.. _World Wide Web Consortium: http://www.w3.org/ +.. _HTML Techniques for Web Content Accessibility Guidelines: + http://www.w3.org/TR/WCAG10-HTML-TECHS/#link-text +.. _image: directives.html#image +.. _replace: directives.html#replace +.. _meta: directives.html#meta +.. _figure: directives.html#figure +.. _admonition: directives.html#admonitions +.. _role: directives.html#custom-interpreted-text-roles +.. _reStructuredText Directives: directives.html +.. _reStructuredText Interpreted Text Roles: roles.html +.. _RFC2396: http://www.rfc-editor.org/rfc/rfc2396.txt +.. _RFC2732: http://www.rfc-editor.org/rfc/rfc2732.txt +.. _Zope: http://www.zope.com/ +.. _PEP 258: ../../peps/pep-0258.html + + +.. 
+ Local Variables: + mode: indented-text + indent-tabs-mode: nil + sentence-end-double-space: t + fill-column: 70 + End: diff --git a/tests/texts/roles.rst b/tests/texts/roles.rst new file mode 100644 index 0000000..e59605a --- /dev/null +++ b/tests/texts/roles.rst @@ -0,0 +1,380 @@ +========================================= + reStructuredText Interpreted Text Roles +========================================= + +:Author: David Goodger +:Contact: docutils-develop@lists.sourceforge.net +:Revision: $Revision$ +:Date: $Date$ +:Copyright: This document has been placed in the public domain. + +This document describes the interpreted text roles implemented in the +reference reStructuredText parser. + +Interpreted text uses backquotes (`) around the text. An explicit +role marker may optionally appear before or after the text, delimited +with colons. For example:: + + This is `interpreted text` using the default role. + + This is :title:`interpreted text` using an explicit role. + +A default role may be defined by applications of reStructuredText; it +is used if no explicit ``:role:`` prefix or suffix is given. The +"default default role" is `:title-reference:`_. It can be changed +using the default-role_ directive. + +See the `Interpreted Text`_ section in the `reStructuredText Markup +Specification`_ for syntax details. For details on the hierarchy of +elements, please see `The Docutils Document Tree`_ and the `Docutils +Generic DTD`_ XML document type definition. For interpreted text role +implementation details, see `Creating reStructuredText Interpreted +Text Roles`_. + +.. _"role" directive: directives.html#role +.. _default-role: directives.html#default-role +.. _Interpreted Text: restructuredtext.html#interpreted-text +.. _reStructuredText Markup Specification: restructuredtext.html +.. _The Docutils Document Tree: ../doctree.html +.. _Docutils Generic DTD: ../docutils.dtd +.. _Creating reStructuredText Interpreted Text Roles: + ../../howto/rst-roles.html + + +.. 
contents:: + + +--------------- + Customization +--------------- + +Custom interpreted text roles may be defined in a document with the +`"role" directive`_. Customization details are listed with each role. + +.. _class: + +A ``class`` option is recognized by the "role" directive for most +interpreted text roles. A description__ is provided in the `"role" +directive`_ documentation. + +__ directives.html#role-class + + +---------------- + Standard Roles +---------------- + +``:emphasis:`` +============== + +:Aliases: None +:DTD Element: emphasis +:Customization: + :Options: class_. + :Content: None. + +Implements emphasis. These are equivalent:: + + *text* + :emphasis:`text` + + +``:literal:`` +============== + +:Aliases: None +:DTD Element: literal +:Customization: + :Options: class_. + :Content: None. + +Implements inline literal text. These are equivalent:: + + ``text`` + :literal:`text` + +Care must be taken with backslash-escapes though. These are *not* +equivalent:: + + ``text \ and \ backslashes`` + :literal:`text \ and \ backslashes` + +The backslashes in the first line are preserved (and do nothing), +whereas the backslashes in the second line escape the following +spaces. + + +``:code:`` +========== + +:Aliases: None +:DTD Element: literal +:Customization: + :Options: class_, language + :Content: None. + +(New in Docutils 0.9.) + +The ``code`` role marks its content as code in a formal language. + +For syntax highlight of inline code, the `"role" directive`_ can be used to +build custom roles with the code language specified in the "language" +option. + +For example, the following creates a LaTeX-specific "latex" role:: + + .. role:: latex(code) + :language: latex + +Content of the new role is parsed and tagged by the Pygments_ syntax +highlighter. See the `code directive`_ for more info on parsing and display +of code in reStructuredText. + +In addition to "class_", the following option is recognized: + +``language`` : text + Name of the code's language. 
+ See `supported languages and markup formats`_ for recognized values. + +.. _code directive: directives.html#code +.. _Pygments: http://pygments.org/ +.. _supported languages and markup formats: http://pygments.org/languages/ + + +``:math:`` +========== + +:Aliases: None +:DTD Element: math +:Customization: + :Options: class_ + :Content: None. + +(New in Docutils 0.8.) + +The ``math`` role marks its content as mathematical notation (inline +formula). + +The input format is LaTeX math syntax without the “math delimiters” +(``$ $``), for example:: + + The area of a circle is :math:`A_\text{c} = (\pi/4) d^2`. + +See the `math directive`_ (producing display formulas) for more info +on mathematical notation in reStructuredText. + +.. _math directive: directives.html#math + + +``:pep-reference:`` +=================== + +:Aliases: ``:PEP:`` +:DTD Element: reference +:Customization: + :Options: class_. + :Content: None. + +The ``:pep-reference:`` role is used to create an HTTP reference to a +PEP (Python Enhancement Proposal). The ``:PEP:`` alias is usually +used. For example:: + + See :PEP:`287` for more information about reStructuredText. + +This is equivalent to:: + + See `PEP 287`__ for more information about reStructuredText. + + __ http://www.python.org/peps/pep-0287.html + + +``:rfc-reference:`` +=================== + +:Aliases: ``:RFC:`` +:DTD Element: reference +:Customization: + :Options: class_. + :Content: None. + +The ``:rfc-reference:`` role is used to create an HTTP reference to an +RFC (Internet Request for Comments). The ``:RFC:`` alias is usually +used. For example:: + + See :RFC:`2822` for information about email headers. + +This is equivalent to:: + + See `RFC 2822`__ for information about email headers. + + __ http://www.faqs.org/rfcs/rfc2822.html + + +``:strong:`` +============ + +:Aliases: None +:DTD Element: strong +:Customization: + :Options: class_. + :Content: None. + +Implements strong emphasis.
These are equivalent:: + + **text** + :strong:`text` + + +``:subscript:`` +=============== + +:Aliases: ``:sub:`` +:DTD Element: subscript +:Customization: + :Options: class_. + :Content: None. + +Implements subscripts. + +.. Tip:: + + Whitespace or punctuation is required around interpreted text, but + often not desired with subscripts & superscripts. + Backslash-escaped whitespace can be used; the whitespace will be + removed from the processed document:: + + H\ :sub:`2`\ O + E = mc\ :sup:`2` + + In such cases, readability of the plain text can be greatly + improved with substitutions:: + + The chemical formula for pure water is |H2O|. + + .. |H2O| replace:: H\ :sub:`2`\ O + + See `the reStructuredText spec`__ for further information on + `character-level markup`__ and `the substitution mechanism`__. + + __ restructuredtext.html + __ restructuredtext.html#character-level-inline-markup + __ restructuredtext.html#substitution-references + + +``:superscript:`` +================= + +:Aliases: ``:sup:`` +:DTD Element: superscript +:Customization: + :Options: class_. + :Content: None. + +Implements superscripts. See the tip in `:subscript:`_ above. + + +``:title-reference:`` +===================== + +:Aliases: ``:title:``, ``:t:``. +:DTD Element: title_reference +:Customization: + :Options: class_. + :Content: None. + +The ``:title-reference:`` role is used to describe the titles of +books, periodicals, and other materials. It is the equivalent of the +HTML "cite" element, and it is expected that HTML writers will +typically render "title_reference" elements using "cite". + +Since title references are typically rendered with italics, they are +often marked up using ``*emphasis*``, which is misleading and vague. +The "title_reference" element provides accurate and unambiguous +descriptive markup. + +Let's assume ``:title-reference:`` is the default interpreted text +role (see below) for this example:: + + `Design Patterns` [GoF95]_ is an excellent read. 
+ +The following document fragment (pseudo-XML_) will result from +processing:: + + <paragraph> + <title_reference> + Design Patterns + + <citation_reference refname="gof95"> + GoF95 + is an excellent read. + +``:title-reference:`` is the default interpreted text role in the +standard reStructuredText parser. This means that no explicit role is +required. Applications of reStructuredText may designate a different +default role, in which case the explicit ``:title-reference:`` role +must be used to obtain a ``title_reference`` element. + + +.. _pseudo-XML: ../doctree.html#pseudo-xml + + +------------------- + Specialized Roles +------------------- + +``raw`` +======= + +:Aliases: None +:DTD Element: raw +:Customization: + :Options: class_, format + :Content: None + +.. WARNING:: + + The "raw" role is a stop-gap measure allowing the author to bypass + reStructuredText's markup. It is a "power-user" feature that + should not be overused or abused. The use of "raw" ties documents + to specific output formats and makes them less portable. + + If you often need to use "raw"-derived interpreted text roles or + the "raw" directive, that is a sign either of overuse/abuse or that + functionality may be missing from reStructuredText. Please + describe your situation in a message to the Docutils-users_ mailing + list. + + .. _Docutils-users: ../../user/mailing-lists.html#docutils-user + +The "raw" role indicates non-reStructuredText data that is to be +passed untouched to the Writer. It is the inline equivalent of the +`"raw" directive`_; see its documentation for details on the +semantics. + +.. _"raw" directive: directives.html#raw-directive + +The "raw" role cannot be used directly. The `"role" directive`_ must +first be used to build custom roles based on the "raw" role. One or +more formats (Writer names) must be provided in a "format" option. + +For example, the following creates an HTML-specific "raw-html" role:: + + .. 
role:: raw-html(raw) + :format: html + +This role can now be used directly to pass data untouched to the HTML +Writer. For example:: + + If there just *has* to be a line break here, + :raw-html:`<br />` + it can be accomplished with a "raw"-derived role. + But the line block syntax should be considered first. + +.. Tip:: Roles based on "raw" should clearly indicate their origin, so + they are not mistaken for reStructuredText markup. Using a "raw-" + prefix for role names is recommended. + +In addition to "class_", the following option is recognized: + +``format`` : text + One or more space-separated output format names (Writer names). diff --git a/tox.ini b/tox.ini index 834e8e7..876d6c9 100644 --- a/tox.ini +++ b/tox.ini @@ -1,8 +1,11 @@ [tox] -envlist = py37 +envlist = py38 isolated_build = True [testenv:py{36,37,38,39}] +deps = + black + flake8 extras = sphinx test @@ -10,9 +13,19 @@ commands = pytest {posargs} [testenv:cli] extras = sphinx -deps = sphinx-panels commands = rst2myst {posargs} +[testenv:docs-{clean,update}] +extras = docs +allowlist_externals = + rm + echo +commands = + clean: rm -rf docs/_build + sphinx-build -n -W --keep-going -c docs/source docs/source docs/_build +commands_post = echo "open docs/_build/index.html" + + [flake8] max-line-length = 88 max-complexity = 13