Add validation of enumerated values

- directly mentioned and linked values are handled, some indirectly defined values are not handled yet
pydicom · Jan 7, 2024 · 4b23b82 · 4b23b82
1 parent 9118c37
commit 4b23b82
Show file tree

Hide file tree

Showing 14 changed files with 344 additions and 24 deletions.
diff --git a/.gitignore b/.gitignore
@@ -14,3 +14,9 @@ dist/
 
 # Tox environments
 .tox/
+
+# pytest
+.pytest_cache
+
+# DICOM standard (downloaded on demand)
+dicom_validator/tests/fixtures/standard/
diff --git a/CHANGES.md b/CHANGES.md
@@ -3,8 +3,11 @@ The released versions correspond to PyPi releases.
 
 ## [Version 0.5.0] (Unreleased)
 
+### Features
+* added checking of most enumerated values (see [#54](../../issues/54))
+
 ### Infrastructure
-* add CI tests for Python 3.12
+* added CI tests for Python 3.12
 
 ## [Version 0.4.1](https://pypi.python.org/pypi/dicom-validator/0.4.1) (2023-11-09)
 Mostly a bugfix release for the condition parser.

diff --git a/README.md b/README.md
@@ -73,9 +73,10 @@ the first time.
 ## validate_iods
 
 This checks a given DICOM file, or all DICOM files recursively in a given
-directory, for correct tags for the related SOP class. Only the presence or
-absence of the tag, and the presence of a tag value is checked, not the
-contained value itself (a check for correct enumerated values may be added later).
+directory, for correct tags for the related SOP class. The presence or
+absence of the tag and the presence of a tag value are checked, and in the
+case of an enumeration defined for the value, the value is also check for validity.
+More checks may be added later.
 This is done by looking up all required and optional modules for this
 SOP class, and checking the tags for these modules. Tags that are not allowed or
 missing in a module are listed. Parts 3 and 4 of the DICOM standard are used

diff --git a/TODO.md b/TODO.md
@@ -6,7 +6,6 @@
 #### Parse attribute description
 * add number of allowed sequences items (1 vs 1-n)
 * add restriction for number of items where available
-* allowed enum values
 
 #### Parse attribute and module conditions
 * collect all used styles for tests :on:

diff --git a/dicom_validator/spec_reader/enum_parser.py b/dicom_validator/spec_reader/enum_parser.py
@@ -0,0 +1,83 @@
+from typing import Callable, Optional, Dict
+
+from pydicom.valuerep import VR, INT_VR, STR_VR
+
+try:
+    import lxml.etree as ElementTree
+except ImportError:
+    import xml.etree.ElementTree as ElementTree
+
+from dicom_validator.spec_reader.condition import ValuesType
+
+
+class EnumParser:
+    """Parses enumerated values for a tag."""
+
+    docbook_ns = "{http://docbook.org/ns/docbook}"
+
+    def __init__(
+        self, find_section: Callable[[str], Optional[ElementTree.Element]]
+    ) -> None:
+        self._find_section = find_section
+        self._enum_cache: Dict[str, ValuesType] = {}
+
+    def parse(self, node: ElementTree.Element, vr: VR) -> ValuesType:
+        """Searches for enumerated values in the tag description and in linked sections.
+        Returns a list of the allowed values, or an empty list if none found.
+        """
+        var_list = node.find(self.docbook_ns + "variablelist")
+        if var_list is not None:
+            enums = self.parse_variable_list(var_list)
+        else:
+            enums = self.parse_linked_variablelist(node)
+        if enums:
+            if vr == VR.AT:
+                return []  # this is included in INT_VRs, but won't work
+            if vr in INT_VR:
+                int_enums: ValuesType = []
+                for e in enums:
+                    assert isinstance(e, str)
+                    if e.endswith("H"):
+                        int_enums.append(int(e[:-1], 16))
+                    else:
+                        int_enums.append(int(e))
+                return int_enums
+            if vr in STR_VR:
+                return enums
+            # any other VR does not make sense here
+            print(
+                f"Ignoring enum values: "
+                f"{', '.join([str(e) for e in enums])} with VR {vr}"
+            )
+        return []
+
+    def parse_variable_list(self, var_list) -> ValuesType:
+        # we assume that a variablelist contains enumerated values or defined terms
+        # we ignore defined terms, as they do not limit the possible values
+        title = var_list.find(self.docbook_ns + "title")
+        # TODO: handle cases with conditions
+        if title is None or title.text.lower() not in (
+            "enumerated values",
+            "enumerated values:",
+        ):
+            return []
+        terms = []
+        for item in var_list.findall(self.docbook_ns + "varlistentry"):
+            term = item.find(self.docbook_ns + "term")
+            if term is not None:
+                terms.append(term.text)
+        return terms
+
+    def parse_linked_variablelist(self, node) -> ValuesType:
+        for xref in node.findall(f"{self.docbook_ns}para/{self.docbook_ns}xref"):
+            link = xref.attrib.get("linkend")
+            if link and link.startswith("sect_"):
+                if link in self._enum_cache:
+                    return self._enum_cache[link]
+                section = self._find_section(link[5:])
+                if section is not None:
+                    var_list = section.find(f"{self.docbook_ns}variablelist")
+                    if var_list is not None:
+                        self._enum_cache[link] = self.parse_variable_list(var_list)
+                        return self._enum_cache[link]
+        return []
diff --git a/dicom_validator/spec_reader/part3_reader.py b/dicom_validator/spec_reader/part3_reader.py
@@ -10,6 +10,7 @@
 
 from dicom_validator.spec_reader.condition import ConditionType, ConditionOperator
 from dicom_validator.spec_reader.condition_parser import ConditionParser
+from dicom_validator.spec_reader.enum_parser import EnumParser
 from dicom_validator.spec_reader.spec_reader import (
     SpecReader,
     SpecReaderParseError,
@@ -23,7 +24,6 @@ class Part3Reader(SpecReader):
     def __init__(self, spec_dir, dict_info):
         super(Part3Reader, self).__init__(spec_dir)
         self.part_nr = 3
-        self._condition_parser = None
         self._dict_info = dict_info
         self._iod_descriptions = {}
         self._iod_nodes = {}
@@ -33,6 +33,10 @@ def __init__(self, spec_dir, dict_info):
         if not self.logger.hasHandlers():
             self.logger.addHandler(logging.StreamHandler(sys.stdout))
         self._condition_parser = ConditionParser(self._dict_info)
+        self._enum_parser = EnumParser(self.find_section)
+
+    def find_section(self, name):
+        return self.get_doc_root().find(f".//{self.docbook_ns}section[@label='{name}']")
 
     def iod_description(self, chapter):
         """Return the IOD information for the given chapter.
@@ -108,7 +112,7 @@ def module_descriptions(self):
 
     def _get_iod_nodes(self):
         if not self._iod_nodes:
-            chapter_a = self._find(self._get_doc_root(), ['chapter[@label="A"]'])
+            chapter_a = self._find(self.get_doc_root(), ['chapter[@label="A"]'])
             if chapter_a is None:
                 raise SpecReaderParseError("Chapter A in Part 3 not found")
             # ignore A.1
@@ -142,15 +146,15 @@ def _get_section_node(self, section):
         for section_part in section_parts[1:]:
             section_name = section_name + "." + section_part
             search_path.append(f'section[@label="{section_name}"]')
-        section_node = self._find(self._get_doc_root(), search_path)
+        section_node = self._find(self.get_doc_root(), search_path)
         # handle incorrect nesting in specific section
         if (
             section_node is None
             and section_parts[:-1] == ["C", "8", "31"]
             and int(section_parts[-1]) > 6
         ):
             search_path.insert(-1, 'section[@label="C.8.31.6"]')
-            section_node = self._find(self._get_doc_root(), search_path)
+            section_node = self._find(self.get_doc_root(), search_path)
         return section_node
 
     def _parse_iod_node(self, iod_node):
@@ -250,6 +254,11 @@ def _handle_regular_attribute(
                 current_descriptions[-1][tag_id]["cond"] = self._condition_parser.parse(
                     self._find_all_text(columns[3])
                 )
+            info = self._dict_info.get(tag_id)
+            if info:
+                enum_values = self._enum_parser.parse(columns[3], info["vr"])
+                if enum_values:
+                    current_descriptions[-1][tag_id]["enums"] = enum_values
 
             last_tag_id = tag_id
         return last_tag_id

diff --git a/dicom_validator/spec_reader/part4_reader.py b/dicom_validator/spec_reader/part4_reader.py
@@ -38,7 +38,7 @@ def iod_chapters(self):
 
     def _read_sop_table(self, chapter):
         table = self._find(
-            self._get_doc_root(),
+            self.get_doc_root(),
             ['chapter[@label="B"]', f'section[@label="{chapter}"]', "table", "tbody"],
         )
         if table is None:

diff --git a/dicom_validator/spec_reader/part6_reader.py b/dicom_validator/spec_reader/part6_reader.py
@@ -44,7 +44,7 @@ def data_element(self, tag_id):
     def _read_element_table(self):
         self._data_elements = {}
         table = self._find(
-            self._get_doc_root(), ['chapter[@label="6"]', "table", "tbody"]
+            self.get_doc_root(), ['chapter[@label="6"]', "table", "tbody"]
         )
         if table is None:
             raise SpecReaderParseError(
@@ -96,7 +96,7 @@ def _get_uids(self):
         if self._uids is None:
             self._uids = {}
             table = self._find(
-                self._get_doc_root(), ['chapter[@label="A"]', "table", "tbody"]
+                self.get_doc_root(), ['chapter[@label="A"]', "table", "tbody"]
             )
             if table is None:
                 raise SpecReaderParseError(

diff --git a/dicom_validator/spec_reader/spec_reader.py b/dicom_validator/spec_reader/spec_reader.py
@@ -49,7 +49,7 @@ def _get_doc_tree(self):
                 )
         return self._doc_trees.get(self.part_nr)
 
-    def _get_doc_root(self):
+    def get_doc_root(self):
         doc_tree = self._get_doc_tree()
         if doc_tree:
             return doc_tree.getroot()

diff --git a/dicom_validator/tests/spec_reader/test_enum_parser.py b/dicom_validator/tests/spec_reader/test_enum_parser.py
@@ -0,0 +1,142 @@
+from typing import Optional, Generator
+from xml.etree import ElementTree
+
+import pytest
+from pydicom.valuerep import VR
+
+from dicom_validator.spec_reader.enum_parser import EnumParser
+from dicom_validator.spec_reader.part3_reader import Part3Reader
+
+
+@pytest.fixture
+def chapter_c(dict_reader, spec_path):
+    yield Part3Reader(spec_path, dict_reader.data_elements()).get_doc_root().find(
+        ['chapter[@label="C"]']
+    )
+
+
+def find_chapter(name: str):
+    return None
+
+
+@pytest.fixture
+def parser() -> Generator[EnumParser, None, None]:
+    yield EnumParser(find_chapter)
+
+
+def section(contents, label="C.42.3.5") -> Optional[ElementTree.Element]:
+    xml = f"""<?xml version="1.0" encoding="utf-8"?>
+    <book xmlns="http://docbook.org/ns/docbook" xmlns:xl="http://www.w3.org/1999/xlink">
+    <chapter label="C"><section label="{label}" xml:id="sect_{label}">
+    {contents}
+    </section></chapter>
+    </book>"""
+    doc = ElementTree.fromstring(xml)
+    return doc.find(f".//{{http://docbook.org/ns/docbook}}section[@label='{label}']")
+
+
+class TestEmptyEnumParser:
+    @pytest.mark.section_content("")
+    def test_empty_description(self, parser):
+        assert parser.parse(section(""), VR.SH) == []
+
+    def test_incorrect_tag(self, parser):
+        content = """<variablelist>
+        <div>Enumerated Values:</div>
+        <varlistentry>
+        <term>NO</term>
+        </varlistentry>
+        </variablelist>"""
+        assert parser.parse(section(content), VR.SS) == []
+
+    def test_empty_list(self, parser):
+        content = """<variablelist>
+        <title>Enumerated Values:</title>
+        </variablelist>"""
+        assert parser.parse(section(content), VR.LO) == []
+
+
+class TestEnumParser:
+    @pytest.mark.section_content()
+    def test_single_enum(self, parser):
+        content = """<variablelist>
+        <title>Enumerated Values:</title>
+        <varlistentry>
+        <term>NO</term>
+        </varlistentry>
+        </variablelist>"""
+        assert parser.parse(section(content), VR.SH) == ["NO"]
+
+    def test_single_enum_with_extra_tag(self, parser):
+        content = """<variablelist>
+        <title>Enumerated Values:</title>
+        <varlistentry>
+        <listitem>
+        <para xml:id="para_f6578fe2-d628-412e-8314-af2c8961b633"/>
+        </listitem>
+        <term>NO</term>
+        </varlistentry>
+        </variablelist>"""
+        assert parser.parse(section(content), VR.SH) == ["NO"]
+
+    def test_two_enums(self, parser):
+        content = """<variablelist>
+        <title>Enumerated Values:</title>
+        <varlistentry>
+        <term>YES</term>
+        </varlistentry>
+        <varlistentry>
+        <term>NO</term>
+        </varlistentry>
+        </variablelist>"""
+        assert parser.parse(section(content), VR.SH) == ["YES", "NO"]
+
+    def test_int_enums(self, parser):
+        content = """<variablelist>
+        <title>Enumerated Values:</title>
+        <varlistentry>
+        <term>0000</term>
+        </varlistentry>
+        <varlistentry>
+        <term>0001</term>
+        </varlistentry>
+        </variablelist>"""
+        assert parser.parse(section(content), VR.US) == [0, 1]
+
+    def test_hex_enums(self, parser):
+        content = """<variablelist>
+        <title>Enumerated Values:</title>
+        <varlistentry>
+        <term>0010H</term>
+        </varlistentry>
+        <varlistentry>
+        <term>0011H</term>
+        </varlistentry>
+        </variablelist>"""
+        assert parser.parse(section(content), VR.US) == [16, 17]
+
+    def test_linked_enum(self):
+        content = """<para>Bla blah, see
+        <xref linkend="sect_10.7.1.2" xrefstyle="select: label"/>.</para>"
+        """
+        linked = """
+        <title>Pixel Spacing Calibration Type</title>
+        <para xml:id="para_f24c1">Pixel Spacing Calibration Type.</para>
+        <variablelist spacing="compact">
+            <title>Enumerated Values:</title>
+            <varlistentry>
+                <term>GEOMETRY</term>
+                <listitem>
+                    <para xml:id="para_a79e2faf">Description...</para>
+                </listitem>
+            </varlistentry>
+            <varlistentry>
+                <term>FIDUCIAL</term>
+                <listitem>
+                    <para xml:id="para_10de0799">Another...</para>
+                </listitem>
+            </varlistentry>
+        </variablelist>
+        """
+        parser = EnumParser(lambda s: section(linked, s))
+        assert parser.parse(section(content), VR.SH) == ["GEOMETRY", "FIDUCIAL"]
diff --git a/dicom_validator/tests/spec_reader/test_part3_reader.py b/dicom_validator/tests/spec_reader/test_part3_reader.py
@@ -168,3 +168,20 @@ def test_conditional_include_in_sr_module(self, reader):
         assert condition.operator == ConditionOperator.EqualsValue
         assert condition.tag == "(0040,A040)"
         assert condition.values == ["NUM"]
+
+    def test_parsed_enum_values(self, reader):
+        description = reader.module_description("10.25")
+        assert "(0082,0036)" in description
+        assert "enums" in description["(0082,0036)"]
+        enums = description["(0082,0036)"]["enums"]
+        assert enums == ["FAILURE", "WARNING", "INFORMATIVE"]
+
+    def test_linked_enum_values(self, reader):
+        description = reader.module_description("10.24")
+        assert "(300A,0450)" in description  # Device Motion Control Sequence
+        assert "items" in description["(300A,0450)"]
+        assert "(300A,0451)" in description["(300A,0450)"]["items"]
+        # Device Motion Execution Mode
+        tag = description["(300A,0450)"]["items"]["(300A,0451)"]
+        assert "enums" in tag
+        assert tag["enums"] == ["CONTINUOUS", "TRIGGERED", "AUTOMATIC"]