Skip to content

Commit

Permalink
Add validation of enumerated values
Browse files Browse the repository at this point in the history
- directly mentioned and linked values are handled,
  some indirectly defined values are not handled yet
  • Loading branch information
mrbean-bremen committed Jan 7, 2024
1 parent 9118c37 commit 4b23b82
Show file tree
Hide file tree
Showing 14 changed files with 344 additions and 24 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ dist/

# Tox environments
.tox/

# pytest
.pytest_cache

# DICOM standard (downloaded on demand)
dicom_validator/tests/fixtures/standard/
5 changes: 4 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ The released versions correspond to PyPi releases.

## [Version 0.5.0] (Unreleased)

### Features
* added checking of most enumerated values (see [#54](../../issues/54))

### Infrastructure
* add CI tests for Python 3.12
* added CI tests for Python 3.12

## [Version 0.4.1](https://pypi.python.org/pypi/dicom-validator/0.4.1) (2023-11-09)
Mostly a bugfix release for the condition parser.
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ the first time.
## validate_iods

This checks a given DICOM file, or all DICOM files recursively in a given
directory, for correct tags for the related SOP class. Only the presence or
absence of the tag, and the presence of a tag value is checked, not the
contained value itself (a check for correct enumerated values may be added later).
directory, for correct tags for the related SOP class. The presence or
absence of the tag and the presence of a tag value are checked, and in the
case of an enumeration defined for the value, the value is also check for validity.
More checks may be added later.
This is done by looking up all required and optional modules for this
SOP class, and checking the tags for these modules. Tags that are not allowed or
missing in a module are listed. Parts 3 and 4 of the DICOM standard are used
Expand Down
1 change: 0 additions & 1 deletion TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
#### Parse attribute description
* add number of allowed sequences items (1 vs 1-n)
* add restriction for number of items where available
* allowed enum values

#### Parse attribute and module conditions
* collect all used styles for tests :on:
Expand Down
83 changes: 83 additions & 0 deletions dicom_validator/spec_reader/enum_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from typing import Callable, Optional, Dict

from pydicom.valuerep import VR, INT_VR, STR_VR

try:
import lxml.etree as ElementTree
except ImportError:
import xml.etree.ElementTree as ElementTree

from dicom_validator.spec_reader.condition import ValuesType


class EnumParser:
"""Parses enumerated values for a tag."""

docbook_ns = "{http://docbook.org/ns/docbook}"

def __init__(
self, find_section: Callable[[str], Optional[ElementTree.Element]]
) -> None:
self._find_section = find_section
self._enum_cache: Dict[str, ValuesType] = {}

def parse(self, node: ElementTree.Element, vr: VR) -> ValuesType:
"""Searches for enumerated values in the tag description and in linked sections.
Returns a list of the allowed values, or an empty list if none found.
"""
var_list = node.find(self.docbook_ns + "variablelist")
if var_list is not None:
enums = self.parse_variable_list(var_list)
else:
enums = self.parse_linked_variablelist(node)
if enums:
if vr == VR.AT:
return [] # this is included in INT_VRs, but won't work
if vr in INT_VR:
int_enums: ValuesType = []
for e in enums:
assert isinstance(e, str)
if e.endswith("H"):
int_enums.append(int(e[:-1], 16))
else:
int_enums.append(int(e))
return int_enums
if vr in STR_VR:
return enums
# any other VR does not make sense here
print(
f"Ignoring enum values: "
f"{', '.join([str(e) for e in enums])} with VR {vr}"
)
return []

def parse_variable_list(self, var_list) -> ValuesType:
# we assume that a variablelist contains enumerated values or defined terms
# we ignore defined terms, as they do not limit the possible values
title = var_list.find(self.docbook_ns + "title")
# TODO: handle cases with conditions
if title is None or title.text.lower() not in (
"enumerated values",
"enumerated values:",
):
return []
terms = []
for item in var_list.findall(self.docbook_ns + "varlistentry"):
term = item.find(self.docbook_ns + "term")
if term is not None:
terms.append(term.text)
return terms

def parse_linked_variablelist(self, node) -> ValuesType:
for xref in node.findall(f"{self.docbook_ns}para/{self.docbook_ns}xref"):
link = xref.attrib.get("linkend")
if link and link.startswith("sect_"):
if link in self._enum_cache:
return self._enum_cache[link]
section = self._find_section(link[5:])
if section is not None:
var_list = section.find(f"{self.docbook_ns}variablelist")
if var_list is not None:
self._enum_cache[link] = self.parse_variable_list(var_list)
return self._enum_cache[link]
return []
17 changes: 13 additions & 4 deletions dicom_validator/spec_reader/part3_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from dicom_validator.spec_reader.condition import ConditionType, ConditionOperator
from dicom_validator.spec_reader.condition_parser import ConditionParser
from dicom_validator.spec_reader.enum_parser import EnumParser
from dicom_validator.spec_reader.spec_reader import (
SpecReader,
SpecReaderParseError,
Expand All @@ -23,7 +24,6 @@ class Part3Reader(SpecReader):
def __init__(self, spec_dir, dict_info):
super(Part3Reader, self).__init__(spec_dir)
self.part_nr = 3
self._condition_parser = None
self._dict_info = dict_info
self._iod_descriptions = {}
self._iod_nodes = {}
Expand All @@ -33,6 +33,10 @@ def __init__(self, spec_dir, dict_info):
if not self.logger.hasHandlers():
self.logger.addHandler(logging.StreamHandler(sys.stdout))
self._condition_parser = ConditionParser(self._dict_info)
self._enum_parser = EnumParser(self.find_section)

def find_section(self, name):
return self.get_doc_root().find(f".//{self.docbook_ns}section[@label='{name}']")

def iod_description(self, chapter):
"""Return the IOD information for the given chapter.
Expand Down Expand Up @@ -108,7 +112,7 @@ def module_descriptions(self):

def _get_iod_nodes(self):
if not self._iod_nodes:
chapter_a = self._find(self._get_doc_root(), ['chapter[@label="A"]'])
chapter_a = self._find(self.get_doc_root(), ['chapter[@label="A"]'])
if chapter_a is None:
raise SpecReaderParseError("Chapter A in Part 3 not found")
# ignore A.1
Expand Down Expand Up @@ -142,15 +146,15 @@ def _get_section_node(self, section):
for section_part in section_parts[1:]:
section_name = section_name + "." + section_part
search_path.append(f'section[@label="{section_name}"]')
section_node = self._find(self._get_doc_root(), search_path)
section_node = self._find(self.get_doc_root(), search_path)
# handle incorrect nesting in specific section
if (
section_node is None
and section_parts[:-1] == ["C", "8", "31"]
and int(section_parts[-1]) > 6
):
search_path.insert(-1, 'section[@label="C.8.31.6"]')
section_node = self._find(self._get_doc_root(), search_path)
section_node = self._find(self.get_doc_root(), search_path)
return section_node

def _parse_iod_node(self, iod_node):
Expand Down Expand Up @@ -250,6 +254,11 @@ def _handle_regular_attribute(
current_descriptions[-1][tag_id]["cond"] = self._condition_parser.parse(
self._find_all_text(columns[3])
)
info = self._dict_info.get(tag_id)
if info:
enum_values = self._enum_parser.parse(columns[3], info["vr"])
if enum_values:
current_descriptions[-1][tag_id]["enums"] = enum_values

last_tag_id = tag_id
return last_tag_id
Expand Down
2 changes: 1 addition & 1 deletion dicom_validator/spec_reader/part4_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def iod_chapters(self):

def _read_sop_table(self, chapter):
table = self._find(
self._get_doc_root(),
self.get_doc_root(),
['chapter[@label="B"]', f'section[@label="{chapter}"]', "table", "tbody"],
)
if table is None:
Expand Down
4 changes: 2 additions & 2 deletions dicom_validator/spec_reader/part6_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def data_element(self, tag_id):
def _read_element_table(self):
self._data_elements = {}
table = self._find(
self._get_doc_root(), ['chapter[@label="6"]', "table", "tbody"]
self.get_doc_root(), ['chapter[@label="6"]', "table", "tbody"]
)
if table is None:
raise SpecReaderParseError(
Expand Down Expand Up @@ -96,7 +96,7 @@ def _get_uids(self):
if self._uids is None:
self._uids = {}
table = self._find(
self._get_doc_root(), ['chapter[@label="A"]', "table", "tbody"]
self.get_doc_root(), ['chapter[@label="A"]', "table", "tbody"]
)
if table is None:
raise SpecReaderParseError(
Expand Down
2 changes: 1 addition & 1 deletion dicom_validator/spec_reader/spec_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _get_doc_tree(self):
)
return self._doc_trees.get(self.part_nr)

def _get_doc_root(self):
def get_doc_root(self):
doc_tree = self._get_doc_tree()
if doc_tree:
return doc_tree.getroot()
Expand Down
142 changes: 142 additions & 0 deletions dicom_validator/tests/spec_reader/test_enum_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from typing import Optional, Generator
from xml.etree import ElementTree

import pytest
from pydicom.valuerep import VR

from dicom_validator.spec_reader.enum_parser import EnumParser
from dicom_validator.spec_reader.part3_reader import Part3Reader


@pytest.fixture
def chapter_c(dict_reader, spec_path):
yield Part3Reader(spec_path, dict_reader.data_elements()).get_doc_root().find(
['chapter[@label="C"]']
)


def find_chapter(name: str):
return None


@pytest.fixture
def parser() -> Generator[EnumParser, None, None]:
yield EnumParser(find_chapter)


def section(contents, label="C.42.3.5") -> Optional[ElementTree.Element]:
xml = f"""<?xml version="1.0" encoding="utf-8"?>
<book xmlns="http://docbook.org/ns/docbook" xmlns:xl="http://www.w3.org/1999/xlink">
<chapter label="C"><section label="{label}" xml:id="sect_{label}">
{contents}
</section></chapter>
</book>"""
doc = ElementTree.fromstring(xml)
return doc.find(f".//{{http://docbook.org/ns/docbook}}section[@label='{label}']")


class TestEmptyEnumParser:
@pytest.mark.section_content("")
def test_empty_description(self, parser):
assert parser.parse(section(""), VR.SH) == []

def test_incorrect_tag(self, parser):
content = """<variablelist>
<div>Enumerated Values:</div>
<varlistentry>
<term>NO</term>
</varlistentry>
</variablelist>"""
assert parser.parse(section(content), VR.SS) == []

def test_empty_list(self, parser):
content = """<variablelist>
<title>Enumerated Values:</title>
</variablelist>"""
assert parser.parse(section(content), VR.LO) == []


class TestEnumParser:
@pytest.mark.section_content()
def test_single_enum(self, parser):
content = """<variablelist>
<title>Enumerated Values:</title>
<varlistentry>
<term>NO</term>
</varlistentry>
</variablelist>"""
assert parser.parse(section(content), VR.SH) == ["NO"]

def test_single_enum_with_extra_tag(self, parser):
content = """<variablelist>
<title>Enumerated Values:</title>
<varlistentry>
<listitem>
<para xml:id="para_f6578fe2-d628-412e-8314-af2c8961b633"/>
</listitem>
<term>NO</term>
</varlistentry>
</variablelist>"""
assert parser.parse(section(content), VR.SH) == ["NO"]

def test_two_enums(self, parser):
content = """<variablelist>
<title>Enumerated Values:</title>
<varlistentry>
<term>YES</term>
</varlistentry>
<varlistentry>
<term>NO</term>
</varlistentry>
</variablelist>"""
assert parser.parse(section(content), VR.SH) == ["YES", "NO"]

def test_int_enums(self, parser):
content = """<variablelist>
<title>Enumerated Values:</title>
<varlistentry>
<term>0000</term>
</varlistentry>
<varlistentry>
<term>0001</term>
</varlistentry>
</variablelist>"""
assert parser.parse(section(content), VR.US) == [0, 1]

def test_hex_enums(self, parser):
content = """<variablelist>
<title>Enumerated Values:</title>
<varlistentry>
<term>0010H</term>
</varlistentry>
<varlistentry>
<term>0011H</term>
</varlistentry>
</variablelist>"""
assert parser.parse(section(content), VR.US) == [16, 17]

def test_linked_enum(self):
content = """<para>Bla blah, see
<xref linkend="sect_10.7.1.2" xrefstyle="select: label"/>.</para>"
"""
linked = """
<title>Pixel Spacing Calibration Type</title>
<para xml:id="para_f24c1">Pixel Spacing Calibration Type.</para>
<variablelist spacing="compact">
<title>Enumerated Values:</title>
<varlistentry>
<term>GEOMETRY</term>
<listitem>
<para xml:id="para_a79e2faf">Description...</para>
</listitem>
</varlistentry>
<varlistentry>
<term>FIDUCIAL</term>
<listitem>
<para xml:id="para_10de0799">Another...</para>
</listitem>
</varlistentry>
</variablelist>
"""
parser = EnumParser(lambda s: section(linked, s))
assert parser.parse(section(content), VR.SH) == ["GEOMETRY", "FIDUCIAL"]
17 changes: 17 additions & 0 deletions dicom_validator/tests/spec_reader/test_part3_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,20 @@ def test_conditional_include_in_sr_module(self, reader):
assert condition.operator == ConditionOperator.EqualsValue
assert condition.tag == "(0040,A040)"
assert condition.values == ["NUM"]

def test_parsed_enum_values(self, reader):
description = reader.module_description("10.25")
assert "(0082,0036)" in description
assert "enums" in description["(0082,0036)"]
enums = description["(0082,0036)"]["enums"]
assert enums == ["FAILURE", "WARNING", "INFORMATIVE"]

def test_linked_enum_values(self, reader):
description = reader.module_description("10.24")
assert "(300A,0450)" in description # Device Motion Control Sequence
assert "items" in description["(300A,0450)"]
assert "(300A,0451)" in description["(300A,0450)"]["items"]
# Device Motion Execution Mode
tag = description["(300A,0450)"]["items"]["(300A,0451)"]
assert "enums" in tag
assert tag["enums"] == ["CONTINUOUS", "TRIGGERED", "AUTOMATIC"]
Loading

0 comments on commit 4b23b82

Please sign in to comment.