From 50abe1c3a7b85a9ac59b0399ce4e789939f18422 Mon Sep 17 00:00:00 2001 From: Jan Range Date: Thu, 19 Dec 2024 15:23:52 +0100 Subject: [PATCH] store document positions upon parsing --- src/attribute.rs | 15 +++++- src/datamodel.rs | 6 +++ src/json/export.rs | 2 + src/markdown/parser.rs | 107 +++++++++++++++++++++++++++++++---------- src/object.rs | 27 ++++++++++- 5 files changed, 129 insertions(+), 28 deletions(-) diff --git a/src/attribute.rs b/src/attribute.rs index 9f71e4d..1e662f2 100644 --- a/src/attribute.rs +++ b/src/attribute.rs @@ -21,7 +21,7 @@ * */ -use crate::xmltype::XMLType; +use crate::{markdown::parser::Position, xmltype::XMLType}; use serde::{de::Visitor, Deserialize, Serialize}; use std::{error::Error, fmt, str::FromStr}; @@ -57,6 +57,9 @@ pub struct Attribute { pub xml: Option, /// Is an enumeration or not pub is_enum: bool, + /// The line number of the attribute + #[serde(skip_serializing)] + pub position: Option, } impl Attribute { @@ -79,6 +82,7 @@ impl Attribute { xml: Some(XMLType::from_str(name.as_str()).unwrap()), default: None, is_enum: false, + position: None, } } @@ -91,6 +95,15 @@ impl Attribute { self.docstring = docstring; } + /// Sets the line number of the attribute. + /// + /// # Arguments + /// + /// * `position` - The position to set. + pub fn set_position(&mut self, position: Position) { + self.position = Some(position); + } + /// Adds an option to the attribute. /// /// # Arguments diff --git a/src/datamodel.rs b/src/datamodel.rs index aa86d73..a46b66f 100644 --- a/src/datamodel.rs +++ b/src/datamodel.rs @@ -359,6 +359,7 @@ mod tests { xml: None, default: None, is_enum: false, + position: None, }); let mut obj2 = Object::new("Object2".to_string(), None); @@ -374,18 +375,21 @@ mod tests { xml: None, default: None, is_enum: false, + position: None, }); let enm1 = Enumeration { name: "Enum1".to_string(), mappings: BTreeMap::from([("key1".to_string(), "value1".to_string())]), docstring: "".to_string(), + position: None, }; let enm2 = Enumeration { name: "Enum2".to_string(), mappings: BTreeMap::from([("key2".to_string(), "value2".to_string())]), docstring: "".to_string(), + position: None, }; model1.objects.push(obj1); @@ -422,6 +426,7 @@ mod tests { xml: None, default: Some(DataType::String("".to_string())), is_enum: false, + position: None, }); obj.add_attribute(crate::attribute::Attribute { @@ -436,6 +441,7 @@ mod tests { xml: None, default: None, is_enum: false, + position: None, }); model.objects.push(obj); diff --git a/src/json/export.rs b/src/json/export.rs index da9b71c..166f936 100644 --- a/src/json/export.rs +++ b/src/json/export.rs @@ -530,6 +530,7 @@ mod tests { default: None, xml: None, is_enum: false, + position: None, }; let property: schema::Property = @@ -563,6 +564,7 @@ mod tests { default: None, xml: None, is_enum: false, + position: None, }; let property: schema::Property = diff --git a/src/markdown/parser.rs b/src/markdown/parser.rs index 6b46bfe..7d8b82b 100644 --- a/src/markdown/parser.rs +++ b/src/markdown/parser.rs @@ -25,10 +25,11 @@ use colored::Colorize; use core::panic; use lazy_static::lazy_static; use log::error; +use serde::{Deserialize, Serialize}; use std::collections::BTreeMap; use std::error::Error; -use pulldown_cmark::{CowStr, Event, HeadingLevel, Parser, Tag, TagEnd}; +use pulldown_cmark::{CowStr, Event, HeadingLevel, OffsetIter, Options, Parser, Tag, TagEnd}; use regex::Regex; use crate::attribute; @@ -82,6 +83,13 @@ enum ParserState { InHeading, } +// Add this struct to track positions +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +pub struct Position { + pub line: usize, + pub range: (usize, usize), +} + /// Parses a Markdown file at the given path and returns a `DataModel`. /// /// # Arguments @@ -98,9 +106,18 @@ pub fn parse_markdown(content: &str) -> Result { // Parse the frontmatter let config = parse_frontmatter(&content); - // Parse the markdown content - let parser = Parser::new(&content); - let mut iterator = parser.into_iter(); + // Create line offset mapping + let line_offsets: Vec = content + .char_indices() + .filter(|(_, c)| *c == '\n') + .map(|(i, _)| i) + .collect(); + + // Create parser with options to enable offset tracking + let mut options = Options::empty(); + options.insert(Options::ENABLE_HEADING_ATTRIBUTES); + let parser = Parser::new_ext(&content, options); + let mut iterator = parser.into_offset_iter(); let mut objects = Vec::new(); let mut enums = Vec::new(); @@ -110,15 +127,22 @@ pub fn parse_markdown(content: &str) -> Result { // Extract objects from the markdown file let mut state = ParserState::OutsideDefinition; while let Some(event) = iterator.next() { - process_object_event(&mut iterator, &mut objects, event, &mut model, &mut state); + process_object_event( + &mut iterator, + &mut objects, + event, + &mut model, + &mut state, + &line_offsets, + ); } // Reset the iterator let parser = Parser::new(&content); - let mut iterator = parser.into_iter(); + let mut iterator = parser.into_offset_iter(); - while let Some(event) = iterator.next() { - process_enum_event(&mut iterator, &mut enums, event); + while let Some((event, range)) = iterator.next() { + process_enum_event(&mut iterator, &mut enums, (event, range), &line_offsets); } // Filter empty objects and enums @@ -157,6 +181,20 @@ fn clean_content(content: &str) -> String { content } +// Helper function to convert byte offset to line number +fn get_position(line_offsets: &[usize], start: usize, end: usize) -> Position { + match line_offsets.binary_search(&start) { + Ok(line) => Position { + line: line + 1, + range: (start, end), + }, + Err(line) => Position { + line: line + 1, + range: (start, end), + }, + } +} + /// Processes a single Markdown event for object extraction. /// /// # Arguments @@ -166,12 +204,15 @@ fn clean_content(content: &str) -> String { /// * `event` - The current Markdown event. /// * `model` - A mutable reference to the data model. fn process_object_event( - iterator: &mut Parser, + iterator: &mut pulldown_cmark::OffsetIter, objects: &mut Vec, - event: Event, + event: (Event, std::ops::Range), // Now includes offset range model: &mut DataModel, state: &mut ParserState, + line_offsets: &[usize], // Pass in line offsets ) { + let (event, range) = event; + match event { Event::Start(tag) if tag == H1 => { model.name = Some(extract_name(iterator)); @@ -181,7 +222,8 @@ fn process_object_event( } Event::Start(tag) if tag == H3 => { *state = ParserState::InHeading; - let object = process_object_heading(iterator); + let mut object = process_object_heading(iterator); + object.set_position(get_position(line_offsets, range.start, range.end)); objects.push(object); } Event::End(tag) if tag == H3_END => { @@ -194,7 +236,7 @@ fn process_object_event( let parent = iterator.next(); match parent { - Some(Event::Text(text)) if text.to_string() != "]" => { + Some((Event::Text(text), _)) if text.to_string() != "]" => { last_object.parent = Some(text.to_string()); } _ => { @@ -220,7 +262,8 @@ fn process_object_event( if !last_object.has_attributes() { iterator.next(); let (required, attr_name) = extract_attr_name_required(iterator); - let attribute = attribute::Attribute::new(attr_name, required); + let mut attribute = attribute::Attribute::new(attr_name, required); + attribute.set_position(get_position(line_offsets, range.start, range.end)); objects.last_mut().unwrap().add_attribute(attribute); } else { let attr_strings = extract_attribute_options(iterator); @@ -235,7 +278,8 @@ fn process_object_event( } let (required, attr_string) = extract_attr_name_required(iterator); - let attribute = attribute::Attribute::new(attr_string, required); + let mut attribute = attribute::Attribute::new(attr_string, required); + attribute.set_position(get_position(line_offsets, range.start, range.end)); objects.last_mut().unwrap().add_attribute(attribute); } Event::Text(text) => { @@ -257,7 +301,7 @@ fn process_object_event( /// # Returns /// /// An `Object` created from the heading. -fn process_object_heading(iterator: &mut Parser) -> object::Object { +fn process_object_heading(iterator: &mut OffsetIter) -> object::Object { let heading = extract_name(iterator); let term = extract_object_term(&heading); let name = heading.split_whitespace().next().unwrap().to_string(); @@ -274,14 +318,14 @@ fn process_object_heading(iterator: &mut Parser) -> object::Object { /// # Returns /// /// A string containing the extracted name. -fn extract_name(iterator: &mut Parser) -> String { - if let Some(Event::Text(text)) = iterator.next() { +fn extract_name(iterator: &mut OffsetIter) -> String { + if let Some((Event::Text(text), _)) = iterator.next() { return text.to_string(); } // Try for two text events for _ in 0..2 { - if let Some(Event::Text(text)) = iterator.next() { + if let Some((Event::Text(text), _)) = iterator.next() { return text.to_string(); } } @@ -298,14 +342,14 @@ fn extract_name(iterator: &mut Parser) -> String { /// # Returns /// /// A tuple containing a boolean indicating if the attribute is required and the attribute name. -fn extract_attr_name_required(iterator: &mut Parser) -> (bool, String) { - if let Some(Event::Text(text)) = iterator.next() { +fn extract_attr_name_required(iterator: &mut OffsetIter) -> (bool, String) { + if let Some((Event::Text(text), _)) = iterator.next() { return (false, text.to_string()); } // Try for two text events for _ in 0..2 { - if let Some(Event::Text(text)) = iterator.next() { + if let Some((Event::Text(text), _)) = iterator.next() { return (true, text.to_string()); } } @@ -338,9 +382,9 @@ fn extract_object_term(heading: &str) -> Option { /// # Returns /// /// A vector of strings containing the extracted attribute options. -fn extract_attribute_options(iterator: &mut Parser) -> Vec { +fn extract_attribute_options(iterator: &mut OffsetIter) -> Vec { let mut options = Vec::new(); - while let Some(next) = iterator.next() { + while let Some((next, _)) = iterator.next() { match next { Event::Start(Tag::Item) => { let name = extract_name(iterator); @@ -435,20 +479,31 @@ fn process_option(option: &String) -> (String, String) { /// * `iterator` - A mutable reference to the parser iterator. /// * `enums` - A mutable reference to the vector of enumerations. /// * `event` - The current Markdown event. -pub fn process_enum_event(iterator: &mut Parser, enums: &mut Vec, event: Event) { +/// * `range` - The range of the event. +/// * `line_offsets` - The line offsets of the file. +pub fn process_enum_event( + iterator: &mut OffsetIter, + enums: &mut Vec, + event: (Event, std::ops::Range), + line_offsets: &[usize], +) { + let (event, range) = event; + match event { Event::Start(tag) if tag == H3 => { let enum_name = extract_name(iterator); - let enum_obj = Enumeration { + let mut enum_obj = Enumeration { name: enum_name, mappings: BTreeMap::new(), docstring: "".to_string(), + position: None, }; + enum_obj.set_position(get_position(line_offsets, range.start, range.end)); enums.push(enum_obj); } Event::Start(Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(_))) => { let event = iterator.next().unwrap(); - if let Event::Text(text) = event { + if let (Event::Text(text), _) = event { let mappings = text.to_string(); if enums.last_mut().is_some() { diff --git a/src/object.rs b/src/object.rs index eb38034..2590a5e 100644 --- a/src/object.rs +++ b/src/object.rs @@ -21,7 +21,7 @@ * */ -use crate::attribute::Attribute; +use crate::{attribute::Attribute, markdown::parser::Position}; use serde::{Deserialize, Serialize}; use serde_with::skip_serializing_none; use std::collections::BTreeMap; @@ -44,6 +44,9 @@ pub struct Object { pub term: Option, /// Parent object of the object. pub parent: Option, + /// The line number of the object + #[serde(skip_serializing)] + pub position: Option, } impl Object { @@ -64,6 +67,7 @@ impl Object { docstring: String::new(), term, parent: None, + position: None, } } @@ -85,6 +89,15 @@ impl Object { self.docstring = docstring; } + /// Sets the line number of the object. + /// + /// # Arguments + /// + /// * `position` - The position to set. + pub fn set_position(&mut self, position: Position) { + self.position = Some(position); + } + /// Retrieves the last attribute added to the object. /// /// # Returns @@ -164,6 +177,9 @@ pub struct Enumeration { pub mappings: BTreeMap, /// Documentation string for the enumeration. pub docstring: String, + /// The line number of the enumeration + #[serde(skip_serializing)] + pub position: Option, } impl Enumeration { @@ -175,6 +191,15 @@ impl Enumeration { pub fn has_values(&self) -> bool { !self.mappings.is_empty() } + + /// Sets the position of the enumeration. + /// + /// # Arguments + /// + /// * `position` - The position to set. + pub fn set_position(&mut self, position: Position) { + self.position = Some(position); + } } #[cfg(test)]