Skip to content

Commit

Permalink
store document positions upon parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
JR-1991 committed Dec 19, 2024
1 parent 9116d13 commit 50abe1c
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 28 deletions.
15 changes: 14 additions & 1 deletion src/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
*
*/

use crate::xmltype::XMLType;
use crate::{markdown::parser::Position, xmltype::XMLType};
use serde::{de::Visitor, Deserialize, Serialize};
use std::{error::Error, fmt, str::FromStr};

Expand Down Expand Up @@ -57,6 +57,9 @@ pub struct Attribute {
pub xml: Option<XMLType>,
/// Is an enumeration or not
pub is_enum: bool,
/// The position (line and offset range) of the attribute in the source document, if known
#[serde(skip_serializing)]
pub position: Option<Position>,
}

impl Attribute {
Expand All @@ -79,6 +82,7 @@ impl Attribute {
xml: Some(XMLType::from_str(name.as_str()).unwrap()),
default: None,
is_enum: false,
position: None,
}
}

Expand All @@ -91,6 +95,15 @@ impl Attribute {
self.docstring = docstring;
}

/// Records the position at which this attribute was parsed.
///
/// Any position stored earlier is overwritten.
///
/// # Arguments
///
/// * `position` - The position to store.
pub fn set_position(&mut self, position: Position) {
    // `replace` hands back the previously stored value; it is intentionally dropped.
    self.position.replace(position);
}

/// Adds an option to the attribute.
///
/// # Arguments
Expand Down
6 changes: 6 additions & 0 deletions src/datamodel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ mod tests {
xml: None,
default: None,
is_enum: false,
position: None,
});

let mut obj2 = Object::new("Object2".to_string(), None);
Expand All @@ -374,18 +375,21 @@ mod tests {
xml: None,
default: None,
is_enum: false,
position: None,
});

let enm1 = Enumeration {
name: "Enum1".to_string(),
mappings: BTreeMap::from([("key1".to_string(), "value1".to_string())]),
docstring: "".to_string(),
position: None,
};

let enm2 = Enumeration {
name: "Enum2".to_string(),
mappings: BTreeMap::from([("key2".to_string(), "value2".to_string())]),
docstring: "".to_string(),
position: None,
};

model1.objects.push(obj1);
Expand Down Expand Up @@ -422,6 +426,7 @@ mod tests {
xml: None,
default: Some(DataType::String("".to_string())),
is_enum: false,
position: None,
});

obj.add_attribute(crate::attribute::Attribute {
Expand All @@ -436,6 +441,7 @@ mod tests {
xml: None,
default: None,
is_enum: false,
position: None,
});

model.objects.push(obj);
Expand Down
2 changes: 2 additions & 0 deletions src/json/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,7 @@ mod tests {
default: None,
xml: None,
is_enum: false,
position: None,
};

let property: schema::Property =
Expand Down Expand Up @@ -563,6 +564,7 @@ mod tests {
default: None,
xml: None,
is_enum: false,
position: None,
};

let property: schema::Property =
Expand Down
107 changes: 81 additions & 26 deletions src/markdown/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ use colored::Colorize;
use core::panic;
use lazy_static::lazy_static;
use log::error;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::error::Error;

use pulldown_cmark::{CowStr, Event, HeadingLevel, Parser, Tag, TagEnd};
use pulldown_cmark::{CowStr, Event, HeadingLevel, OffsetIter, Options, Parser, Tag, TagEnd};
use regex::Regex;

use crate::attribute;
Expand Down Expand Up @@ -82,6 +83,13 @@ enum ParserState {
InHeading,
}

/// Location of a parsed element within the Markdown source.
///
/// Instances are built by `get_position` from the offset range reported for a
/// parser event.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub struct Position {
/// 1-based line number on which the element starts.
pub line: usize,
/// `(start, end)` offsets of the element, stored exactly as they were passed
/// to `get_position` (byte offsets, per that helper's description).
pub range: (usize, usize),
}

/// Parses the given Markdown content and returns a `DataModel`.
///
/// # Arguments
Expand All @@ -98,9 +106,18 @@ pub fn parse_markdown(content: &str) -> Result<DataModel, Validator> {
// Parse the frontmatter
let config = parse_frontmatter(&content);

// Parse the markdown content
let parser = Parser::new(&content);
let mut iterator = parser.into_iter();
// Create line offset mapping
let line_offsets: Vec<usize> = content
.char_indices()
.filter(|(_, c)| *c == '\n')
.map(|(i, _)| i)
.collect();

// Create the parser with heading attributes enabled; offsets are obtained from `into_offset_iter` below
let mut options = Options::empty();
options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
let parser = Parser::new_ext(&content, options);
let mut iterator = parser.into_offset_iter();

let mut objects = Vec::new();
let mut enums = Vec::new();
Expand All @@ -110,15 +127,22 @@ pub fn parse_markdown(content: &str) -> Result<DataModel, Validator> {
// Extract objects from the markdown file
let mut state = ParserState::OutsideDefinition;
while let Some(event) = iterator.next() {
process_object_event(&mut iterator, &mut objects, event, &mut model, &mut state);
process_object_event(
&mut iterator,
&mut objects,
event,
&mut model,
&mut state,
&line_offsets,
);
}

// Reset the iterator
let parser = Parser::new(&content);
let mut iterator = parser.into_iter();
let mut iterator = parser.into_offset_iter();

while let Some(event) = iterator.next() {
process_enum_event(&mut iterator, &mut enums, event);
while let Some((event, range)) = iterator.next() {
process_enum_event(&mut iterator, &mut enums, (event, range), &line_offsets);
}

// Filter empty objects and enums
Expand Down Expand Up @@ -157,6 +181,20 @@ fn clean_content(content: &str) -> String {
content
}

/// Builds a [`Position`] for the span `start..end`.
///
/// # Arguments
///
/// * `line_offsets` - Offsets of the newline characters in the source, in ascending order.
/// * `start` - Start offset of the span; it determines the line number.
/// * `end` - End offset of the span; it is only stored in the result.
///
/// # Returns
///
/// A `Position` whose `line` is 1-based and whose `range` is `(start, end)`.
fn get_position(line_offsets: &[usize], start: usize, end: usize) -> Position {
    // Hit or miss, the search yields the number of newlines located strictly
    // before `start`: an exact hit means `start` is itself a newline (which
    // still belongs to the line it terminates), a miss yields the insertion
    // index.
    let newlines_before = line_offsets
        .binary_search(&start)
        .unwrap_or_else(|insertion_point| insertion_point);

    Position {
        line: newlines_before + 1,
        range: (start, end),
    }
}

/// Processes a single Markdown event for object extraction.
///
/// # Arguments
Expand All @@ -166,12 +204,15 @@ fn clean_content(content: &str) -> String {
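/// * `event` - The current Markdown event together with its source offset range.
/// * `model` - A mutable reference to the data model.
/// * `state` - A mutable reference to the current parser state.
/// * `line_offsets` - The offsets of the newline characters in the content.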
fn process_object_event(
iterator: &mut Parser,
iterator: &mut pulldown_cmark::OffsetIter,
objects: &mut Vec<Object>,
event: Event,
event: (Event, std::ops::Range<usize>), // Now includes offset range
model: &mut DataModel,
state: &mut ParserState,
line_offsets: &[usize], // Pass in line offsets
) {
let (event, range) = event;

match event {
Event::Start(tag) if tag == H1 => {
model.name = Some(extract_name(iterator));
Expand All @@ -181,7 +222,8 @@ fn process_object_event(
}
Event::Start(tag) if tag == H3 => {
*state = ParserState::InHeading;
let object = process_object_heading(iterator);
let mut object = process_object_heading(iterator);
object.set_position(get_position(line_offsets, range.start, range.end));
objects.push(object);
}
Event::End(tag) if tag == H3_END => {
Expand All @@ -194,7 +236,7 @@ fn process_object_event(
let parent = iterator.next();

match parent {
Some(Event::Text(text)) if text.to_string() != "]" => {
Some((Event::Text(text), _)) if text.to_string() != "]" => {
last_object.parent = Some(text.to_string());
}
_ => {
Expand All @@ -220,7 +262,8 @@ fn process_object_event(
if !last_object.has_attributes() {
iterator.next();
let (required, attr_name) = extract_attr_name_required(iterator);
let attribute = attribute::Attribute::new(attr_name, required);
let mut attribute = attribute::Attribute::new(attr_name, required);
attribute.set_position(get_position(line_offsets, range.start, range.end));
objects.last_mut().unwrap().add_attribute(attribute);
} else {
let attr_strings = extract_attribute_options(iterator);
Expand All @@ -235,7 +278,8 @@ fn process_object_event(
}

let (required, attr_string) = extract_attr_name_required(iterator);
let attribute = attribute::Attribute::new(attr_string, required);
let mut attribute = attribute::Attribute::new(attr_string, required);
attribute.set_position(get_position(line_offsets, range.start, range.end));
objects.last_mut().unwrap().add_attribute(attribute);
}
Event::Text(text) => {
Expand All @@ -257,7 +301,7 @@ fn process_object_event(
/// # Returns
///
/// An `Object` created from the heading.
fn process_object_heading(iterator: &mut Parser) -> object::Object {
fn process_object_heading(iterator: &mut OffsetIter) -> object::Object {
let heading = extract_name(iterator);
let term = extract_object_term(&heading);
let name = heading.split_whitespace().next().unwrap().to_string();
Expand All @@ -274,14 +318,14 @@ fn process_object_heading(iterator: &mut Parser) -> object::Object {
/// # Returns
///
/// A string containing the extracted name.
fn extract_name(iterator: &mut Parser) -> String {
if let Some(Event::Text(text)) = iterator.next() {
fn extract_name(iterator: &mut OffsetIter) -> String {
if let Some((Event::Text(text), _)) = iterator.next() {
return text.to_string();
}

// Try for two text events
for _ in 0..2 {
if let Some(Event::Text(text)) = iterator.next() {
if let Some((Event::Text(text), _)) = iterator.next() {
return text.to_string();
}
}
Expand All @@ -298,14 +342,14 @@ fn extract_name(iterator: &mut Parser) -> String {
/// # Returns
///
/// A tuple containing a boolean indicating if the attribute is required and the attribute name.
fn extract_attr_name_required(iterator: &mut Parser) -> (bool, String) {
if let Some(Event::Text(text)) = iterator.next() {
fn extract_attr_name_required(iterator: &mut OffsetIter) -> (bool, String) {
if let Some((Event::Text(text), _)) = iterator.next() {
return (false, text.to_string());
}

// Try for two text events
for _ in 0..2 {
if let Some(Event::Text(text)) = iterator.next() {
if let Some((Event::Text(text), _)) = iterator.next() {
return (true, text.to_string());
}
}
Expand Down Expand Up @@ -338,9 +382,9 @@ fn extract_object_term(heading: &str) -> Option<String> {
/// # Returns
///
/// A vector of strings containing the extracted attribute options.
fn extract_attribute_options(iterator: &mut Parser) -> Vec<String> {
fn extract_attribute_options(iterator: &mut OffsetIter) -> Vec<String> {
let mut options = Vec::new();
while let Some(next) = iterator.next() {
while let Some((next, _)) = iterator.next() {
match next {
Event::Start(Tag::Item) => {
let name = extract_name(iterator);
Expand Down Expand Up @@ -435,20 +479,31 @@ fn process_option(option: &String) -> (String, String) {
/// * `iterator` - A mutable reference to the parser iterator.
/// * `enums` - A mutable reference to the vector of enumerations.
/// * `event` - The current Markdown event.
pub fn process_enum_event(iterator: &mut Parser, enums: &mut Vec<Enumeration>, event: Event) {
/// * `range` - The range of the event.
/// * `line_offsets` - The line offsets of the file.
pub fn process_enum_event(
iterator: &mut OffsetIter,
enums: &mut Vec<Enumeration>,
event: (Event, std::ops::Range<usize>),
line_offsets: &[usize],
) {
let (event, range) = event;

match event {
Event::Start(tag) if tag == H3 => {
let enum_name = extract_name(iterator);
let enum_obj = Enumeration {
let mut enum_obj = Enumeration {
name: enum_name,
mappings: BTreeMap::new(),
docstring: "".to_string(),
position: None,
};
enum_obj.set_position(get_position(line_offsets, range.start, range.end));
enums.push(enum_obj);
}
Event::Start(Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(_))) => {
let event = iterator.next().unwrap();
if let Event::Text(text) = event {
if let (Event::Text(text), _) = event {
let mappings = text.to_string();

if enums.last_mut().is_some() {
Expand Down
Loading

0 comments on commit 50abe1c

Please sign in to comment.