Skip to content

Commit

Permalink
use column instead of offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
JR-1991 committed Dec 23, 2024
1 parent 40bb606 commit 2dc0722
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 64 deletions.
76 changes: 59 additions & 17 deletions src/markdown/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,13 @@ enum ParserState {
#[cfg_attr(feature = "python", pyclass(get_all))]
pub struct Position {
pub line: usize,
pub range: (usize, usize),
pub column: (usize, usize),
}

/// Orders positions by line number first and by their column span second.
///
/// Comparing only `line` would report two positions on the same line but with
/// different columns as `Ordering::Equal`, which contradicts a field-wise
/// equality and breaks the documented `PartialOrd`/`PartialEq` consistency
/// rule (`a == b` if and only if `partial_cmp(a, b) == Some(Equal)`).
/// NOTE(review): this assumes `PartialEq` for `Position` compares both fields
/// (the derive list is not visible from this hunk) — confirm.
///
/// The result is always `Some(_)`: every pair of positions is comparable.
impl PartialOrd for Position {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // The line decides the order; the `(start, end)` column tuple only
        // breaks ties between positions that share a line.
        let by_line = self.line.cmp(&other.line);
        Some(by_line.then_with(|| self.column.cmp(&other.column)))
    }
}

/// Parses a Markdown file at the given path and returns a `DataModel`.
Expand Down Expand Up @@ -133,6 +139,7 @@ pub fn parse_markdown(content: &str) -> Result<DataModel, Validator> {
let mut state = ParserState::OutsideDefinition;
while let Some(event) = iterator.next() {
process_object_event(
&content,
&mut iterator,
&mut objects,
event,
Expand All @@ -147,7 +154,13 @@ pub fn parse_markdown(content: &str) -> Result<DataModel, Validator> {
let mut iterator = parser.into_offset_iter();

while let Some((event, range)) = iterator.next() {
process_enum_event(&mut iterator, &mut enums, (event, range), &line_offsets);
process_enum_event(
&content,
&mut iterator,
&mut enums,
(event, range),
&line_offsets,
);
}

// Filter empty objects and enums
Expand Down Expand Up @@ -186,17 +199,44 @@ fn clean_content(content: &str) -> String {
content
}

// Helper function to convert byte offset to line number
fn get_position(line_offsets: &[usize], start: usize, end: usize) -> Position {
match line_offsets.binary_search(&start) {
Ok(line) => Position {
line: line + 1,
range: (start, end),
},
Err(line) => Position {
line: line + 1,
range: (start, end),
},
/// Converts a byte offset range into a 1-based line number and a column pair.
///
/// # Arguments
///
/// * `content` - The text the offsets refer to.
/// * `line_offsets` - Sorted byte offsets used to locate lines via binary search.
///   NOTE(review): the arithmetic below reads as if each entry is the offset of a
///   line-terminating `\n` — confirm against the caller that builds it.
/// * `start` - Byte offset where the element begins.
/// * `end` - Byte offset where the element ends; only consulted when `start` lies
///   beyond the last entry of `line_offsets`.
///
/// # Panics
///
/// Slicing `content` panics if the looked-up offsets are out of bounds or do not
/// fall on UTF-8 character boundaries.
fn get_position(content: &str, line_offsets: &[usize], start: usize, end: usize) -> Position {
    // An exact hit and an insertion point are treated alike: both name the
    // zero-based index of the line that contains `start`.
    let index = line_offsets
        .binary_search(&start)
        .unwrap_or_else(|insert_at| insert_at);
    let first_line = index == 0;

    // Bounds of the current line: from the previous recorded offset (or the
    // start of the text) up to this line's own offset (or the end of the text).
    let line_start = if first_line { 0 } else { line_offsets[index - 1] };
    let recorded_end = line_offsets.get(index).copied();
    let segment = &content[line_start..recorded_end.map_or(content.len(), |offset| offset)];

    // Number of whitespace characters the segment opens with.
    let leading_space = segment
        .chars()
        .take_while(|ch| ch.is_whitespace())
        .count();

    // The start column is shifted by the leading whitespace count.
    let start_col = if first_line {
        start + 1 + leading_space
    } else {
        start - line_start + leading_space - 1
    };

    // The end column is measured from the line start to the recorded line end,
    // falling back to `end` when no offset is recorded for this line.
    let end_col = match recorded_end {
        Some(offset) => offset - line_start,
        None => end - line_start,
    };

    Position {
        line: index + 1,
        column: (start_col, end_col),
    }
}

Expand All @@ -209,6 +249,7 @@ fn get_position(line_offsets: &[usize], start: usize, end: usize) -> Position {
/// * `event` - The current Markdown event.
/// * `model` - A mutable reference to the data model.
fn process_object_event(
content: &str,
iterator: &mut pulldown_cmark::OffsetIter,
objects: &mut Vec<Object>,
event: (Event, std::ops::Range<usize>), // Now includes offset range
Expand All @@ -228,7 +269,7 @@ fn process_object_event(
Event::Start(tag) if tag == H3 => {
*state = ParserState::InHeading;
let mut object = process_object_heading(iterator);
object.set_position(get_position(line_offsets, range.start, range.end));
object.set_position(get_position(content, line_offsets, range.start, range.end));
objects.push(object);
}
Event::End(tag) if tag == H3_END => {
Expand Down Expand Up @@ -268,7 +309,7 @@ fn process_object_event(
iterator.next();
let (required, attr_name) = extract_attr_name_required(iterator);
let mut attribute = attribute::Attribute::new(attr_name, required);
attribute.set_position(get_position(line_offsets, range.start, range.end));
attribute.set_position(get_position(content, line_offsets, range.start, range.end));
objects.last_mut().unwrap().add_attribute(attribute);
} else {
let attr_strings = extract_attribute_options(iterator);
Expand All @@ -284,7 +325,7 @@ fn process_object_event(

let (required, attr_string) = extract_attr_name_required(iterator);
let mut attribute = attribute::Attribute::new(attr_string, required);
attribute.set_position(get_position(line_offsets, range.start, range.end));
attribute.set_position(get_position(content, line_offsets, range.start, range.end));
objects.last_mut().unwrap().add_attribute(attribute);
}
Event::Text(text) => {
Expand Down Expand Up @@ -487,6 +528,7 @@ fn process_option(option: &String) -> (String, String) {
/// * `range` - The range of the event.
/// * `line_offsets` - The line offsets of the file.
pub fn process_enum_event(
content: &str,
iterator: &mut OffsetIter,
enums: &mut Vec<Enumeration>,
event: (Event, std::ops::Range<usize>),
Expand All @@ -503,7 +545,7 @@ pub fn process_enum_event(
docstring: "".to_string(),
position: None,
};
enum_obj.set_position(get_position(line_offsets, range.start, range.end));
enum_obj.set_position(get_position(content, line_offsets, range.start, range.end));
enums.push(enum_obj);
}
Event::Start(Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(_))) => {
Expand Down
20 changes: 20 additions & 0 deletions src/validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ impl Validator {
for object in &model.objects {
self.validate_object(object, &types);
}

self.sort_errors();
}

/// Checks for duplicate object names within the model.
Expand Down Expand Up @@ -476,6 +478,24 @@ impl Validator {
.collect::<Vec<&str>>();
types
}

/// Reorders `self.errors` in place by the line of each error's first position.
///
/// The sort key is an `Option<usize>`: errors whose `positions` is `None` or
/// empty yield `None`, which orders before every `Some(line)`. The sort is
/// stable, so errors sharing a key keep their relative order.
fn sort_errors(&mut self) {
    self.errors.sort_by_key(|error| {
        error
            .positions
            .as_ref()
            .and_then(|positions| positions.first())
            .map(|first| first.line)
    });
}
}

impl Default for Validator {
Expand Down
94 changes: 47 additions & 47 deletions tests/data/expected_invalid_complete.json
Original file line number Diff line number Diff line change
@@ -1,29 +1,6 @@
{
"is_valid": false,
"errors": [
{
"message": "Object 'Duplicate' is defined more than once.",
"object": "Duplicate",
"attribute": null,
"location": "Global",
"error_type": "DuplicateError",
"positions": [
{
"line": 25,
"range": [
306,
320
]
},
{
"line": 30,
"range": [
347,
361
]
}
]
},
{
"message": "Name '1number' must start with a letter.",
"object": "Test",
Expand All @@ -33,9 +10,9 @@
"positions": [
{
"line": 13,
"range": [
173,
267
"column": [
1,
10
]
}
]
Expand All @@ -49,9 +26,9 @@
"positions": [
{
"line": 15,
"range": [
200,
229
"column": [
1,
12
]
}
]
Expand All @@ -65,9 +42,9 @@
"positions": [
{
"line": 17,
"range": [
229,
267
"column": [
1,
17
]
}
]
Expand All @@ -81,9 +58,9 @@
"positions": [
{
"line": 20,
"range": [
267,
277
"column": [
1,
10
]
}
]
Expand All @@ -97,9 +74,32 @@
"positions": [
{
"line": 22,
"range": [
278,
306
"column": [
1,
10
]
}
]
},
{
"message": "Object 'Duplicate' is defined more than once.",
"object": "Duplicate",
"attribute": null,
"location": "Global",
"error_type": "DuplicateError",
"positions": [
{
"line": 25,
"column": [
1,
14
]
},
{
"line": 30,
"column": [
1,
14
]
}
]
Expand All @@ -113,16 +113,16 @@
"positions": [
{
"line": 37,
"range": [
413,
472
"column": [
1,
12
]
},
{
"line": 39,
"range": [
442,
472
"column": [
1,
12
]
}
]
Expand All @@ -136,9 +136,9 @@
"positions": [
{
"line": 44,
"range": [
484,
514
"column": [
1,
12
]
}
]
Expand Down

0 comments on commit 2dc0722

Please sign in to comment.