Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for coverage edges between spans and segmentation nodes #306

Merged
merged 6 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .github/workflows/release_capi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ on:
release:
types: [published]
workflow_run:
workflows:
workflows:
- Release
types:
types:
- completed
pull_request:
types: [labeled]
Expand All @@ -20,6 +20,7 @@ jobs:
uses: pozetroninc/[email protected]
with:
repository: ${{ github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v2
- uses: actions-rs/[email protected]
with:
Expand Down Expand Up @@ -47,6 +48,7 @@ jobs:
uses: pozetroninc/[email protected]
with:
repository: ${{ github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v2
- uses: actions-rs/[email protected]
with:
Expand Down Expand Up @@ -74,6 +76,7 @@ jobs:
uses: pozetroninc/[email protected]
with:
repository: ${{ github.repository }}
token: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/checkout@v2
- uses: actions-rs/[email protected]
with:
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Added support for coverage edges between span nodes and segmentation nodes when
calculating the AQL model index.

### Fixed

- Do not use recursion to calculate the indirect coverage edges in the model
index, since this could fail for deeply nested structures.

## [3.3.3] - 2024-07-12

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion cli/src/bin/annis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ impl AnnisRunner {
let readline = rl.readline(&prompt);
match readline {
Ok(line) => {
rl.add_history_entry(&line.clone());
rl.add_history_entry(line.clone());
if !self.exec(&line) {
break;
}
Expand Down
109 changes: 58 additions & 51 deletions graphannis/src/annis/db/aql/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,61 +111,50 @@ pub struct AQLGlobalStatistics {
fn calculate_inherited_coverage_edges(
graph: &mut AnnotationGraph,
n: NodeID,
all_cov_components: &[AnnotationComponent],
all_dom_gs: &[Arc<dyn GraphStorage>],
other_cov_gs: &[Arc<dyn GraphStorage>],
all_text_coverage_components: &[AnnotationComponent],
inherited_cov_component: &AnnotationComponent,
) -> std::result::Result<FxHashSet<NodeID>, ComponentTypeError> {
let mut directly_covered_token = FxHashSet::default();

for c in all_cov_components.iter() {
if let Some(gs) = graph.get_graphstorage_as_ref(c) {
let out: Result<Vec<u64>, graphannis_core::errors::GraphAnnisCoreError> =
gs.get_outgoing_edges(n).collect();
directly_covered_token.extend(out?);
}
}

if directly_covered_token.is_empty() {
let has_token_anno = graph
.get_node_annos()
.get_value_for_item(&n, &TOKEN_KEY)?
.is_some();
if has_token_anno {
// Even though a token technically does not cover itself, we need to abort the recursion
// with the base case
directly_covered_token.insert(n);
// Iterate over all nodes that are somehow covered (by coverage or
// dominance edges) starting from the given node.
let all_text_cov_components_gs: Vec<_> = all_text_coverage_components
.iter()
.filter_map(|c| graph.get_graphstorage_as_ref(c))
.map(|gs| gs.as_edgecontainer())
.collect();

let all_text_cov_components_combined = UnionEdgeContainer::new(all_text_cov_components_gs);

let mut covered_token = FxHashSet::default();
{
let tok_helper = TokenHelper::new(graph)?;
for step in CycleSafeDFS::new(&all_text_cov_components_combined, n, 1, usize::MAX) {
let step = step?;
if tok_helper.is_token(step.node)? {
covered_token.insert(step.node);
}
}
}
};

let mut indirectly_covered_token = FxHashSet::default();
// recursively get the covered token from all children connected by a dominance relation
for dom_gs in all_dom_gs {
for out in dom_gs.get_outgoing_edges(n) {
let out = out?;
indirectly_covered_token.extend(calculate_inherited_coverage_edges(
graph,
out,
all_cov_components,
all_dom_gs,
)?);
// Connect all non-token nodes to the covered token nodes if no such direct coverage already exists
let mut direct_coverage_targets = FxHashSet::default();
for gs in other_cov_gs.iter() {
for target in gs.get_outgoing_edges(n) {
direct_coverage_targets.insert(target?);
}
}
let inherited_gs_cov = graph.get_or_create_writable(inherited_cov_component)?;

if let Ok(gs_cov) = graph.get_or_create_writable(&AnnotationComponent::new(
AnnotationComponentType::Coverage,
ANNIS_NS.into(),
"inherited-coverage".into(),
)) {
// Ignore all already directly covered token when creating the inherited coverage edges
for t in indirectly_covered_token.difference(&directly_covered_token) {
gs_cov.add_edge(Edge {
for target in &covered_token {
if n != *target && !direct_coverage_targets.contains(target) {
inherited_gs_cov.add_edge(Edge {
source: n,
target: *t,
target: *target,
})?;
}
}

directly_covered_token.extend(indirectly_covered_token);
Ok(directly_covered_token)
Ok(covered_token)
}

pub struct AQLUpdateGraphIndex {
Expand Down Expand Up @@ -274,19 +263,37 @@ impl AQLUpdateGraphIndex {
) -> std::result::Result<(), ComponentTypeError> {
self.clear_left_right_token(graph)?;

let all_cov_components =
graph.get_all_components(Some(AnnotationComponentType::Coverage), None);
let all_dom_gs: Vec<Arc<dyn GraphStorage>> = graph
.get_all_components(Some(AnnotationComponentType::Dominance), Some(""))
let inherited_cov_component = AnnotationComponent::new(
AnnotationComponentType::Coverage,
ANNIS_NS.into(),
"inherited-coverage".into(),
);
let all_cov_components: Vec<_> = graph
.get_all_components(Some(AnnotationComponentType::Coverage), None)
.into_iter()
.filter_map(|c| graph.get_graphstorage(&c))
.filter(|c| c != &inherited_cov_component)
.collect();

let all_cov_gs: Vec<_> = all_cov_components
.iter()
.filter_map(|c| graph.get_graphstorage(c))
.collect();

let all_dom_components =
graph.get_all_components(Some(AnnotationComponentType::Dominance), None);
let all_text_coverage_components: Vec<AnnotationComponent> =
[all_cov_components, all_dom_components].concat();

// go over each node and calculate the left-most and right-most token
for invalid in self.invalid_nodes.iter()? {
let (n, _) = invalid?;
let covered_token =
calculate_inherited_coverage_edges(graph, n, &all_cov_components, &all_dom_gs)?;
let covered_token = calculate_inherited_coverage_edges(
graph,
n,
&all_cov_gs,
&all_text_coverage_components,
&inherited_cov_component,
)?;
self.calculate_token_alignment(
graph,
n,
Expand Down
Loading
Loading