From 2f7064629e6421da7af7b9d769f1ed08e7aa42f4 Mon Sep 17 00:00:00 2001 From: MichaelRoeder Date: Mon, 9 Nov 2015 14:41:15 +0100 Subject: [PATCH] Moved the filtering of markings for the D2KB and Entity Typing class into an EvaluationDecorator to handle the problem not only for experiment tasks but for experiment sub tasks in the same way. --- .../gerbil/evaluate/EvaluatorFactory.java | 31 ++++++---- ...SearcherBasedNotMatchingMarkingFilter.java | 59 +++++++++++++++++++ .../aksw/gerbil/execute/ExperimentTask.java | 27 ++------- .../filter/NotMatchingMarkingFilter.java | 24 -------- ...SearcherBasedNotMatchingMarkingFilter.java | 42 ------------- .../gerbil/utils/filter/MarkingFilter.java | 12 ++++ .../java/org/aksw/gerbil/SingleRunTest.java | 6 +- 7 files changed, 97 insertions(+), 104 deletions(-) create mode 100644 src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java delete mode 100644 src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java delete mode 100644 src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java diff --git a/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java b/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java index 992d883d7..e0a8e6a03 100644 --- a/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java +++ b/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java @@ -32,6 +32,7 @@ import org.aksw.gerbil.evaluate.impl.SpanMergingEvaluatorDecorator; import org.aksw.gerbil.evaluate.impl.SubTaskAverageCalculator; import org.aksw.gerbil.evaluate.impl.filter.MarkingFilteringEvaluatorDecorator; +import org.aksw.gerbil.evaluate.impl.filter.SearcherBasedNotMatchingMarkingFilter; import org.aksw.gerbil.matching.Matching; import org.aksw.gerbil.matching.MatchingsSearcher; import org.aksw.gerbil.matching.MatchingsSearcherFactory; @@ -39,6 +40,7 @@ import org.aksw.gerbil.matching.impl.HierarchicalMatchingsCounter; import org.aksw.gerbil.matching.impl.MatchingsCounterImpl; import org.aksw.gerbil.matching.impl.MeaningMatchingsSearcher; +import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher; import org.aksw.gerbil.semantic.kb.ExactWhiteListBasedUriKBClassifier; import org.aksw.gerbil.semantic.kb.SimpleWhiteListBasedUriKBClassifier; import org.aksw.gerbil.semantic.kb.UriKBClassifier; @@ -142,20 +144,25 @@ protected Evaluator createEvaluator(ExperimentType type, ExperimentTaskConfigura FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()); } case D2KB: { - return new ConfidenceScoreEvaluatorDecorator( - new InKBClassBasedFMeasureCalculator(new CompoundMatchingsCounter( - (MatchingsSearcher) MatchingsSearcherFactory - .createSpanMatchingsSearcher(configuration.matching), - new MeaningMatchingsSearcher(globalClassifier)), globalClassifier), - FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()); + return new SearcherBasedNotMatchingMarkingFilter( + new StrongSpanMatchingsSearcher(), + new ConfidenceScoreEvaluatorDecorator( + new InKBClassBasedFMeasureCalculator( + new CompoundMatchingsCounter( + (MatchingsSearcher) MatchingsSearcherFactory + .createSpanMatchingsSearcher(configuration.matching), + new MeaningMatchingsSearcher(globalClassifier)), + globalClassifier), + FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator())); } case ETyping: { - return new ConfidenceScoreEvaluatorDecorator( - new HierarchicalFMeasureCalculator(new HierarchicalMatchingsCounter( - (MatchingsSearcher) MatchingsSearcherFactory - .createSpanMatchingsSearcher(configuration.matching), - globalClassifier, inferencer)), - FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()); + return new SearcherBasedNotMatchingMarkingFilter(new StrongSpanMatchingsSearcher(), + new ConfidenceScoreEvaluatorDecorator( + new HierarchicalFMeasureCalculator(new HierarchicalMatchingsCounter( + (MatchingsSearcher) MatchingsSearcherFactory + .createSpanMatchingsSearcher(configuration.matching), + globalClassifier, inferencer)), + FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator())); } case OKE_Task1: { ExperimentTaskConfiguration subTaskConfig; diff --git a/src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java b/src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java new file mode 100644 index 000000000..8db8a5bd3 --- /dev/null +++ b/src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java @@ -0,0 +1,59 @@ +package org.aksw.gerbil.evaluate.impl.filter; + +import java.util.ArrayList; +import java.util.List; + +import org.aksw.gerbil.evaluate.AbstractEvaluatorDecorator; +import org.aksw.gerbil.evaluate.EvaluationResultContainer; +import org.aksw.gerbil.evaluate.Evaluator; +import org.aksw.gerbil.matching.MatchingsSearcher; +import org.aksw.gerbil.transfer.nif.Marking; + +import com.carrotsearch.hppc.BitSet; + +/** + * This evaluator decorator removes every marking from the given list that does + * not match the given gold standard list based on a given + * {@link MatchingsSearcher} instance. + * + * @author Michael Röder (roeder@informatik.uni-leipzig.de) + * + */ +public class SearcherBasedNotMatchingMarkingFilter extends AbstractEvaluatorDecorator { + + protected MatchingsSearcher searcher; + + public SearcherBasedNotMatchingMarkingFilter(MatchingsSearcher searcher, Evaluator evaluator) { + super(evaluator); + this.searcher = searcher; + } + + protected List> filterListOfMarkings(List> markings, List> goldStandard) { + List> filteredMarkings = new ArrayList>(markings.size()); + for (int i = 0; i < markings.size(); ++i) { + filteredMarkings.add(filterMarkings(markings.get(i), goldStandard.get(i))); + } + return filteredMarkings; + } + + protected List filterMarkings(List markings, List goldStandard) { + BitSet matchingElements; + BitSet alreadyUsedResults = new BitSet(goldStandard.size()); + List filteredMarkings = new ArrayList(markings.size()); + for (T marking : markings) { + matchingElements = searcher.findMatchings(marking, goldStandard, alreadyUsedResults); + if (!matchingElements.isEmpty()) { + filteredMarkings.add(marking); + alreadyUsedResults.set(matchingElements.nextSetBit(0)); + } + } + return filteredMarkings; + } + + @Override + public void evaluate(List> annotatorResults, List> goldStandard, + EvaluationResultContainer results) { + evaluator.evaluate(filterListOfMarkings(annotatorResults, goldStandard), goldStandard, results); + } + +} diff --git a/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java b/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java index f324f4e44..1ee15d1fc 100644 --- a/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java +++ b/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java @@ -45,8 +45,6 @@ import org.aksw.gerbil.evaluate.SubTaskResult; import org.aksw.gerbil.evaluate.impl.FMeasureCalculator; import org.aksw.gerbil.exceptions.GerbilException; -import org.aksw.gerbil.matching.filter.SearcherBasedNotMatchingMarkingFilter; -import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher; import org.aksw.gerbil.semantic.sameas.DatasetBasedSameAsRetriever; import org.aksw.gerbil.semantic.sameas.MultipleSameAsRetriever; import org.aksw.gerbil.semantic.sameas.SameAsRetriever; @@ -307,19 +305,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator, List> results = new ArrayList>(dataset.size()); List> goldStandard = new ArrayList>(dataset.size()); D2KBAnnotator linker = ((D2KBAnnotator) annotator); - // For D2KB we have to filter the results to get those results - // that are matching the positions - SearcherBasedNotMatchingMarkingFilter filter = new SearcherBasedNotMatchingMarkingFilter( - new StrongSpanMatchingsSearcher()); - List documentGS; for (Document document : dataset.getInstances()) { - documentGS = document.getMarkings(MeaningSpan.class); // reduce the document to a text and a list of Spans - results.add(filter.filterMarkings( - linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document)), - documentGS)); - goldStandard.add(documentGS); + results.add(linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document))); + goldStandard.add(document.getMarkings(MeaningSpan.class)); taskState.increaseExperimentStepCount(); } if (annotatorOutputWriter != null) { @@ -413,20 +403,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator, List> results = new ArrayList>(dataset.size()); List> goldStandard = new ArrayList>(dataset.size()); EntityTyper typer = ((EntityTyper) annotator); - // For ETyping we have to filter the results to get those - // results - // that are matching the positions - SearcherBasedNotMatchingMarkingFilter filter = new SearcherBasedNotMatchingMarkingFilter( - new StrongSpanMatchingsSearcher()); - List documentGS; for (Document document : dataset.getInstances()) { - documentGS = document.getMarkings(TypedSpan.class); // reduce the document to a text and a list of Spans - results.add(filter.filterMarkings( - typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document)), - documentGS)); - goldStandard.add(documentGS); + results.add(typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document))); + goldStandard.add(document.getMarkings(TypedSpan.class)); taskState.increaseExperimentStepCount(); } if (annotatorOutputWriter != null) { diff --git a/src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java b/src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java deleted file mode 100644 index abdb4ead8..000000000 --- a/src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.aksw.gerbil.matching.filter; - -import java.util.List; - -import org.aksw.gerbil.transfer.nif.Marking; - -/** - * Removes every {@link Marking} that is not matching. - * - * @author Michael Röder (roeder@informatik.uni-leipzig.de) - * - */ -public interface NotMatchingMarkingFilter { - - /** - * Returns a list of {@link Marking}s that does not contain any - * {@link Marking}s that is not matching the given gold standard list. - * - * @param markings - * the list of {@link Marking}s - * @return a filtered list of {@link Marking}s - */ - public List filterMarkings(List markings, List goldStandard); -} diff --git a/src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java b/src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java deleted file mode 100644 index cb842fc42..000000000 --- a/src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.aksw.gerbil.matching.filter; - -import java.util.ArrayList; -import java.util.List; - -import org.aksw.gerbil.matching.MatchingsSearcher; -import org.aksw.gerbil.transfer.nif.Marking; - -import com.carrotsearch.hppc.BitSet; - -/** - * This implementation of a marking filter removes every marking from the given - * list that does not match the given gold standard list. For identifying those - * markings a {@link MatchingsSearcher} is used. - * - * @author Michael Röder (roeder@informatik.uni-leipzig.de) - * - */ -public class SearcherBasedNotMatchingMarkingFilter implements NotMatchingMarkingFilter { - - protected MatchingsSearcher searcher; - - public SearcherBasedNotMatchingMarkingFilter(MatchingsSearcher searcher) { - this.searcher = searcher; - } - - @Override - public List filterMarkings(List markings, List goldStandard) { - BitSet matchingElements; - BitSet alreadyUsedResults = new BitSet(goldStandard.size()); - List filteredMarkings = new ArrayList(markings.size()); - for (T marking : markings) { - matchingElements = searcher.findMatchings(marking, goldStandard, alreadyUsedResults); - if (!matchingElements.isEmpty()) { - filteredMarkings.add(marking); - alreadyUsedResults.set(matchingElements.nextSetBit(0)); - } - } - return filteredMarkings; - } - -} diff --git a/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java b/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java index c1a0ba01c..b98e1b50b 100644 --- a/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java +++ b/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java @@ -22,8 +22,20 @@ public interface MarkingFilter { + /** + * Returns true if the marking is good and does not have to be filtered out. + * + * @param marking + * @return + */ public boolean isMarkingGood(T marking); + /** + * Returns a filtered list based on the given list. + * + * @param markings + * @return + */ public List filterList(List markings); public List> filterListOfLists(List> markings); diff --git a/src/test/java/org/aksw/gerbil/SingleRunTest.java b/src/test/java/org/aksw/gerbil/SingleRunTest.java index 7e52b9ffc..e40d6e3c7 100644 --- a/src/test/java/org/aksw/gerbil/SingleRunTest.java +++ b/src/test/java/org/aksw/gerbil/SingleRunTest.java @@ -43,10 +43,10 @@ public class SingleRunTest implements TaskObserver { private static final Logger LOGGER = LoggerFactory.getLogger(SingleRunTest.class); - private static final String ANNOTATOR_NAME = "FOX"; - private static final String DATASET_NAME = "N3-Reuters-128"; + private static final String ANNOTATOR_NAME = "TagMe 2"; + private static final String DATASET_NAME = "ACE2004"; private static final ExperimentType EXPERIMENT_TYPE = ExperimentType.D2KB; - private static final Matching MATCHING = Matching.WEAK_ANNOTATION_MATCH; + private static final Matching MATCHING = Matching.STRONG_ENTITY_MATCH; public static void main(String[] args) throws Exception { SingleRunTest test = new SingleRunTest();