diff --git a/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java b/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java index 992d883d7..e0a8e6a03 100644 --- a/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java +++ b/src/main/java/org/aksw/gerbil/evaluate/EvaluatorFactory.java @@ -32,6 +32,7 @@ import org.aksw.gerbil.evaluate.impl.SpanMergingEvaluatorDecorator; import org.aksw.gerbil.evaluate.impl.SubTaskAverageCalculator; import org.aksw.gerbil.evaluate.impl.filter.MarkingFilteringEvaluatorDecorator; +import org.aksw.gerbil.evaluate.impl.filter.SearcherBasedNotMatchingMarkingFilter; import org.aksw.gerbil.matching.Matching; import org.aksw.gerbil.matching.MatchingsSearcher; import org.aksw.gerbil.matching.MatchingsSearcherFactory; @@ -39,6 +40,7 @@ import org.aksw.gerbil.matching.impl.HierarchicalMatchingsCounter; import org.aksw.gerbil.matching.impl.MatchingsCounterImpl; import org.aksw.gerbil.matching.impl.MeaningMatchingsSearcher; +import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher; import org.aksw.gerbil.semantic.kb.ExactWhiteListBasedUriKBClassifier; import org.aksw.gerbil.semantic.kb.SimpleWhiteListBasedUriKBClassifier; import org.aksw.gerbil.semantic.kb.UriKBClassifier; @@ -142,20 +144,25 @@ protected Evaluator createEvaluator(ExperimentType type, ExperimentTaskConfigura FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()); } case D2KB: { - return new ConfidenceScoreEvaluatorDecorator( - new InKBClassBasedFMeasureCalculator(new CompoundMatchingsCounter( - (MatchingsSearcher) MatchingsSearcherFactory - .createSpanMatchingsSearcher(configuration.matching), - new MeaningMatchingsSearcher(globalClassifier)), globalClassifier), - FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()); + return new SearcherBasedNotMatchingMarkingFilter( + new StrongSpanMatchingsSearcher(), + new ConfidenceScoreEvaluatorDecorator( + new InKBClassBasedFMeasureCalculator( + new CompoundMatchingsCounter( + (MatchingsSearcher) MatchingsSearcherFactory + .createSpanMatchingsSearcher(configuration.matching), + new MeaningMatchingsSearcher(globalClassifier)), + globalClassifier), + FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator())); } case ETyping: { - return new ConfidenceScoreEvaluatorDecorator( - new HierarchicalFMeasureCalculator(new HierarchicalMatchingsCounter( - (MatchingsSearcher) MatchingsSearcherFactory - .createSpanMatchingsSearcher(configuration.matching), - globalClassifier, inferencer)), - FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator()); + return new SearcherBasedNotMatchingMarkingFilter(new StrongSpanMatchingsSearcher(), + new ConfidenceScoreEvaluatorDecorator( + new HierarchicalFMeasureCalculator(new HierarchicalMatchingsCounter( + (MatchingsSearcher) MatchingsSearcherFactory + .createSpanMatchingsSearcher(configuration.matching), + globalClassifier, inferencer)), + FMeasureCalculator.MICRO_F1_SCORE_NAME, new DoubleResultComparator())); } case OKE_Task1: { ExperimentTaskConfiguration subTaskConfig; diff --git a/src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java b/src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java new file mode 100644 index 000000000..8db8a5bd3 --- /dev/null +++ b/src/main/java/org/aksw/gerbil/evaluate/impl/filter/SearcherBasedNotMatchingMarkingFilter.java @@ -0,0 +1,59 @@ +package org.aksw.gerbil.evaluate.impl.filter; + +import java.util.ArrayList; +import java.util.List; + +import org.aksw.gerbil.evaluate.AbstractEvaluatorDecorator; +import org.aksw.gerbil.evaluate.EvaluationResultContainer; +import org.aksw.gerbil.evaluate.Evaluator; +import org.aksw.gerbil.matching.MatchingsSearcher; +import org.aksw.gerbil.transfer.nif.Marking; + +import com.carrotsearch.hppc.BitSet; + +/** + * This evaluator decorator removes every marking from the given list that does + * not match the given gold standard list based on a given + * {@link MatchingsSearcher} instance. + * + * @author Michael Röder (roeder@informatik.uni-leipzig.de) + * + */ +public class SearcherBasedNotMatchingMarkingFilter extends AbstractEvaluatorDecorator { + + protected MatchingsSearcher searcher; + + public SearcherBasedNotMatchingMarkingFilter(MatchingsSearcher searcher, Evaluator evaluator) { + super(evaluator); + this.searcher = searcher; + } + + protected List> filterListOfMarkings(List> markings, List> goldStandard) { + List> filteredMarkings = new ArrayList>(markings.size()); + for (int i = 0; i < markings.size(); ++i) { + filteredMarkings.add(filterMarkings(markings.get(i), goldStandard.get(i))); + } + return filteredMarkings; + } + + protected List filterMarkings(List markings, List goldStandard) { + BitSet matchingElements; + BitSet alreadyUsedResults = new BitSet(goldStandard.size()); + List filteredMarkings = new ArrayList(markings.size()); + for (T marking : markings) { + matchingElements = searcher.findMatchings(marking, goldStandard, alreadyUsedResults); + if (!matchingElements.isEmpty()) { + filteredMarkings.add(marking); + alreadyUsedResults.set(matchingElements.nextSetBit(0)); + } + } + return filteredMarkings; + } + + @Override + public void evaluate(List> annotatorResults, List> goldStandard, + EvaluationResultContainer results) { + evaluator.evaluate(filterListOfMarkings(annotatorResults, goldStandard), goldStandard, results); + } + +} diff --git a/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java b/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java index f324f4e44..1ee15d1fc 100644 --- a/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java +++ b/src/main/java/org/aksw/gerbil/execute/ExperimentTask.java @@ -45,8 +45,6 @@ import org.aksw.gerbil.evaluate.SubTaskResult; import org.aksw.gerbil.evaluate.impl.FMeasureCalculator; import org.aksw.gerbil.exceptions.GerbilException; -import org.aksw.gerbil.matching.filter.SearcherBasedNotMatchingMarkingFilter; -import org.aksw.gerbil.matching.impl.StrongSpanMatchingsSearcher; import org.aksw.gerbil.semantic.sameas.DatasetBasedSameAsRetriever; import org.aksw.gerbil.semantic.sameas.MultipleSameAsRetriever; import org.aksw.gerbil.semantic.sameas.SameAsRetriever; @@ -307,19 +305,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator, List> results = new ArrayList>(dataset.size()); List> goldStandard = new ArrayList>(dataset.size()); D2KBAnnotator linker = ((D2KBAnnotator) annotator); - // For D2KB we have to filter the results to get those results - // that are matching the positions - SearcherBasedNotMatchingMarkingFilter filter = new SearcherBasedNotMatchingMarkingFilter( - new StrongSpanMatchingsSearcher()); - List documentGS; for (Document document : dataset.getInstances()) { - documentGS = document.getMarkings(MeaningSpan.class); // reduce the document to a text and a list of Spans - results.add(filter.filterMarkings( - linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document)), - documentGS)); - goldStandard.add(documentGS); + results.add(linker.performD2KBTask(DocumentInformationReducer.reduceToTextAndSpans(document))); + goldStandard.add(document.getMarkings(MeaningSpan.class)); taskState.increaseExperimentStepCount(); } if (annotatorOutputWriter != null) { @@ -413,20 +403,11 @@ protected EvaluationResult runExperiment(Dataset dataset, Annotator annotator, List> results = new ArrayList>(dataset.size()); List> goldStandard = new ArrayList>(dataset.size()); EntityTyper typer = ((EntityTyper) annotator); - // For ETyping we have to filter the results to get those - // results - // that are matching the positions - SearcherBasedNotMatchingMarkingFilter filter = new SearcherBasedNotMatchingMarkingFilter( - new StrongSpanMatchingsSearcher()); - List documentGS; for (Document document : dataset.getInstances()) { - documentGS = document.getMarkings(TypedSpan.class); // reduce the document to a text and a list of Spans - results.add(filter.filterMarkings( - typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document)), - documentGS)); - goldStandard.add(documentGS); + results.add(typer.performTyping(DocumentInformationReducer.reduceToTextAndSpans(document))); + goldStandard.add(document.getMarkings(TypedSpan.class)); taskState.increaseExperimentStepCount(); } if (annotatorOutputWriter != null) { diff --git a/src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java b/src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java deleted file mode 100644 index abdb4ead8..000000000 --- a/src/main/java/org/aksw/gerbil/matching/filter/NotMatchingMarkingFilter.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.aksw.gerbil.matching.filter; - -import java.util.List; - -import org.aksw.gerbil.transfer.nif.Marking; - -/** - * Removes every {@link Marking} that is not matching. - * - * @author Michael Röder (roeder@informatik.uni-leipzig.de) - * - */ -public interface NotMatchingMarkingFilter { - - /** - * Returns a list of {@link Marking}s that does not contain any - * {@link Marking}s that is not matching the given gold standard list. - * - * @param markings - * the list of {@link Marking}s - * @return a filtered list of {@link Marking}s - */ - public List filterMarkings(List markings, List goldStandard); -} diff --git a/src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java b/src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java deleted file mode 100644 index cb842fc42..000000000 --- a/src/main/java/org/aksw/gerbil/matching/filter/SearcherBasedNotMatchingMarkingFilter.java +++ /dev/null @@ -1,42 +0,0 @@ -package org.aksw.gerbil.matching.filter; - -import java.util.ArrayList; -import java.util.List; - -import org.aksw.gerbil.matching.MatchingsSearcher; -import org.aksw.gerbil.transfer.nif.Marking; - -import com.carrotsearch.hppc.BitSet; - -/** - * This implementation of a marking filter removes every marking from the given - * list that does not match the given gold standard list. For identifying those - * markings a {@link MatchingsSearcher} is used. - * - * @author Michael Röder (roeder@informatik.uni-leipzig.de) - * - */ -public class SearcherBasedNotMatchingMarkingFilter implements NotMatchingMarkingFilter { - - protected MatchingsSearcher searcher; - - public SearcherBasedNotMatchingMarkingFilter(MatchingsSearcher searcher) { - this.searcher = searcher; - } - - @Override - public List filterMarkings(List markings, List goldStandard) { - BitSet matchingElements; - BitSet alreadyUsedResults = new BitSet(goldStandard.size()); - List filteredMarkings = new ArrayList(markings.size()); - for (T marking : markings) { - matchingElements = searcher.findMatchings(marking, goldStandard, alreadyUsedResults); - if (!matchingElements.isEmpty()) { - filteredMarkings.add(marking); - alreadyUsedResults.set(matchingElements.nextSetBit(0)); - } - } - return filteredMarkings; - } - -} diff --git a/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java b/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java index c1a0ba01c..b98e1b50b 100644 --- a/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java +++ b/src/main/java/org/aksw/gerbil/utils/filter/MarkingFilter.java @@ -22,8 +22,20 @@ public interface MarkingFilter { + /** + * Returns true if the marking is good and does not have to be filtered out. + * + * @param marking + * @return + */ public boolean isMarkingGood(T marking); + /** + * Returns a filtered list based on the given list. + * + * @param markings + * @return + */ public List filterList(List markings); public List> filterListOfLists(List> markings); diff --git a/src/test/java/org/aksw/gerbil/SingleRunTest.java b/src/test/java/org/aksw/gerbil/SingleRunTest.java index 7e52b9ffc..e40d6e3c7 100644 --- a/src/test/java/org/aksw/gerbil/SingleRunTest.java +++ b/src/test/java/org/aksw/gerbil/SingleRunTest.java @@ -43,10 +43,10 @@ public class SingleRunTest implements TaskObserver { private static final Logger LOGGER = LoggerFactory.getLogger(SingleRunTest.class); - private static final String ANNOTATOR_NAME = "FOX"; - private static final String DATASET_NAME = "N3-Reuters-128"; + private static final String ANNOTATOR_NAME = "TagMe 2"; + private static final String DATASET_NAME = "ACE2004"; private static final ExperimentType EXPERIMENT_TYPE = ExperimentType.D2KB; - private static final Matching MATCHING = Matching.WEAK_ANNOTATION_MATCH; + private static final Matching MATCHING = Matching.STRONG_ENTITY_MATCH; public static void main(String[] args) throws Exception { SingleRunTest test = new SingleRunTest();