diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/__init__.py b/aspect_based_sentiment_analysis/__init__.py old mode 100644 new mode 100755 index 59fadae..f917b1c --- a/aspect_based_sentiment_analysis/__init__.py +++ b/aspect_based_sentiment_analysis/__init__.py @@ -31,7 +31,7 @@ from .text_splitters import sentencizer from . import plots -from .plots import explain +from .plots import display from . import training from . import recognizers diff --git a/aspect_based_sentiment_analysis/alignment.py b/aspect_based_sentiment_analysis/alignment.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/aux_models.py b/aspect_based_sentiment_analysis/aux_models.py old mode 100644 new mode 100755 index 60d47c3..621375b --- a/aspect_based_sentiment_analysis/aux_models.py +++ b/aspect_based_sentiment_analysis/aux_models.py @@ -2,12 +2,30 @@ from typing import List import tensorflow as tf -import transformers +from transformers import TFPreTrainedModel from .data_types import Pattern from .data_types import TokenizedExample +def get_key_set(patterns: List[Pattern], n: int, k: int = 1): + """ + + Parameters + ---------- + patterns + n + The number of elements in the key set. + k + The number of the sorted (from the most important) candidates + of the key sets. + + Returns + ------- + + """ + + class ReferenceRecognizer(ABC): """ """ @@ -19,6 +37,8 @@ def __call__( attention_grads: tf.Tensor ) -> bool: """ """ + if not aspect: + return True class PatternRecognizer(ABC): @@ -34,8 +54,7 @@ def __call__( """ """ -class BasicReferenceRecognizer(ReferenceRecognizer, - transformers.TFPreTrainedModel): +class BasicReferenceRecognizer(ReferenceRecognizer, TFPreTrainedModel): """ Briefly, it represents a text and an aspect as two vectors, and predicts that a text relates to an aspect if the cosine similarity is bigger than diff --git a/aspect_based_sentiment_analysis/data_types.py b/aspect_based_sentiment_analysis/data_types.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/loads.py b/aspect_based_sentiment_analysis/loads.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/models.py b/aspect_based_sentiment_analysis/models.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/pipelines.py b/aspect_based_sentiment_analysis/pipelines.py old mode 100644 new mode 100755 index 92266a6..1357595 --- a/aspect_based_sentiment_analysis/pipelines.py +++ b/aspect_based_sentiment_analysis/pipelines.py @@ -258,18 +258,25 @@ def __call__( return completed_subtask return completed_task - def transform( - self, - text: str, - aspects: List[str] - ) -> CompletedTask: - task = self.preprocess(text, aspects) - input_batch = self.encode(task.tokenized_examples) + # def transform( + # self, + # text: str, + # aspects: List[str] + # ) -> CompletedTask: + # task = self.preprocess(text, aspects) + # input_batch = self.encode(task.tokenized_examples) + # output_batch = self.predict(input_batch) + # reviews = self.professor.review(task, output_batch) \ + # if self.professor else None + # completed_task = self.postprocess(task, output_batch, reviews) + # return completed_task + + def transform(self, batch, mask: List[List[int]] = None): + tokenized_examples = self.tokenize(batch, mask) + input_batch = self.encode(tokenized_examples) output_batch = self.predict(input_batch) - reviews = self.professor.review(task, output_batch) \ - if self.professor else None - completed_task = self.postprocess(task, output_batch, reviews) - return completed_task + predictions = self.review(tokenized_examples, output_batch) + return predictions def preprocess(self, text: str, aspects: List[str]) -> Task: texts = self.text_splitter(text) if self.text_splitter else [text] diff --git a/aspect_based_sentiment_analysis/plots.py b/aspect_based_sentiment_analysis/plots.py old mode 100644 new mode 100755 index 35e59df..7b7da8a --- a/aspect_based_sentiment_analysis/plots.py +++ b/aspect_based_sentiment_analysis/plots.py @@ -2,9 +2,9 @@ from typing import List from typing import Tuple import numpy as np +from IPython.core.display import display as ipython_display from IPython.core.display import HTML -from .data_types import PredictedExample -from .data_types import Pattern +from .data_types import Pattern, Review def html_escape(text): @@ -45,7 +45,8 @@ def highlight_pattern(pattern: Pattern) -> str: return highlighted_text -def explain(example: PredictedExample): +def display_html(patterns: List[Pattern]): + # TODO aspect = example.aspect_representation texts = [f'Words connected with the "{example.aspect}" aspect:
'] texts.extend(highlight_sequence(aspect.tokens, aspect.look_at)) @@ -56,3 +57,13 @@ def explain(example: PredictedExample): text = ' '.join(texts) html_text = HTML(text) return html_text + + +def display_patterns(patterns: List[Pattern]): + html_text = display_html(patterns) + return ipython_display(html_text) + + +def display(review: Review): + html_text = display_html(review.patterns) + return ipython_display(html_text) diff --git a/aspect_based_sentiment_analysis/professors.py b/aspect_based_sentiment_analysis/professors.py old mode 100644 new mode 100755 index c4c46f3..26885d1 --- a/aspect_based_sentiment_analysis/professors.py +++ b/aspect_based_sentiment_analysis/professors.py @@ -18,7 +18,7 @@ @dataclass class _Professor(ABC): """ """ - ref_recognizer: ReferenceRecognizer = None + reference_recognizer: ReferenceRecognizer = None pattern_recognizer: PatternRecognizer = None def make_decision( @@ -57,10 +57,10 @@ def review( output_batch: OutputBatch ) -> Iterable[Review]: for example, args in zip(task, output_batch): - is_reference = self.ref_recognizer(example, *args) \ - if self.ref_recognizer else None + is_reference = self.reference_recognizer(example, *args) \ + if self.reference_recognizer else None patterns = self.pattern_recognizer(example, *args) \ - if self.pattern_recognizer and is_reference is not False else\ + if self.pattern_recognizer and is_reference is not False else \ None review = Review(is_reference, patterns) yield review diff --git a/aspect_based_sentiment_analysis/recognizers.py b/aspect_based_sentiment_analysis/recognizers.py old mode 100644 new mode 100755 index 5fbf99e..c8e7286 --- a/aspect_based_sentiment_analysis/recognizers.py +++ b/aspect_based_sentiment_analysis/recognizers.py @@ -52,6 +52,7 @@ class AttentionGradientProduct(PatternRecognizer): percentile of the total information. Default 80% of weights magnitude. """ information_in_patterns: int = 80 + is_pattern_scaled: bool = False def __call__( self, @@ -76,8 +77,8 @@ def __call__( # threshold = 0.05 # round_decimals = 2 # - # product = attentions * attention_grads - # product = tf.abs(product) + # product = attentions * tf.abs(attention_grads) + # product = tf.reduce_sum(product, axis=(0, 1)) # attention_grads = alignment.merge_input( # product, alignment=example.alignment) @@ -88,8 +89,10 @@ def __call__( # # mixtures = product[text_ids, :][:, text_ids] # mixtures /= np.max(mixtures + 1e-9, axis=1).reshape(-1, 1) - # np.fill_diagonal(mixtures, 1) - # mixtures *= w.reshape(-1, 1) + # np.fill_diagonal(mixtures, 1) # as well, we could use max on diagonal + # mixtures /= L1 norm + # if self.is_pattern_scaled: + # mixtures *= w.reshape(-1, 1) # # mixtures = np.where(mixtures > threshold, mixtures, 0) # mixtures = np.round(mixtures, decimals=round_decimals) diff --git a/aspect_based_sentiment_analysis/text_splitters.py b/aspect_based_sentiment_analysis/text_splitters.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/__init__.py b/aspect_based_sentiment_analysis/training/__init__.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/callbacks.py b/aspect_based_sentiment_analysis/training/callbacks.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/classifier.py b/aspect_based_sentiment_analysis/training/classifier.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/data_types.py b/aspect_based_sentiment_analysis/training/data_types.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/datasets.py b/aspect_based_sentiment_analysis/training/datasets.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/errors.py b/aspect_based_sentiment_analysis/training/errors.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/metrics.py b/aspect_based_sentiment_analysis/training/metrics.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/training/routines.py b/aspect_based_sentiment_analysis/training/routines.py old mode 100644 new mode 100755 diff --git a/aspect_based_sentiment_analysis/utils.py b/aspect_based_sentiment_analysis/utils.py old mode 100644 new mode 100755 diff --git a/data/semeval/adapter.py b/data/semeval/adapter.py old mode 100644 new mode 100755 diff --git a/environment.yml b/environment.yml old mode 100644 new mode 100755 diff --git a/examples/pattens.ipynb b/examples/pattens.ipynb old mode 100644 new mode 100755 index eea440c..e2d55f8 --- a/examples/pattens.ipynb +++ b/examples/pattens.ipynb @@ -85,7 +85,7 @@ } ], "source": [ - "html = absa.explain(example)\n", + "html = absa.display_html(example)\n", "display(html)" ], "metadata": { diff --git a/examples/patterns.png b/examples/patterns.png old mode 100644 new mode 100755 diff --git a/examples/train_classifier.py b/examples/train_classifier.py old mode 100644 new mode 100755 diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 diff --git a/tests/absa/test_alignment.py b/tests/absa/test_alignment.py old mode 100644 new mode 100755 diff --git a/tests/absa/test_dataset.py b/tests/absa/test_dataset.py old mode 100644 new mode 100755 diff --git a/tests/absa/test_loads.py b/tests/absa/test_loads.py old mode 100644 new mode 100755 diff --git a/tests/absa/test_pipeline.py b/tests/absa/test_pipeline.py old mode 100644 new mode 100755 diff --git a/tests/absa/test_recognizers.py b/tests/absa/test_recognizers.py old mode 100644 new mode 100755 diff --git a/tests/absa/test_text_splitters.py b/tests/absa/test_text_splitters.py old mode 100644 new mode 100755 diff --git a/tests/conftest.py b/tests/conftest.py old mode 100644 new mode 100755 index cc7836f..edb1312 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,7 +13,7 @@ def pytest_configure(config): def pytest_collection_modifyitems(config, items): if config.getoption("--run-slow"): - # --run-run-slow given in cli: do not skip slow checks + # --run-slow given in cli: do not skip slow checks return reason_desc = "need --run-slow option to run" skip_sanity_check = pytest.mark.skip(reason=reason_desc) diff --git a/tests/data/test_semeval.py b/tests/data/test_semeval.py old mode 100644 new mode 100755 diff --git a/tests/test_performance.py b/tests/test_performance.py old mode 100644 new mode 100755 diff --git a/tests/training/test_callbacks.py b/tests/training/test_callbacks.py old mode 100644 new mode 100755 diff --git a/tests/training/test_sanity_check.py b/tests/training/test_sanity_check.py old mode 100644 new mode 100755