Skip to content

Commit

Permalink
Merge pull request #55 from nnick14/update_hf_tf
Browse files Browse the repository at this point in the history
Update TensorFlow and Transformers versions
  • Loading branch information
rolczynski authored Jul 27, 2021
2 parents ebfd0b6 + 66f8182 commit 8b7fde1
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 33 deletions.
59 changes: 45 additions & 14 deletions aspect_based_sentiment_analysis/models.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import logging
from abc import ABC
from abc import abstractmethod
from typing import Tuple
from typing import Tuple, Optional, Union

import numpy as np

import transformers
from transformers.modeling_tf_utils import TFModelInputType
import tensorflow as tf
from tensorflow.keras import layers

Expand Down Expand Up @@ -120,9 +123,11 @@ class BertABSClassifier(ABSClassifier, transformers.TFBertPreTrainedModel):
def __init__(self, config: BertABSCConfig, **kwargs):
super().__init__(config, **kwargs)
self.bert = transformers.TFBertMainLayer(
config, name="bert")
config, name="bert"
)
initializer = transformers.modeling_tf_utils.get_initializer(
config.initializer_range)
config.initializer_range
)
self.dropout = layers.Dropout(config.hidden_dropout_prob)
self.classifier = layers.Dense(
config.num_polarities,
Expand All @@ -131,21 +136,47 @@ def __init__(self, config: BertABSCConfig, **kwargs):
)

def call(
    self,
    input_ids: Optional[TFModelInputType] = None,
    attention_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
    token_type_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
    position_ids: Optional[Union[np.ndarray, tf.Tensor]] = None,
    head_mask: Optional[Union[np.ndarray, tf.Tensor]] = None,
    inputs_embeds: Optional[Union[np.ndarray, tf.Tensor]] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
    training: Optional[bool] = False,
    **kwargs,
) -> Tuple[tf.Tensor, Tuple[tf.Tensor, ...], Tuple[tf.Tensor, ...]]:
    """
    Run the BERT main layer and classify its pooled output.

    The arguments are first normalized by the *transformers* helper
    `input_processing`, and the normalized values are then forwarded to
    `self.bert`. The pooled output goes through dropout and the dense
    classifier head.

    Parameters
    ----------
    input_ids, attention_mask, token_type_ids, position_ids, head_mask,
    inputs_embeds, output_attentions, output_hidden_states
        Forwarded (after normalization) to the BERT main layer.
    return_dict
        Accepted for signature compatibility with *transformers* models.
        This method always returns a plain tuple, so the value does not
        change the return type.
    training
        Whether the model runs in the training mode; controls the BERT
        layer and the dropout applied to the pooled output.
    **kwargs
        Extra call arguments handed to `input_processing` as
        `kwargs_call`.

    Returns
    -------
    logits, hidden_states, attentions
        The classifier logits together with the `hidden_states` and
        `attentions` reported by the BERT layer.
        NOTE(review): the last two are presumably `None` when the
        corresponding `output_*` option is disabled -- confirm against
        the model config.
    """
    inputs = transformers.modeling_tf_utils.input_processing(
        func=self.call,
        config=self.config,
        input_ids=input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
        training=training,
        kwargs_call=kwargs,
    )
    outputs = self.bert(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        token_type_ids=inputs["token_type_ids"],
        position_ids=inputs["position_ids"],
        head_mask=inputs["head_mask"],
        inputs_embeds=inputs["inputs_embeds"],
        output_attentions=inputs["output_attentions"],
        output_hidden_states=inputs["output_hidden_states"],
        # The attribute access below needs a ModelOutput; a tuple (returned
        # when `return_dict` resolves to False) has no `.hidden_states`.
        return_dict=True,
        training=inputs["training"],
    )
    # Use the normalized flag so dropout and the BERT layer always agree.
    pooled_output = self.dropout(
        outputs.pooler_output, training=inputs["training"]
    )
    logits = self.classifier(pooled_output)
    return logits, outputs.hidden_states, outputs.attentions
9 changes: 4 additions & 5 deletions aspect_based_sentiment_analysis/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,7 @@ def preprocess(self, text: str, aspects: List[str]) -> Task:
"""

@abstractmethod
def tokenize(self, examples: Iterable[Example]) -> Iterable[
TokenizedExample]:
def tokenize(self, examples: Iterable[Example]) -> Iterable[TokenizedExample]:
"""
Tokenize the example. The model cannot process the raw pair of two
strings (text, aspect) directly.
Expand All @@ -113,8 +112,8 @@ def tokenize(self, examples: Iterable[Example]) -> Iterable[
def encode(self, examples: Iterable[TokenizedExample]) -> InputBatch:
"""
Encode tokenized examples. The input batch is a container of tensors
crucial for the model to make a prediction. The names are compatible
with the *transformers* package.
crucial for the model to make a prediction. The names are compatible
with the *transformers* package.
Parameters
----------
Expand Down Expand Up @@ -250,7 +249,7 @@ def predict(self, input_batch: InputBatch) -> OutputBatch:
# output including hidden states and attentions.
with tf.GradientTape() as tape:
logits, hidden_states, attentions = self.model.call(
token_ids=input_batch.token_ids,
input_ids=input_batch.token_ids,
attention_mask=input_batch.attention_mask,
token_type_ids=input_batch.token_type_ids
)
Expand Down
2 changes: 1 addition & 1 deletion aspect_based_sentiment_analysis/text_splitters.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def sentencizer(name: str = 'en_core_web_sm') -> Callable[[str], List[str]]:

def wrapper(text: str) -> List[str]:
    """Split `text` into sentences and return them stripped of whitespace."""
    doc = nlp(text)
    # `Span.string` is gone in spaCy 3; `Span.text` is the supported accessor.
    return [sent.text.strip() for sent in doc.sents]

return wrapper
2 changes: 1 addition & 1 deletion aspect_based_sentiment_analysis/training/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def preprocess_batch(
encoded = self.tokenizer.batch_encode_plus(
pairs,
add_special_tokens=True,
pad_to_max_length=True,
padding=True,
return_attention_masks=True,
return_tensors='tf'
)
Expand Down
20 changes: 10 additions & 10 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
name: Aspect-Based-Sentiment-Analysis
dependencies:
- python=3.6.9
- scikit-learn=0.22
- pytest=5.3
- pytest-timeout=1.3
- scikit-learn=0.24.2
- pytest=6.2.4
- pytest-timeout=1.4.2
- ipython=7.3
- spacy=2.3
- dataclasses=0.7
- spacy=3.1.0
- dataclasses=0.8
- pip:
- tensorflow==2.2
- google-cloud-storage==1.26
- testfixtures==6.14
- transformers==2.5
- optuna==1.3
- tensorflow==2.5.0
- google-cloud-storage==1.40.0
- testfixtures==6.17.1
- transformers==4.8.2
- optuna==2.8.0
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
include_package_data=False,
packages=find_packages(),
install_requires=[
'tensorflow==2.4.0',
'transformers==2.5',
'tensorflow==2.5.0',
'transformers==4.8.2',
'pytest',
'scikit-learn',
'ipython',
Expand Down

0 comments on commit 8b7fde1

Please sign in to comment.