import torch
from torch import nn

from utils import pack_for_rnn_seq, unpack_from_rnn_seq


class MLP(nn.Module):

    def __init__(self, input_dim, hidden_dim, num_layers, dropout_prob):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.dropout_prob = dropout_prob
        mlp_layers = []
        for i in range(num_layers):
            # The first layer maps from input_dim; every later layer maps
            # from hidden_dim to hidden_dim.
            if i == 0:
                layer_input_dim = input_dim
            else:
                layer_input_dim = hidden_dim
            linear_layer = nn.Linear(in_features=layer_input_dim,
                                     out_features=hidden_dim)
            relu_layer = nn.ReLU()
            dropout_layer = nn.Dropout(dropout_prob)
            mlp_layer = nn.Sequential(linear_layer, relu_layer, dropout_layer)
            mlp_layers.append(mlp_layer)
        self.mlp = nn.Sequential(*mlp_layers)

    def forward(self, input):
        """
        Args:
            input (Variable): A float variable of size
                (batch_size, input_dim).

        Returns:
            output (Variable): A float variable of size
                (batch_size, hidden_dim), which is the result of
                applying the MLP to the input argument.
        """
        return self.mlp(input)
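
# Usage sketch for MLP (a hedged example; the dimensions below are
# illustrative assumptions, not values taken from this repo):
#
#     mlp = MLP(input_dim=300, hidden_dim=512, num_layers=2, dropout_prob=0.1)
#     out = mlp(torch.randn(16, 300))  # -> shape (16, 512)
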
class NLIClassifier(nn.Module):

    def __init__(self, sentence_dim, hidden_dim, num_layers, num_classes,
                 dropout_prob):
        super().__init__()
        self.sentence_dim = sentence_dim
        self.hidden_dim = hidden_dim
        self.dropout_prob = dropout_prob
        self.mlp = MLP(input_dim=4 * sentence_dim, hidden_dim=hidden_dim,
                       num_layers=num_layers, dropout_prob=dropout_prob)
        self.clf_linear = nn.Linear(in_features=hidden_dim,
                                    out_features=num_classes)

    def forward(self, pre, hyp):
        # Standard sentence-matching features: the two sentence vectors,
        # their absolute difference, and their elementwise product, giving
        # a (batch_size, 4 * sentence_dim) input to the MLP.
        mlp_input = torch.cat([pre, hyp, (pre - hyp).abs(), pre * hyp], dim=1)
        mlp_output = self.mlp(mlp_input)
        output = self.clf_linear(mlp_output)
        return output
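
# Usage sketch for NLIClassifier (hedged; all sizes are illustrative):
#
#     clf = NLIClassifier(sentence_dim=1024, hidden_dim=512, num_layers=1,
#                         num_classes=3, dropout_prob=0.1)
#     logits = clf(pre=torch.randn(16, 1024), hyp=torch.randn(16, 1024))
#     # logits: (16, 3) unnormalized class scores
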
class ShortcutStackedEncoder(nn.Module):

    def __init__(self, input_dim, hidden_dims):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dims = hidden_dims
        self.num_layers = len(hidden_dims)
        for i in range(self.num_layers):
            # Layer i receives the word embeddings concatenated with the
            # bidirectional outputs of all previous layers, hence the
            # 2 * sum(hidden_dims[:i]) extra input features.
            lstm_input_dim = input_dim + 2 * sum(hidden_dims[:i])
            lstm_layer = nn.LSTM(
                input_size=lstm_input_dim, hidden_size=hidden_dims[i],
                bidirectional=True, batch_first=False)
            setattr(self, f'lstm_layer_{i}', lstm_layer)

    def get_lstm_layer(self, i):
        return getattr(self, f'lstm_layer_{i}')

    def forward(self, input, lengths):
        prev_lstm_output = None
        lstm_input = input
        for i in range(self.num_layers):
            if i > 0:
                # Shortcut connection: append the previous layer's output
                # to the running input along the feature dimension.
                lstm_input = torch.cat([lstm_input, prev_lstm_output], dim=2)
            lstm_input_packed, reverse_indices = pack_for_rnn_seq(
                inputs=lstm_input, lengths=lengths)
            lstm_layer = self.get_lstm_layer(i)
            lstm_output_packed, _ = lstm_layer(lstm_input_packed)
            lstm_output = unpack_from_rnn_seq(
                packed_seq=lstm_output_packed, reverse_indices=reverse_indices)
            prev_lstm_output = lstm_output
        # Max-pool over the time dimension to obtain a fixed-size sentence
        # vector of size (batch_size, 2 * hidden_dims[-1]).
        sentence_vector = torch.max(prev_lstm_output, dim=0)[0]
        return sentence_vector
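
# Shape sketch for ShortcutStackedEncoder (hedged; dimensions are
# illustrative): with input_dim=300 and hidden_dims=[512, 1024], layer 0
# consumes 300 features per timestep, layer 1 consumes 300 + 2*512 = 1324,
# and the returned sentence vector has size (batch_size, 2 * 1024).
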
class NLIModel(nn.Module):

    def __init__(self, num_words, word_dim, lstm_hidden_dims,
                 mlp_hidden_dim, mlp_num_layers, num_classes, dropout_prob):
        super().__init__()
        self.num_words = num_words
        self.word_dim = word_dim
        self.lstm_hidden_dims = lstm_hidden_dims
        self.mlp_hidden_dim = mlp_hidden_dim
        self.mlp_num_layers = mlp_num_layers
        self.num_classes = num_classes
        self.dropout_prob = dropout_prob
        self.word_embedding = nn.Embedding(num_embeddings=num_words,
                                           embedding_dim=word_dim)
        self.encoder = ShortcutStackedEncoder(
            input_dim=word_dim, hidden_dims=lstm_hidden_dims)
        # The encoder is bidirectional, so each sentence vector has
        # 2 * lstm_hidden_dims[-1] features.
        self.classifier = NLIClassifier(
            sentence_dim=2 * lstm_hidden_dims[-1], hidden_dim=mlp_hidden_dim,
            num_layers=mlp_num_layers, num_classes=num_classes,
            dropout_prob=dropout_prob)

    def forward(self, pre_input, pre_lengths, hyp_input, hyp_lengths):
        """
        Args:
            pre_input (Variable): A long variable containing indices for
                premise words. Size: (max_length, batch_size).
            pre_lengths (Tensor): A long tensor containing lengths for
                sentences in the premise batch.
            hyp_input (Variable): A long variable containing indices for
                hypothesis words. Size: (max_length, batch_size).
            hyp_lengths (Tensor): A long tensor containing lengths for
                sentences in the hypothesis batch.

        Returns:
            output (Variable): A float variable containing the
                unnormalized score for each class.
                Size: (batch_size, num_classes).
        """
        pre_input_emb = self.word_embedding(pre_input)
        hyp_input_emb = self.word_embedding(hyp_input)
        # The premise and hypothesis share the same (siamese) encoder.
        pre_vector = self.encoder(input=pre_input_emb, lengths=pre_lengths)
        hyp_vector = self.encoder(input=hyp_input_emb, lengths=hyp_lengths)
        classifier_output = self.classifier(pre=pre_vector, hyp=hyp_vector)
        return classifier_output
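

if __name__ == '__main__':
    # Minimal smoke test (a hedged sketch: every size below is an
    # illustrative assumption, not a value from this repo, and it relies
    # on pack_for_rnn_seq/unpack_from_rnn_seq behaving as imported above).
    num_words, word_dim = 100, 32
    model = NLIModel(num_words=num_words, word_dim=word_dim,
                     lstm_hidden_dims=[64, 64], mlp_hidden_dim=128,
                     mlp_num_layers=2, num_classes=3, dropout_prob=0.1)
    max_length, batch_size = 7, 4
    pre = torch.randint(0, num_words, (max_length, batch_size))
    hyp = torch.randint(0, num_words, (max_length, batch_size))
    # Lengths are sorted in descending order, as packed sequences
    # traditionally require.
    pre_lengths = torch.tensor([7, 6, 5, 4])
    hyp_lengths = torch.tensor([7, 7, 3, 2])
    logits = model(pre, pre_lengths, hyp, hyp_lengths)
    print(logits.shape)  # expected: torch.Size([4, 3])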