model.py
"""
Model classes for neural Dialogue Act Recognition.
There are two types of models:
- Encoders take an utterances and produce an n-dimensional vector.
- DAR models, which take a sequence of encoded utterances (a conversation)
and produce a sequence of dialogue act tags.
Contents:
- WordVecAvg - baseline word vector averaging
- DARRNN - a simple RNN DAR model
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
import transformers

class SimpleDARRNN(nn.Module):
    """Baseline DAR model: a single linear layer per encoded utterance, with no recurrence."""

    def __init__(self, utt_size, n_tags):
        super().__init__()
        self.decoder = nn.Linear(utt_size, n_tags)

    def forward(self, x, hidden):
        # flatten (seq_len, batch) before the linear layer, then restore the shape
        decoded = self.decoder(x.view(x.size(0)*x.size(1), x.size(2)))
        return decoded.view(x.size(0), x.size(1), decoded.size(1)), hidden

    def init_hidden(self, batch_size):
        # stateless model: return an empty placeholder to match the DARRNN interface
        return torch.tensor([])

class DARRNN(nn.Module):
    """Container module with an encoder, a recurrent module, and a decoder."""

    def __init__(self, utt_size, n_tags, hidden_size, n_layers, dropout=0.5, use_lstm=False):
        """
        utt_size    - size of the encoded utterance
        hidden_size - size of the hidden layer
        n_tags      - number of dialogue act tags
        n_layers    - number of hidden RNN layers
        """
        super().__init__()
        self.use_lstm = use_lstm
        self.drop = nn.Dropout(dropout)
        if use_lstm:
            self.rnn = nn.LSTM(utt_size, hidden_size, n_layers, dropout=dropout)
        else:
            self.rnn = nn.RNN(utt_size, hidden_size, n_layers, nonlinearity='relu', dropout=dropout)  # TODO: try tanh too?
        self.decoder = nn.Linear(hidden_size, n_tags)
        self.init_weights()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

    def init_weights(self):
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, x, hidden):
        x, hidden = self.rnn(x, hidden)
        x = self.drop(x)
        decoded = self.decoder(x.view(x.size(0)*x.size(1), x.size(2)))
        return decoded.view(x.size(0), x.size(1), decoded.size(1)), hidden

    def init_hidden(self, batch_size):
        weight = next(self.parameters())
        if self.use_lstm:
            return (weight.new_zeros(self.n_layers, batch_size, self.hidden_size),
                    weight.new_zeros(self.n_layers, batch_size, self.hidden_size))
        return weight.new_zeros(self.n_layers, batch_size, self.hidden_size)
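
# Usage sketch (illustrative, not from the original file): DARRNN consumes a
# conversation of already-encoded utterances. The (seq_len, batch, utt_size)
# layout below is an assumption based on the default batch_first=False
# convention of nn.RNN/nn.LSTM.
#
#   model = DARRNN(utt_size=100, n_tags=43, hidden_size=100, n_layers=2)
#   hidden = model.init_hidden(batch_size=2)
#   utts = torch.zeros(7, 2, 100)             # (seq_len, batch, utt_size)
#   tag_scores, hidden = model(utts, hidden)  # tag_scores: (seq_len, batch, n_tags)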

class WordVecAvg(nn.Module):
    """ Baseline word vector encoder. Combines an utterance's word vectors by
    summation (the padding index embeds to zero, so padding does not contribute).
    """

    def __init__(self, embedding):
        super().__init__()
        self.embedding = embedding

    @classmethod
    def from_pretrained(cls, weights, freeze_embedding=False):
        embedding = nn.Embedding.from_pretrained(weights, freeze=freeze_embedding, padding_idx=0)
        return cls(embedding)

    @classmethod
    def random_init(cls, num_embeddings, embedding_dim):
        embedding = nn.Embedding(num_embeddings, embedding_dim, padding_idx=0)
        return cls(embedding)

    def forward(self, x):
        x = self.embedding(x).sum(dim=1)  # sum over the token dimension
        return x
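
# Usage sketch (illustrative, not from the original file); the (batch, seq_len)
# token-id layout and the sizes are assumptions:
#
#   enc = WordVecAvg.random_init(num_embeddings=10000, embedding_dim=100)
#   token_ids = torch.zeros(2, 20, dtype=torch.long)  # (batch, seq_len)
#   utt_vecs = enc(token_ids)                         # (batch, embedding_dim)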

class KimCNN(nn.Module):
    """ CNN utt encoder based on Kim (2014): https://github.com/yoonkim/CNN_sentence
    """

    def __init__(self, vocab_size, utt_size, embedding_dim, embedding, window_sizes, feature_maps):
        super().__init__()
        self.embedding = embedding
        self.convs = nn.ModuleList([nn.Conv2d(1, feature_maps, (window_size, embedding_dim))
                                    for window_size in window_sizes])
        self.linear = nn.Linear(len(window_sizes)*feature_maps, utt_size)
        self.dropout = nn.Dropout(0.5)

    @classmethod
    def from_pretrained(cls, vocab_size, utt_size, embedding_dim, weights, freeze_embedding, *args):
        embedding = nn.Embedding.from_pretrained(weights, freeze=freeze_embedding, padding_idx=0)
        return cls(vocab_size, utt_size, embedding_dim, embedding, *args)

    @classmethod
    def random_init(cls, vocab_size, utt_size, embedding_dim, *args):
        embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        return cls(vocab_size, utt_size, embedding_dim, embedding, *args)

    def forward(self, x):
        x = self.embedding(x)                                     # (batch, seq_len, embedding_dim)
        x = x.unsqueeze(1)                                        # add a channel dim for Conv2d
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs]   # one feature map per window size
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]    # max-over-time pooling
        x = torch.cat(x, 1)
        x = self.dropout(x)
        x = self.linear(x)
        return x
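
# Shape walkthrough (illustrative, not from the original file), assuming a
# (batch, seq_len) token-id input as in WordVecAvg: each Conv2d with window
# size w yields (batch, feature_maps, seq_len - w + 1) after the squeeze,
# max-over-time pooling reduces that to (batch, feature_maps), and concatenating
# across window sizes gives (batch, len(window_sizes) * feature_maps) before
# the projection to utt_size. Utterances must be padded to at least the
# largest window size.
#
#   enc = KimCNN.random_init(10000, 100, 300, [3, 4, 5], 100)
#   utt_vecs = enc(torch.zeros(2, 20, dtype=torch.long))  # -> (2, 100)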

class BertEncoder(nn.Module):
    def __init__(self, utt_size, from_pretrained=True, pretrained_dir=None, finetune_bert=True, resize=False):
        super().__init__()
        if from_pretrained:
            pretrained_str = pretrained_dir or 'bert-base-uncased'  # default to the library-supplied pre-trained model
            self.bert = transformers.BertModel.from_pretrained(pretrained_str)
        else:
            config = transformers.BertConfig.from_json_file('data/bert-base-uncased_config.json')
            self.bert = transformers.BertModel(config)
        if resize:
            self.bert.resize_token_embeddings(resize)
        if not finetune_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        self.linear = nn.Linear(768, utt_size)

    def forward(self, x):
        _, x = self.bert(x)  # use the pooled [CLS] output (_ is the sequence of per-token hidden states)
        x = self.linear(x)
        return x
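

# Minimal smoke test (a sketch added for illustration, not part of the original
# module): encode a toy batch of utterances with WordVecAvg, then tag the
# conversation with DARRNN. Vocabulary size, tag count, and shapes are
# arbitrary assumptions.
if __name__ == '__main__':
    vocab_size, embedding_dim, n_tags = 1000, 50, 10
    seq_len, batch_size, max_words = 7, 2, 12

    encoder = WordVecAvg.random_init(vocab_size, embedding_dim)
    dar_model = DARRNN(utt_size=embedding_dim, n_tags=n_tags,
                       hidden_size=64, n_layers=2, dropout=0.5, use_lstm=True)

    # encode every utterance in the conversation, then tag them in one pass
    token_ids = torch.randint(1, vocab_size, (seq_len * batch_size, max_words))
    utt_vecs = encoder(token_ids).view(seq_len, batch_size, embedding_dim)

    hidden = dar_model.init_hidden(batch_size)
    tag_scores, hidden = dar_model(utt_vecs, hidden)
    print(tag_scores.shape)  # expected: torch.Size([7, 2, 10])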