-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbuild_data.py
27 lines (21 loc) · 862 Bytes
/
build_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
from trial.semeval_build_data import main as run_semeval
from trial.bc5_build_data import main as run_bc5
from utils import Timer
from data_utils import export_trimmed_fasttext_vectors
from data_utils import load_vocab
from constant import constants_semeval as sem_constants
from constant import constants_bc5 as bc5_constants
if __name__ == '__main__':
    # Script entry point: export trimmed fastText embeddings for the BC5 and
    # SemEval vocabularies, then run each dataset's own build step.
    timer = Timer()
    timer.start("Building data...")

    print('Build trimmed embeddings')
    bc5_vocab = load_vocab(bc5_constants.ALL_WORDS)
    sem_vocab = load_vocab(sem_constants.ALL_WORDS)
    # Ensure the embedding output directory exists, mirroring the handling of
    # 'data/pickle' below. NOTE(review): presumably the export helper writes
    # straight to the given path without creating parent directories - confirm.
    os.makedirs('data/w2v_model', exist_ok=True)
    export_trimmed_fasttext_vectors(bc5_vocab, 'data/w2v_model/bc5_fasttext.npz')
    export_trimmed_fasttext_vectors(sem_vocab, 'data/w2v_model/semeval_fasttext.npz')

    print('Build pickle file')
    # The dataset build steps are expected to write under data/pickle
    # (assumption based on the directory created here - verify in trial/*).
    os.makedirs('data/pickle', exist_ok=True)
    run_semeval()
    run_bc5()

    timer.stop()