#!/usr/bin/env python
# coding=utf-8
# File Name: submit.py
# Author: Lixin Zou
# Mail: [email protected]
# Created Time: Tue Sep 13 23:21:03 2022
##### Demo submission to WSDM Cup 2023 #####
# Step 1:
# Get the prediction scores of the model:
# > ``` python submit.py --emb_dim 768 --nlayer 12 --nhead 12 --dropout 0.1 --eval_batch_size 100 --test_annotate_path ./data/wsdm_round_1/wsdm_test_1.txt```
# Note: the prediction score file must end with .csv; otherwise the evaluation may fail.
# Step 2:
# Compress the prediction score file into a zip archive and submit it on the competition page.
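# For example (the file names below are illustrative, not fixed by this script;
# zip whatever file config.result_path points at):
# > ``` zip submission.zip result.csv ```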
from baseline_model.utils.sys_tools import find_class
import torch
import numpy as np
import warnings
import sys
from metrics import *
from Transformer4Ranking.model import *
from dataloader import *
from args import config
warnings.filterwarnings('ignore')
print(config)
exp_settings = config.exp_settings
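# Build the Transformer encoder that scores query-document inputs; it is handed
# to the chosen baseline method below as its encoder_model.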
token_encoder = TransformerModel(
    ntoken=config.ntokens,
    hidden=config.emb_dim,
    nhead=config.nhead,
    nlayers=config.nlayers,
    dropout=config.dropout,
    mode='finetune'
)
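# Resolve the configured unbiased learning-to-rank method by class name.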
method_str = exp_settings['method_name']
if method_str not in ['IPWrank', 'DLA', 'RegressionEM', 'PairDebias', 'NavieAlgorithm']:
    print("please choose a method from 'IPWrank', 'DLA', 'RegressionEM', 'PairDebias', 'NavieAlgorithm'")
    sys.exit()
model = find_class('baseline_model.learning_algorithm.' + method_str)\
    (exp_settings=exp_settings, encoder_model=token_encoder)
if config.evaluate_model_path != "":
    # load the trained checkpoint into the wrapped encoder
    model.model.load_state_dict(torch.load(config.evaluate_model_path))
# load dataset
test_annotate_dataset = TestDataset(config.test_annotate_path, max_seq_len=config.max_seq_len, data_type='annotate')
test_annotate_loader = DataLoader(test_annotate_dataset, batch_size=config.eval_batch_size)
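# Each batch holds eval_batch_size examples from the annotated test file.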
# evaluate
total_scores = []
for test_data_batch in test_annotate_loader:
    feed_input = build_feed_dict(test_data_batch)   # batch -> model input dict
    score = model.get_scores(feed_input)            # predicted relevance scores
    score = score.cpu().detach().numpy().tolist()
    total_scores += score
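# The note at the top of this file says the submitted score file must end with
# .csv; assuming config.result_path is that file, fail fast here rather than
# at evaluation time.
assert config.result_path.endswith('.csv'), \
    "prediction score file must end with .csv"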
with open(config.result_path, "w") as f:
    f.writelines("\n".join(map(str, total_scores)))   # one score per line