-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_en-si_comet.sh
executable file
·33 lines (26 loc) · 1.63 KB
/
test_en-si_comet.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/bash
#
# test_en-si_comet.sh - report COMET scores for the en->si and si->en test
# translations of a model.
#
# Usage:   test_en-si_comet.sh MODEL_DIR
# Example: test_en-si_comet.sh Output/en-si_pipeline/best_bleu
#
# Inputs (paths are relative to the current working directory):
#   MODEL_DIR/../translation_results/si-en.en.test.si   en->si hypothesis
#   MODEL_DIR/../translation_results/si-en.si.test.en   si->en hypothesis
#   Data/translation_references/si-en.{en,si}.test      sources / references
#
# Output: timestamped progress and score lines on stdout; usage errors and
# comet-score failure warnings on stderr.

source activate unmt
# Print the current commit so the scores in the log can be tied to a revision.
git rev-parse HEAD
export PYTHONPATH=".:${PYTHONPATH}"

# Fail fast on a missing argument: an empty MODEL_DIR would otherwise resolve
# the hypothesis directory to "/../translation_results".
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s MODEL_DIR\n' "${0##*/}" >&2
  exit 2
fi

MODEL_DIR=$1
TRANSLATION_OUTPUT_DIR="${MODEL_DIR}/../translation_results"
reference_dir=Data/translation_references

# Emit one log line in the script's "<timestamp> test_en-si_comet: <msg>" format.
log() {
  printf '%s test_en-si_comet: %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$*"
}

# --- Disabled: IndicNLP setup and tokenization (kept for reference) ----------
# mkdir -p $TRANSLATION_OUTPUT_DIR
# IndicNLP_dir=./indic_nlp_library
# IndicNLP_resources_dir=./indic_nlp_resources
# export PYTHONPATH=$PYTHONPATH:${IndicNLP_dir}
# export INDIC_RESOURCES_PATH=${IndicNLP_resources_dir}
# Tokenize both the reference(gold answer) and hypothesis(prediction)
# if [ ! -f ${reference_dir}/si-en.si.test.tok ]; then
# echo "Tokenizing reference"
# python ${IndicNLP_dir}/indicnlp/cli/cliparser.py tokenize ${reference_dir}/si-en.si.test ${reference_dir}/si-en.si.test.tok -l sin
# fi
# python ${IndicNLP_dir}/indicnlp/cli/cliparser.py tokenize ${TRANSLATION_OUTPUT_DIR}/si-en.en.test.si ${TRANSLATION_OUTPUT_DIR}/si-en.en.test.si.tok -l sin
# -----------------------------------------------------------------------------

# Calculate the COMET score for each direction.
# comet-score flags: -s source text, -t system translation, -r reference.
log "Calculating en->si test translation scores"
en2si="$(comet-score \
  -s "${reference_dir}/si-en.en.test" \
  -t "${TRANSLATION_OUTPUT_DIR}/si-en.en.test.si" \
  -r "${reference_dir}/si-en.si.test" \
  --quiet)" \
  || log "WARNING: comet-score failed for en->si; the score below may be empty" >&2
log "en->si test COMET: $en2si"

log "Calculating si->en test translation scores"
si2en="$(comet-score \
  -s "${reference_dir}/si-en.si.test" \
  -t "${TRANSLATION_OUTPUT_DIR}/si-en.si.test.en" \
  -r "${reference_dir}/si-en.en.test" \
  --quiet)" \
  || log "WARNING: comet-score failed for si->en; the score below may be empty" >&2
log "si->en test COMET: $si2en"