-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathdefault.cfg
165 lines (130 loc) · 4.84 KB
/
default.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#Default configuration for 4lang: when a 4lang module loads a cfg file, any parameter not specified there takes its default value from this file.
#Wherever possible, these values correspond to the most typical settings and to the test datasets distributed with 4lang.
#Stanford Parser settings
#%(stanfordpath)s and %(jythonpath)s are interpolation placeholders; no value for
#them is defined in this file, so they must be supplied when the file is loaded
[stanford]
#may in the future support using remote servers for parsing, leave it False for now
remote = False
#full path of Stanford Parser directory, to be set in env variable STANFORDPATH
dir = %(stanfordpath)s
#name of parser JAR file (a bare file name; presumably looked up inside dir - confirm)
parser = stanford-parser.jar
#name of model to load
model = englishRNN.ser.gz
#full path of jython executable, to be set in env variable JYTHONPATH
jython = %(jythonpath)s
#magyarlanc settings
#%(magyarlancpath)s is not defined in this file; presumably it is supplied from an
#environment variable in the same way as the other tool paths - TODO confirm
[magyarlanc]
#magyarlanc directory
dir = %(magyarlancpath)s
#JAR file located directly under that directory
jar = %(magyarlancpath)s/magyarlanc-2.0.jar
#miscellaneous data directories
#both defaults point into the test tree under %(fourlangpath)s
[data]
#directory to save output of dependency parsing
deps_dir = %(fourlangpath)s/test/deps
#directory for temporary files
tmp_dir = %(fourlangpath)s/test/tmp
#dictionary data: which dictionary to read and where to write the parsed entries
[dict]
#input format
#possible values are: longman, collins, wiktionary, eksz, nszt
input_type = longman
#path to input file (the default is the longman test file, matching input_type above)
input_file = %(fourlangpath)s/test/input/longman_test.xml
#path to JSON file containing parsed dictionary entries
output_file = %(fourlangpath)s/test/dict/longman_test.json
#text_to_4lang options
[text]
#path to input data
input_sens = %(fourlangpath)s/test/input/mrhug_story.sens
#set to True to perform expansion on graphs built from text
expand = False
#set to True to print dot files for each sentence
print_graphs = True
#directory in which to save the dot files
graph_dir = %(fourlangpath)s/test/graphs/text
#if True, only dependency parsing will run and its output will be saved, but 4lang
#graphs won't be built. Useful when working with large datasets.
parse_only = False
#directory in which to save output of parsers
deps_dir = %(fourlangpath)s/test/deps/text
#options to control which definitions are included by dict_to_4lang
[filter]
#set to True to include multiword expressions (left out with the default False)
keep_multiword = False
#set to True to include words with apostrophes (left out with the default False)
keep_apostrophes = False
#if True, discard all but the first definition of each headword
first_only = True
#lemmatizer settings
[lemmatizer]
#full path of hunmorph binaries and models, to be set in env variable HUNTOOLSBINPATH
hunmorph_path = %(huntoolsbinpath)s
#path of cache (loaded but not updated by default, see docs)
cache_file = %(fourlangpath)s/data/hunmorph_cache.txt
#options related to 4lang graphs
[machine]
#file containing 4lang dictionary
#NOTE(review): this and the two values below are bare names without a directory;
#how they are resolved to a path is not visible in this file
definitions = 4lang
#extra data for 4lang, currently not in use
plurals = 4lang.plural
primitives = 4lang.primitive
#pickle file to load 4lang graphs from
definitions_binary = %(fourlangpath)s/data/machines/4lang.pickle
#pickle file to save 4lang graphs
#NOTE(review): the output names below say "wikt_test" although the [dict] section
#defaults to longman input - confirm this is intended
definitions_binary_out = %(fourlangpath)s/test/machines/wikt_test.pickle
#pickle file to save expanded 4lang graphs
expanded_definitions = %(fourlangpath)s/test/machines/wikt_test_expanded.pickle
#path of directory for printing dot graphs
graph_dir = %(fourlangpath)s/test/graphs/wikt_test
#options for mapping dependency parser output to 4lang
[deps]
#path to the map from dependencies to 4lang edges
dep_map = %(fourlangpath)s/dep_to_4lang.txt
#language of the mapping (en or hu)
lang = en
#options for testing the word similarity module
#all keys use the "=" delimiter, like the rest of this file
#NOTE(review): the boolean values below mix "true" and "False"; they are kept
#exactly as they were in case a consumer compares the raw strings - confirm, then unify
[word_sim]
#pickle file to load 4lang graphs from
definitions_binary = %(fourlangpath)s/data/machines/longman_firsts.pickle
#path to the map from dependencies to 4lang edges
dep_map = %(fourlangpath)s/dep_to_4lang.txt
#directory for graphs
graph_dir = %(fourlangpath)s/data/graphs/sts
#similarity type(s) to use; "fullgraph" presumably refers to the [fullgraph] section - confirm
sim_types = fullgraph
#directory for output files
out_dir = %(fourlangpath)s/test/output/
#file for shortest path results
#NOTE(review): "dijstra" looks like a misspelling of "dijkstra"; the file name is
#kept unchanged because other code or scripts may rely on this exact path
shortest_path_res = %(fourlangpath)s/test/output/dijstra_res.txt
#NOTE(review): the four switches below are undocumented; their meaning is only
#suggested by their names - confirm against the consuming code
calc_shortest_path = true
expand_path = False
batch = true
expand = False
#NOTE(review): the purpose of this section is not documented here; presumably it
#selects which similarity modes are enabled - confirm against the consuming code
[similarity]
word = True
compositional = False
#options for experimental sentence similarity system
[sim]
#NOTE(review): presumably selects which kind of similarity test is run - confirm
similarity_type = word_test
#path to the word similarity test data; the commented-out line is an alternative dataset
#word_test_data = %(fourlangpath)s/ws_data/wordsim_similarity_goldstandard.txt
word_test_data = %(fourlangpath)s/test/input/sim_data/SimLex-999.txt
#directories for graphs and for dependency parser output
#(presumably used like graph_dir and deps_dir in the [text] section - confirm)
graph_dir = %(fourlangpath)s/test/graphs/sts_test
deps_dir = %(fourlangpath)s/test/deps/sts_test
#options for experimental question answering system
[qa]
#path to input file
input_file = %(fourlangpath)s/test/input/clef_qa_sample.xml
#path to output file (presumably the answers produced, judging by the file name - confirm)
output_file = %(fourlangpath)s/test/qa/clef_qa_sample.answers
#directories for graphs and for dependency parser output
#(presumably used like graph_dir and deps_dir in the [text] section - confirm)
graph_dir = %(fourlangpath)s/test/graphs/qa_test
deps_dir = %(fourlangpath)s/test/deps/qa_test
#settings for the demo
[demo]
#presumably the root directory for the demo's temporary files - confirm
tmp_root = %(fourlangpath)s/data/tmp/demo
#NOTE(review): this section is undocumented; the descriptions below are inferred
#from key and file names only - confirm against the consuming code
[context]
#presumably a file holding Stanford Parser output used as input
stanford_output = %(fourlangpath)s/test/input/stanford_output_test.txt
#presumably where raw output is written
raw_output = %(fourlangpath)s/test/context/test_small
#presumably where the resulting context data is stored
context_file = %(fourlangpath)s/test/context/context_small
# options for "fullgraph" (presumably the similarity type named by sim_types in [word_sim] - confirm)
# all keys use the "=" delimiter, like the rest of this file
[fullgraph]
# NOTE(review): undocumented switch; lowercase "true" is kept as-is although other
# booleans in this section are written "False" - confirm, then unify
upper_exclude = true
# path to the frequency file
freq_file = %(fourlangpath)s/test/input/freq/longman_tab_sep_freq.txt
# minimum required freq
freq_val = 0
# minimum number to exclude from the top
freq_count = 50
# NOTE: there is an AND relationship between freq_val and freq_count
# nodename_option:
# 0: all nodes are unique
# 1: all nodes are printnames
# 2: only uppercase + 'lack', 'before', 'not', 'have' are unique
nodename_option = 1
# NOTE(review): the two switches below are undocumented - confirm their meaning
weighted = False
color_based = False
# path to the embedding file
embedding_path = %(fourlangpath)s/test/input/embedding/paragram_vectors_utf8.txt