forked from ixa-ehu/ixa-pipe-pos
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrainParams.properties
81 lines (65 loc) · 2.87 KB
/
trainParams.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Sample machine learning properties file
# Algorithm: choose between PERCEPTRON and MAXENT
Algorithm=PERCEPTRON
Iterations=500
Cutoff=0
Threads=4
##################################################
#### Custom parameters added by ixa-pipe-pos ####
##################################################
# Component: choose between POS and Lemma
Component=Lemma
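# Languages supported: de, en, es, eu, fr, gl, it, nl
# NOTE(review): the sample TrainSet/TestSet paths in this file point to an
# "eu-ud" corpus; confirm that Language matches the corpus actually used.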
Language=en
# TrainingCorpus: paths to the training (TrainSet) and test (TestSet) files
TrainSet=/home/ragerri/experiments/lemma/eu-ud/eu-train.tsv
TestSet=/home/ragerri/experiments/lemma/eu-ud/eu-dev.tsv
# CorpusFormat: word<TAB>pos<TAB>lemma (one token per line, tab-separated)
CorpusFormat=tabulated
# OutputModel: if commented out, ixa-pipe-pos will save the model with the
# name of this properties file
OutputModel=trainParams.bin
# BeamSize: a beam size of 1 amounts to greedy search
#BeamSize=3
##################
#### FEATURES ####
##################
# FeatureSet: use the OpenNLP set of features or a more extended feature set
# including trigrams and other features. If commented out it defaults to
# Baseline. Possible values are "Opennlp" or "Baseline".
FeatureSet=Baseline
# DictionaryFeatures: Provide path to a tag dictionary in opennlp format.
# This tag dictionary reduces the candidate pos tags for each entry to the tags
# present in the dictionary only. If commented out this feature is not used.
#DictionaryFeatures=/home/ragerri/javacode/ixa-pipe-pos/pos-resources/en/pos-dictionaries
# AutoDictFeatures: Provide a cutoff to automatically build a pos tag
# dictionary using the training data. The cutoff needs to be >= 1. If
# commented out this feature is not used.
AutoDictFeatures=1
# NgramDictFeatures: Provide a cutoff to automatically build an n-gram
# dictionary using the training data. The cutoff needs to be >= 1. If commented
# out this feature is not used.
NgramDictFeatures=1
#####################################
#### CROSS VALIDATION PARAMETERS ####
#####################################
# Folds: number of cross-validation folds; if commented out it defaults to
# 10 folds.
Folds=5
# EvaluationType: choose between 'detailed' and 'error'; only used for
# cross-validation. It defaults to detailed evaluation.
EvaluationType=detailed