config.py
import os
BASE_DIR = '/home/rhodochrosited/'
PREPROCESS = {
    'vocab_file': os.path.join(BASE_DIR, 'data/vocab-9171.txt'),
    'use_morphs': True,
    'max_seq_length-search': 384,
    'max_query_length-search': 64,
    'max_query_length-similarity': 25,  # = seq_length
    'keywords_tags': ['NNG', 'NNP', 'NNB', 'NNBC', 'MAG', 'VV', 'VA', 'VCP', 'VCN', 'SL', 'SN'],
    'clean_tags': ['JK', 'JX', 'JC']
}
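# Minimal usage sketch for the vocab file above, assuming the reference
# google-research/bert `tokenization.py` is importable; the helper name and
# the do_lower_case choice are illustrative assumptions, not the project's code.
def _example_load_tokenizer():
    import tokenization  # assumed: tokenization.py from google-research/bert
    return tokenization.FullTokenizer(vocab_file=PREPROCESS['vocab_file'],
                                      do_lower_case=False)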
BERT = {  # used when building a new TENSOR SERVING model
    'model_path-search': os.path.join(BASE_DIR, 'hdd2/FINAL_SQUAD/model.ckpt-12408'),
    'model_path-similarity': os.path.join(BASE_DIR, 'hdd2/FINAL_PRETRAIN/model.ckpt-990000'),
    'bert_json': os.path.join(BASE_DIR, 'squad_train_model/bert_config.json'),
    'similarity_layer': -1,
    # ELMo-like feature-vector layers: try summing several layers, or using only lower layers (-2, -3, ...)
    'version-similarity': 5,
    # 1: 128 seq length, 75000 steps
    # 2: 512 seq length, 990000 steps
    # 3: 2 + similarity layer -2
    # 4: layer -12 (word embeddings)
    'version-search': 2,
    # 1: F1 score 71
    # 2: F1 score 83.6, train+dev -> (92)
    'version-sentiment': 3,
    # 2: predict - 1.0 / 0.0
    # 3: predict - 0.0~1.0 (to allow adding a neutral class)
    'max_seq_length-search': 384,
    'max_seq_length-similarity': 25,
    'MODEL_DIR': os.path.join(BASE_DIR, 'hdd2/tensor_serving_models')
}
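# Minimal sketch of reading `bert_json` above, assuming the reference
# google-research/bert `modeling.py` is importable; illustrative only.
def _example_load_bert_config():
    import modeling  # assumed: modeling.py from google-research/bert
    return modeling.BertConfig.from_json_file(BERT['bert_json'])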
TENSOR_SERVING = {
    'url-search': 'http://10.140.0.8:8501/v1/models/search:predict',
    'url-similarity': 'http://10.140.0.8:8502/v1/models/similarity:predict',
    'url-sentiment': 'http://10.140.0.8:8503/v1/models/sentiment:predict',
    'url-search-v': 'http://10.140.0.8:8501/v1/models/search',
    'url-similarity-v': 'http://10.140.0.8:8502/v1/models/similarity',
    'url-sentiment-v': 'http://10.140.0.8:8503/v1/models/sentiment'
}
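# Minimal sketch of calling one of the predict URLs above via the
# TensorFlow Serving REST API; the feature keys inside "instances" depend on
# the exported signature and are assumptions, as is the use of `requests`.
def _example_serving_predict(features, url=None):
    import requests
    response = requests.post(url or TENSOR_SERVING['url-sentiment'],
                             json={'instances': [features]})
    response.raise_for_status()
    return response.json()['predictions']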
HANDLER = {
    'DUMMY': None
}
QUESTION = {
    'categories': ['shuttle_bus', 'food', 'talk', 'search', 'book', 'prepared', 'test'],
    'tfidf_token_pattern': r'(?u)\b[가-힣]+\b'
}
SEARCH = {
    'sublinear_tf': True,
    'max_context_num': 3  # maximum number of paragraphs to retrieve via TF-IDF in search
}
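# Minimal sketch of how the TF-IDF settings above (token pattern from QUESTION,
# sublinear_tf from SEARCH) map onto scikit-learn's TfidfVectorizer; that the
# project consumes them this way is an assumption.
def _example_build_tfidf(documents):
    from sklearn.feature_extraction.text import TfidfVectorizer
    vectorizer = TfidfVectorizer(token_pattern=QUESTION['tfidf_token_pattern'],
                                 sublinear_tf=SEARCH['sublinear_tf'])
    return vectorizer, vectorizer.fit_transform(documents)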
ANALYSIS = {
    # T-SNE
    'perplexity': 30.0,       # 5-50 recommended
    'learning_rate': 200.0,   # 10-1000
    'n_iter': 1000,           # at least 250
    'metric': 'cosine',       # distance metric ( x_i <-> x_j )
    'method': 'barnes_hut',   # for speed
    'n_components': 2,        # output (y) dimensionality
    'categories': QUESTION['categories']
}
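# Minimal sketch mapping the ANALYSIS block onto scikit-learn's TSNE; the
# direct one-to-one parameter mapping is an assumption.
def _example_tsne(vectors):
    from sklearn.manifold import TSNE
    tsne = TSNE(n_components=ANALYSIS['n_components'],
                perplexity=ANALYSIS['perplexity'],
                learning_rate=ANALYSIS['learning_rate'],
                n_iter=ANALYSIS['n_iter'],
                metric=ANALYSIS['metric'],
                method=ANALYSIS['method'])
    return tsne.fit_transform(vectors)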
QUERY = {
    'distance': 'cosine',
    'jaccard_threshold': 0.7,
    'search_threshold': 15,
    'idf_weight': 0.1,
    'cosine_threshold': 0.87,
}
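# Minimal sketch of one plausible reading of `cosine_threshold` above, using
# SciPy's cosine distance; whether the threshold is compared against similarity
# or distance here is an assumption.
def _example_passes_cosine_threshold(vec_a, vec_b):
    from scipy.spatial.distance import cosine
    similarity = 1.0 - cosine(vec_a, vec_b)
    return similarity >= QUERY['cosine_threshold']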
MONGODB = {
    'ip': 'localhost',
    'port': 27017,
    'db_name': 'chatbot',
    'col_questions': 'questions',
    'col_queries': 'queries',
    'col_contexts': 'contexts',
    'col_julimmal': 'julimmal',
    'col_stopword': 'stopword',
    'username': "mhlee",
    'password': "mhlee"
}
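# Minimal sketch of opening a collection with the settings above via pymongo;
# the helper name and default collection key are illustrative assumptions.
def _example_mongo_collection(col_key='col_questions'):
    from pymongo import MongoClient
    client = MongoClient(MONGODB['ip'], MONGODB['port'],
                         username=MONGODB['username'],
                         password=MONGODB['password'])
    return client[MONGODB['db_name']][MONGODB[col_key]]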
FLASK = {
    'host': '0.0.0.0',
    'port': 8000,
    'desc': None,
    'version': '0.3',
    'title': 'Willson-Hanyang_Chatbot',
    'debug': False
}
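# Minimal sketch of feeding the FLASK block into a Flask app; the app object
# and the absence of routes are illustrative assumptions.
def _example_run_flask():
    from flask import Flask
    app = Flask(FLASK['title'])
    app.run(host=FLASK['host'], port=FLASK['port'], debug=FLASK['debug'])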
if __name__ == '__main__':
    print(BASE_DIR)
    print(PREPROCESS)