-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjtt_result_worker.py
82 lines (61 loc) · 2.23 KB
/
jtt_result_worker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import sqlite3
from pyspider.result import ResultWorker
from time import time
from threading import Lock
# Module-level lock. Presumably meant to serialize database writes, but no
# visible code acquires it -- TODO confirm before relying on it.
sql_lock = Lock()

# Keys read from every crawl result. The same names are used verbatim as the
# column names of the insert into `dt_article`, so the order here is the
# column order of the generated statement. (The spelling "filed" is kept
# because other code refers to this name.)
filed_list = [
    'url',
    'article_class',
    'article_content',
    'article_num',
    'article_year',
    'date',
    'form',
    'index_num',
    'info_class',
    'mechanism',
    'name',
    'type',
]
class JTTResultWorker(ResultWorker):
    """Result worker that echoes each crawled record and stores it in SQLite.

    Every record handed to ``on_result`` is printed to stdout for inspection
    and then inserted as one row into the ``dt_article`` table of
    ``./zw.db``, using the names in ``filed_list`` as column names.
    """

    # Opened once, when the class body is executed, and shared by all
    # instances through these class attributes.
    res_db_conn = sqlite3.connect('./zw.db')
    jtt_cur = res_db_conn.cursor()

    # Value types passed to sqlite3 as bound parameters. ``None`` is handled
    # separately and stored as NULL.
    _BINDABLE_TYPES = (str, int, float, bytes)

    def on_result(self, task, result):
        """Print ``result`` and insert it as one row into ``dt_article``.

        Values are passed to SQLite as bound parameters, so quotes inside
        the text are stored unchanged (no character substitution is needed
        to keep the statement valid) and ``result`` itself is not modified.

        Args:
            task: The task that produced ``result``; not used here.
            result: Mapping that provides a value for every name in
                ``filed_list``. ``None`` values are stored as NULL. A record
                that lacks a field, or holds a value sqlite3 cannot bind, is
                reported on stdout and skipped.

        Raises:
            sqlite3.Error: If executing the insert or the commit fails.
        """
        if not result:
            # Nothing was extracted, so there is no row to show or store.
            return

        self._show_record(result)

        try:
            row = [result[field] for field in filed_list]
        except (LookupError, TypeError) as exc:
            # Incomplete records are skipped rather than stored partially.
            print(exc)
            print("error")
            return

        for field, value in zip(filed_list, row):
            if value is None or isinstance(value, self._BINDABLE_TYPES):
                continue
            # Reject the record here so a bad value is reported per field
            # instead of surfacing as a database error.
            print("unsupported value type for {}: {}".format(
                field, type(value).__name__))
            print("error")
            return

        # Column names come from the module-level constant, never from the
        # crawled data; only the values are untrusted and they are bound.
        statement = "insert into dt_article ({}) values ({});".format(
            ','.join('`' + field + '`' for field in filed_list),
            ','.join('?' for _ in filed_list),
        )
        self.jtt_cur.execute(statement, row)
        self.res_db_conn.commit()

    @staticmethod
    def _show_record(result):
        """Print every field of ``result``; never raise on a display problem."""
        print('\n' * 2)
        print("-" * 20)
        print('')
        print("程序获取到数据:")
        for field in filed_list:
            if field == 'article_content':
                # The article body can be long: show only its first 100
                # characters, followed by an ellipsis.
                print('\n')
                try:
                    content = result[field]
                    if content is None:
                        preview = None
                    else:
                        preview = str(content)[:100] + "..."
                    print(field + ':\n', preview)
                except Exception as exc:
                    # Diagnostic output only: a missing or unprintable
                    # value must not prevent the record from being stored.
                    print(exc)
                print('\n')
                continue
            try:
                print(field + ':', result[field])
            except Exception as exc:
                # Same reasoning as above: report and carry on.
                print(exc)
        print("-" * 20)
        print('\n' * 3)