-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcard_analyze.py
159 lines (142 loc) · 4.87 KB
/
card_analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env python
# coding: utf-8
# @NightWish in Dec.29.2014
import logbook
from pymongo import MongoClient
from redis import StrictRedis
# Both backing stores are reached on the loopback interface.
_LOCAL_HOST = '127.0.0.1'

# MongoDB database "12306": the functions below read the `user` and `position`
# collections from it and write the aggregated results back into it.
_mongo_client = MongoClient(host=_LOCAL_HOST, port=27017)
m_db = _mongo_client['12306']

# Redis logical database 0: holds the intermediate counters built by
# analysis() until the save_* functions flush them to MongoDB.
r_db = StrictRedis(host=_LOCAL_HOST, port=6379, db=0)
def _card_keys(card):
    """Build the Redis counter keys for one ID-card number.

    Returns a list holding, in order: three integer region codes (the first
    2, 4 and 6 characters of the card, each right-padded with zeros to six
    digits), ``'year:<year>'``, ``'month:<month>'`` and ``'male'`` or
    ``'female'``.

    Raises ValueError when the card is neither 15 nor 18 characters long or
    when one of the extracted fields is not numeric.
    """
    length = len(card)
    if length == 18:
        year, month, gender = card[6:10], card[10:12], card[16]
    elif length == 15:
        # 15-character cards store only a two-digit birth year. Expand it to
        # four digits so these births land in the same 'year:19xx' counter as
        # those read from 18-character cards instead of a separate 'year:xx'.
        year, month, gender = '19' + card[6:8], card[8:10], card[14]
    else:
        raise ValueError('unsupported card length {}'.format(length))

    keys = [int('{:0<6}'.format(card[:width])) for width in (2, 4, 6)]
    keys.append('year:{}'.format(int(year)))
    keys.append('month:{}'.format(int(month)))
    # An odd gender digit counts as male, an even one as female.
    keys.append('male' if int(gender) % 2 else 'female')
    return keys


def analysis(doc):
    """Count one user document into the Redis statistics.

    Reads ``doc['card']`` and increments one Redis counter for each of:
    the birth-place code at three granularities, the birth year, the birth
    month and the gender.

    A card that cannot be parsed is logged through ``logbook.error`` and
    skipped; nothing is counted for it. Returns None in every case.
    """
    card = doc['card']
    try:
        key_list = _card_keys(card)
    except ValueError as error_info:
        logbook.error('error:{}, card:{}'.format(error_info, card))
        return

    # INCR treats a missing key as 0 before incrementing, so the counters
    # need no separate initialisation step.
    for key in key_list:
        r_db.incr(key)
def save_position(level):
    """Move the region counters of one level from Redis into MongoDB.

    ``level`` selects what is processed:

    * ``'province'`` - Redis keys matching ``??0000``; documents are saved to
      ``card.province`` and the key is moved to Redis database 2.
    * ``'city'`` - Redis keys matching ``????00``; documents are saved to
      ``card.city`` with the position prefixed by the parent province's
      position, and the key is moved to Redis database 3.

    The city pattern also matches province keys, so the province pass has to
    run first (it moves those keys out of the current database).

    Raises ValueError for any other ``level``.
    """
    if level == 'province':
        pattern = '??0000'
    elif level == 'city':
        pattern = '????00'
    else:
        raise ValueError('unknown level: {!r}'.format(level))

    unknown = u'未知'
    for position in r_db.keys(pattern):
        code = int(position)
        count = int(r_db.get(position))

        # Fall back to a placeholder document when the code is not listed in
        # the `position` collection.
        position_info = m_db.position.find_one({'_id': code})
        if not position_info:
            position_info = {'_id': code, 'position': unknown}
        position_info['count'] = count

        if level == 'province':
            ret = m_db.card.province.save(position_info)
            r_db.move(position, 2)
        else:
            # Zero the last four digits to obtain the province code.
            parent_pos = code // 10000 * 10000
            parent_info = m_db.position.find_one({'_id': parent_pos})
            # A missing province must not abort the whole run.
            parent_name = parent_info['position'] if parent_info else unknown
            position_info['position'] = u'{parent} {self}'.format(
                parent=parent_name,
                self=position_info['position'],
            )
            ret = m_db.card.city.save(position_info)
            r_db.move(position, 3)
        logbook.info(ret)
def save_birth(level):
    """Move the birth counters of one kind from Redis into MongoDB.

    ``level`` is ``'year'`` or ``'month'``. Every Redis key of the form
    ``'<level>:<number>'`` is saved as ``{'_id': <number>, 'count': <count>}``
    into the ``birth.year`` or ``birth.month`` collection respectively, and
    the Redis key is deleted afterwards.

    Raises ValueError for any other ``level``.
    """
    if level == 'year':
        collection = m_db.birth.year
    elif level == 'month':
        collection = m_db.birth.month
    else:
        raise ValueError('unknown level: {!r}'.format(level))

    for item in r_db.keys('{}:*'.format(level)):
        count = int(r_db.get(item))
        # The numeric part after the colon becomes the document id.
        _id = int(item.split(':')[1])
        collection.save({'_id': _id, 'count': count})
        r_db.delete(item)
def save_gender():
    """Move the ``male`` and ``female`` counters from Redis into MongoDB.

    Each counter is saved to the ``gender`` collection as
    ``{'_id': <gender>, 'count': <int>}``; a counter missing from Redis is
    stored as 0. Both Redis keys are deleted once the documents are saved.
    """
    for gender in ('male', 'female'):
        # Redis hands the value back as text (or None when the key is
        # absent); store a real integer like the other collections do.
        count = int(r_db.get(gender) or 0)
        ret = m_db.gender.save({'_id': gender, 'count': count})
        logbook.info(ret)
    r_db.delete('male', 'female')
def save_county():
    """Move every remaining numeric counter from Redis into MongoDB.

    Each all-digit key in the current Redis database is treated as a county
    code. It is saved to ``card.county`` with its count and with its position
    prefixed by the position of the parent city, then deleted from Redis.
    The parent is looked up in ``card.city`` by the code with its last two
    digits zeroed. Codes unknown to MongoDB get a placeholder position.

    Keys that are not all digits are logged with ``logbook.warn`` and left in
    Redis.

    NOTE: this assumes the province/city/birth/gender passes have already
    removed their own keys from the current database, as the script's main
    section does.
    """
    unknown = u'未知'
    # Work from a snapshot of the keys. A non-numeric key is then reported
    # once instead of being drawn again forever, and the loop ends cleanly
    # when the keys run out.
    for key in r_db.keys('*'):
        if not key.isdigit():
            logbook.warn(key)
            continue

        code = int(key)
        count = int(r_db.get(key))

        # Zero the last two digits to obtain the city code.
        parent_id = code // 100 * 100
        parent = m_db.card.city.find_one({'_id': parent_id})
        if not parent:
            parent = {'_id': parent_id, 'position': unknown}

        # Ids in the `position` collection are integers, so query with the
        # integer code rather than the raw Redis key.
        self_doc = m_db.position.find_one({'_id': code})
        if not self_doc:
            self_doc = {'_id': code, 'position': unknown}

        self_doc['count'] = count
        self_doc['position'] = u'{parent} {self}'.format(
            parent=parent['position'],
            self=self_doc['position'],
        )
        ret = m_db.card.county.save(self_doc)
        logbook.info(ret)
        r_db.delete(key)
if __name__ == '__main__':
    # Step 1: count every user document into Redis. A plain loop is used
    # because map() would collect a useless list of None results in Python 2
    # and would not run analysis() at all in Python 3, where it is lazy.
    for user_doc in m_db.user.find():
        analysis(user_doc)

    # Step 2: flush the counters to MongoDB. Provinces go before cities
    # because the city key pattern also matches province keys; counties go
    # last because save_county() consumes every numeric key still left.
    logbook.info('start process province')
    save_position('province')
    logbook.info('start process city')
    save_position('city')
    save_birth('month')
    save_birth('year')
    save_gender()
    logbook.info('start process county')
    save_county()