diff --git a/page_parse/user/person.py b/page_parse/user/person.py index b03f5bda..6bf36acd 100755 --- a/page_parse/user/person.py +++ b/page_parse/user/person.py @@ -92,21 +92,21 @@ def get_detail(html, uid): user.description = description.encode('gbk', 'ignore').decode('gbk') elif '注册时间:' in each_str: user.register_time = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\t', '').replace( - '\r\n', '') + '\r\n', '').replace(' ', '') if '标签信息' in basic_str: basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'}) for each in basic_info: if '标签:' in each.get_text(): user.tags = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\t', '').replace( - '\n\n\n', '') .strip().replace('\r\n', ';') + '\n\n\n', '') .strip().replace('\r\n', ';').replace(' ', '') if '教育信息' in basic_str: basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'}) for each in basic_info: if '大学:' in each.get_text(): user.education_info = each.find(attrs={'class': 'pt_detail'}).get_text().replace('\r\n', ',') \ - .replace('\t', '').replace('\n', ';').lstrip(';').rstrip(';') + .replace('\t', '').replace('\n', ';').lstrip(';').rstrip(';').replace(' ', '') if '工作信息' in basic_str: basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'}) @@ -116,7 +116,7 @@ def get_detail(html, uid): jobs = each.find_all(attrs={'class': 'pt_detail'}) for job in jobs: jobs_info.append(job.get_text().replace('\r\n', '').replace('\t', '').replace('\n', '')) - user.work_info = ';'.join(jobs_info) + user.work_info = ';'.join(jobs_info).replace(' ', '') if '联系信息' in basic_str: basic_info = each_module.find_all(attrs={'class': 'li_1 clearfix'}) @@ -129,7 +129,7 @@ def get_detail(html, uid): contact_info.append('email:' + each.find(attrs={'class': 'pt_detail'}).get_text()) if 'MSN:' in each.get_text(): contact_info.append('msn:' + each.find(attrs={'class': 'pt_detail'}).get_text()) - user.contact_info = ';'.join(contact_info) + user.contact_info = ';'.join(contact_info).replace(' ', '') except Exception as why: print('解析出错,具体原因为{why}'.format(why=why))