Skip to content

Commit

Permalink
Merge pull request #2 from jiangwei1995910/ziroom
Browse files Browse the repository at this point in the history
自如爬虫
  • Loading branch information
Jinnrry authored Feb 22, 2019
2 parents 4c1e293 + d0fa27f commit 37c26f8
Show file tree
Hide file tree
Showing 10 changed files with 414 additions and 34 deletions.
188 changes: 187 additions & 1 deletion SQL.sql
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,190 @@ CREATE TABLE `lianjia_transaction` (
`url` varchar(500) COLLATE utf8_bin DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `ljID` (`ljID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;



-- Table `ziroom`: one row per listing, keyed by `id`.
-- Mapped by the `Ziroom` ORM class in db/ziroom.py.
CREATE TABLE `ziroom` (
    `id`             INT(11)     NOT NULL,
    `price`          INT(10)     DEFAULT NULL,
    `url`            VARCHAR(255) COLLATE utf8_bin DEFAULT NULL,
    `iswhole`        TINYINT(1)  DEFAULT NULL,
    -- filled in by the database when the row is inserted without a value
    `ctime`          DATETIME    DEFAULT CURRENT_TIMESTAMP,
    -- NOTE(review): stored as text here, while db/ziroom.py maps it as Float
    `area`           VARCHAR(10) DEFAULT NULL,
    `bedroom`        VARCHAR(2)  COLLATE utf8_bin DEFAULT NULL,
    `parlor`         VARCHAR(2)  COLLATE utf8_bin DEFAULT NULL,
    `district_name`  VARCHAR(15) COLLATE utf8_bin DEFAULT NULL,
    `bizcircle_name` VARCHAR(15) COLLATE utf8_bin DEFAULT NULL,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin;


--
-- View structure for `1-3年城市薪资`
-- Per-city mean and standard deviation of `item`.`avgsalary` for rows whose
-- `workingexp` is '1-3年'; only cities with more than 300 such rows are kept.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable: the first clears a same-named table, the second
-- clears a previously created view.
DROP TABLE IF EXISTS `1-3年城市薪资`;
DROP VIEW IF EXISTS `1-3年城市薪资`;

CREATE VIEW `1-3年城市薪资` AS
SELECT
    AVG(`item`.`avgsalary`) AS `AVG(avgsalary)`,
    STD(`item`.`avgsalary`) AS `STD(avgsalary)`,
    `item`.`city` AS `city`,
    COUNT(1) AS `num`
FROM `item`
WHERE `item`.`workingexp` = '1-3年'
GROUP BY `item`.`city`
HAVING `num` > 300
ORDER BY `AVG(avgsalary)` DESC;

-- --------------------------------------------------------

--
-- View structure for `3-5年薪资`
-- Per-city mean and standard deviation of `item`.`avgsalary` for rows whose
-- `workingexp` is '3-5年'; only cities with more than 300 such rows are kept.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable: the first clears a same-named table, the second
-- clears a previously created view.
DROP TABLE IF EXISTS `3-5年薪资`;
DROP VIEW IF EXISTS `3-5年薪资`;

CREATE VIEW `3-5年薪资` AS
SELECT
    AVG(`item`.`avgsalary`) AS `AVG(avgsalary)`,
    STD(`item`.`avgsalary`) AS `STD(avgsalary)`,
    `item`.`city` AS `city`,
    COUNT(1) AS `num`
FROM `item`
WHERE `item`.`workingexp` = '3-5年'
GROUP BY `item`.`city`
HAVING `num` > 300
ORDER BY `AVG(avgsalary)` DESC;

-- --------------------------------------------------------

--
-- View structure for `2015年数据`
-- Mean and standard deviation of `avgPrice` and `price` per `address2` for
-- `lianjia_transaction` rows with `address1` = '成都' dated within 2015.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2015年数据`;
DROP VIEW IF EXISTS `2015年数据`;

-- Half-open date range [2015-01-01, 2016-01-01): the lower bound is inclusive
-- so that transactions dated 2015-01-01 are part of the year.
CREATE VIEW `2015年数据` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    `lianjia_transaction`.`address2` AS `address2`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`address1` = '成都'
  AND `lianjia_transaction`.`transactiondate` >= '2015-01-01'
  AND `lianjia_transaction`.`transactiondate` < '2016-01-01'
GROUP BY `lianjia_transaction`.`address2`;

-- --------------------------------------------------------

--
-- View structure for `2016年数据`
-- Mean and standard deviation of `avgPrice` and `price` per `address2` for
-- `lianjia_transaction` rows with `address1` = '成都' dated within 2016.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2016年数据`;
DROP VIEW IF EXISTS `2016年数据`;

-- Half-open date range [2016-01-01, 2017-01-01): the lower bound is inclusive
-- so that transactions dated 2016-01-01 are part of the year.
CREATE VIEW `2016年数据` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    `lianjia_transaction`.`address2` AS `address2`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`address1` = '成都'
  AND `lianjia_transaction`.`transactiondate` >= '2016-01-01'
  AND `lianjia_transaction`.`transactiondate` < '2017-01-01'
GROUP BY `lianjia_transaction`.`address2`;

-- --------------------------------------------------------

--
-- View structure for `2017年数据`
-- Mean and standard deviation of `avgPrice` and `price` per `address2` for
-- `lianjia_transaction` rows with `address1` = '成都' dated within 2017.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2017年数据`;
DROP VIEW IF EXISTS `2017年数据`;

-- Half-open date range [2017-01-01, 2018-01-01): the lower bound is inclusive
-- so that transactions dated 2017-01-01 are part of the year.
CREATE VIEW `2017年数据` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    `lianjia_transaction`.`address2` AS `address2`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`address1` = '成都'
  AND `lianjia_transaction`.`transactiondate` >= '2017-01-01'
  AND `lianjia_transaction`.`transactiondate` < '2018-01-01'
GROUP BY `lianjia_transaction`.`address2`;

-- --------------------------------------------------------

--
-- View structure for `2017年起价格走势`
-- Monthly (`tdate`, formatted '%Y%m') mean/standard deviation of `avgPrice`
-- and `price`, plus row count, per `address2`, for all `lianjia_transaction`
-- rows dated from 2017-01-01 onward.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2017年起价格走势`;
DROP VIEW IF EXISTS `2017年起价格走势`;

-- The start date is inclusive so that 2017-01-01 itself is counted.
CREATE VIEW `2017年起价格走势` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    `lianjia_transaction`.`address2` AS `address2`,
    COUNT(1) AS `COUNT(1)`,
    DATE_FORMAT(`lianjia_transaction`.`transactiondate`, '%Y%m') AS `tdate`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`transactiondate` >= '2017-01-01'
GROUP BY `tdate`, `lianjia_transaction`.`address2`
ORDER BY `lianjia_transaction`.`address2`, `tdate`;

-- --------------------------------------------------------

--
-- View structure for `2017年起全国价格走势`
-- Monthly (`tdate`, formatted '%Y%m') mean/standard deviation of `avgPrice`
-- and `price`, plus row count, over all `lianjia_transaction` rows dated from
-- 2017-01-01 onward (no city filter).
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2017年起全国价格走势`;
DROP VIEW IF EXISTS `2017年起全国价格走势`;

-- The start date is inclusive so that 2017-01-01 itself is counted.
CREATE VIEW `2017年起全国价格走势` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    COUNT(1) AS `COUNT(1)`,
    DATE_FORMAT(`lianjia_transaction`.`transactiondate`, '%Y%m') AS `tdate`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`transactiondate` >= '2017-01-01'
GROUP BY `tdate`
ORDER BY `tdate`;

-- --------------------------------------------------------

--
-- View structure for `2017年起北京价格走势`
-- Monthly (`tdate`, formatted '%Y%m') mean/standard deviation of `avgPrice`
-- and `price`, plus row count, per `address2`, for `lianjia_transaction` rows
-- with `address1` = '北京' dated from 2017-01-01 onward.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2017年起北京价格走势`;
DROP VIEW IF EXISTS `2017年起北京价格走势`;

-- The start date is inclusive so that 2017-01-01 itself is counted.
CREATE VIEW `2017年起北京价格走势` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    `lianjia_transaction`.`address2` AS `address2`,
    COUNT(1) AS `COUNT(1)`,
    DATE_FORMAT(`lianjia_transaction`.`transactiondate`, '%Y%m') AS `tdate`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`transactiondate` >= '2017-01-01'
  AND `lianjia_transaction`.`address1` = '北京'
GROUP BY `tdate`, `lianjia_transaction`.`address2`
ORDER BY `lianjia_transaction`.`address2`, `tdate`;

-- --------------------------------------------------------

--
-- View structure for `2017年起各城市房屋均价`
-- Mean `avgPrice` and mean `price` per `address1` over `lianjia_transaction`
-- rows dated from 2017-01-01 onward, highest mean `price` first.
-- The view `生活压力` joins against this view.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2017年起各城市房屋均价`;
DROP VIEW IF EXISTS `2017年起各城市房屋均价`;

-- The start date is inclusive so that 2017-01-01 itself is counted.
CREATE VIEW `2017年起各城市房屋均价` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `AVG(avgPrice)`,
    AVG(`lianjia_transaction`.`price`) AS `AVG(price)`,
    `lianjia_transaction`.`address1` AS `address1`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`transactiondate` >= '2017-01-01'
GROUP BY `lianjia_transaction`.`address1`
ORDER BY `AVG(price)` DESC;

-- --------------------------------------------------------

--
-- View structure for `2017年起成都价格走势`
-- Monthly (`tdate`, formatted '%Y%m') mean/standard deviation of `avgPrice`
-- and `price`, plus row count, per `address2`, for `lianjia_transaction` rows
-- with `address1` = '成都' dated from 2017-01-01 onward.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2017年起成都价格走势`;
DROP VIEW IF EXISTS `2017年起成都价格走势`;

-- The start date is inclusive so that 2017-01-01 itself is counted.
CREATE VIEW `2017年起成都价格走势` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    `lianjia_transaction`.`address2` AS `address2`,
    COUNT(1) AS `COUNT(1)`,
    DATE_FORMAT(`lianjia_transaction`.`transactiondate`, '%Y%m') AS `tdate`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`transactiondate` >= '2017-01-01'
  AND `lianjia_transaction`.`address1` = '成都'
GROUP BY `tdate`, `lianjia_transaction`.`address2`
ORDER BY `lianjia_transaction`.`address2`, `tdate`;

-- --------------------------------------------------------

--
-- View structure for `2017年起深圳价格走势`
-- Monthly (`tdate`, formatted '%Y%m') mean/standard deviation of `avgPrice`
-- and `price`, plus row count, per `address2`, for `lianjia_transaction` rows
-- with `address1` = '深圳' dated from 2017-01-01 onward.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `2017年起深圳价格走势`;
DROP VIEW IF EXISTS `2017年起深圳价格走势`;

-- The start date is inclusive so that 2017-01-01 itself is counted.
CREATE VIEW `2017年起深圳价格走势` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `avg(``avgPrice``)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(``avgPrice``)`,
    AVG(`lianjia_transaction`.`price`) AS `avg(``price``)`,
    STD(`lianjia_transaction`.`price`) AS `std(``price``)`,
    `lianjia_transaction`.`address2` AS `address2`,
    COUNT(1) AS `COUNT(1)`,
    DATE_FORMAT(`lianjia_transaction`.`transactiondate`, '%Y%m') AS `tdate`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`transactiondate` >= '2017-01-01'
  AND `lianjia_transaction`.`address1` = '深圳'
GROUP BY `tdate`, `lianjia_transaction`.`address2`
ORDER BY `lianjia_transaction`.`address2`, `tdate`;

-- --------------------------------------------------------

--
-- View structure for `20180101开始成都数据`
-- Mean and standard deviation of `avgPrice` and `price` per `address2` for
-- `lianjia_transaction` rows with `address1` = '成都' dated from 2018-01-01
-- onward.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `20180101开始成都数据`;
DROP VIEW IF EXISTS `20180101开始成都数据`;

-- The start date is inclusive: the view is named for data starting on
-- 2018-01-01, so that day must not be filtered out.
CREATE VIEW `20180101开始成都数据` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `AVG(avgPrice)`,
    STD(`lianjia_transaction`.`avgPrice`) AS `std(avgPrice)`,
    AVG(`lianjia_transaction`.`price`) AS `AVG(price)`,
    STD(`lianjia_transaction`.`price`) AS `STD(price)`,
    `lianjia_transaction`.`address2` AS `address2`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`address1` = '成都'
  AND `lianjia_transaction`.`transactiondate` >= '2018-01-01'
GROUP BY `lianjia_transaction`.`address2`;

-- --------------------------------------------------------

--
-- View structure for `citySarly`
-- Per-city mean (`avgsalarys`) and standard deviation of `item`.`avgsalary`,
-- ignoring rows whose `avgsalary` is 0; only cities with more than 300
-- remaining rows are kept. The view `生活压力` selects from this view.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable: the first clears a same-named table, the second
-- clears a previously created view.
DROP TABLE IF EXISTS `citySarly`;
DROP VIEW IF EXISTS `citySarly`;

CREATE VIEW `citySarly` AS
SELECT
    AVG(`item`.`avgsalary`) AS `avgsalarys`,
    STD(`item`.`avgsalary`) AS `STD(avgsalary)`,
    `item`.`city` AS `city`,
    COUNT(1) AS `counts`
FROM `item`
WHERE `item`.`avgsalary` <> 0
GROUP BY `item`.`city`
HAVING `counts` > 300
ORDER BY `avgsalarys` DESC;

-- --------------------------------------------------------

--
-- View structure for `城市薪资`
-- Per-city mean and standard deviation of `item`.`avgsalary` over all rows
-- (no row filter); only cities with more than 300 rows are kept.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable: the first clears a same-named table, the second
-- clears a previously created view.
DROP TABLE IF EXISTS `城市薪资`;
DROP VIEW IF EXISTS `城市薪资`;

CREATE VIEW `城市薪资` AS
SELECT
    AVG(`item`.`avgsalary`) AS `AVG(avgsalary)`,
    STD(`item`.`avgsalary`) AS `STD(avgsalary)`,
    `item`.`city` AS `city`,
    COUNT(1) AS `num`
FROM `item`
GROUP BY `item`.`city`
HAVING `num` > 300
ORDER BY `AVG(avgsalary)` DESC;

-- --------------------------------------------------------

--
-- View structure for `按月统计薪资`
-- Mean `avgsalary` and row count per month of `item`.`createdate`
-- (`DATE`, formatted '%Y%m'), ignoring rows whose `avgsalary` is 0.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `按月统计薪资`;
DROP VIEW IF EXISTS `按月统计薪资`;

-- Aggregates `item` directly instead of going through a derived table that
-- copied every column: the result is the same, and the definition no longer
-- needs a subquery in FROM, which MySQL versions before 5.7.7 reject in views.
CREATE VIEW `按月统计薪资` AS
SELECT
    AVG(`item`.`avgsalary`) AS `AVG(avgsalary)`,
    COUNT(1) AS `COUNT(1)`,
    DATE_FORMAT(`item`.`createdate`, '%Y%m') AS `DATE`
FROM `item`
WHERE `item`.`avgsalary` <> 0
GROUP BY DATE_FORMAT(`item`.`createdate`, '%Y%m');

-- --------------------------------------------------------

--
-- View structure for `生活压力`
-- Joins the per-city salary view `citySarly` with the per-city price view
-- `2017年起各城市房屋均价` on city name and adds `gfyl`, the ratio
-- `avgsalarys` / `AVG(avgPrice)`. Both source views are created earlier in
-- this script. Cities without a matching price row keep NULL price columns
-- (LEFT JOIN), which makes `gfyl` NULL as well.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `生活压力`;
DROP VIEW IF EXISTS `生活压力`;

CREATE VIEW `生活压力` AS
SELECT
    `a`.`avgsalarys` AS `avgsalarys`,
    `a`.`STD(avgsalary)` AS `STD(avgsalary)`,
    `a`.`city` AS `city`,
    `a`.`counts` AS `counts`,
    `b`.`AVG(avgPrice)` AS `AVG(avgPrice)`,
    `b`.`AVG(price)` AS `AVG(price)`,
    `b`.`address1` AS `address1`,
    (`a`.`avgsalarys` / `b`.`AVG(avgPrice)`) AS `gfyl`
FROM `citySarly` AS `a`
LEFT JOIN `2017年起各城市房屋均价` AS `b`
    ON `a`.`city` = `b`.`address1`
ORDER BY (`a`.`avgsalarys` / `b`.`AVG(avgPrice)`) DESC;

-- --------------------------------------------------------

--
-- View structure for `近12月城市房价`
-- Mean `avgPrice` and mean `price` per `address1` over `lianjia_transaction`
-- rows dated between 12 months before NOW() and NOW(); the window is
-- evaluated each time the view is queried. Highest mean `price` first.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `近12月城市房价`;
DROP VIEW IF EXISTS `近12月城市房价`;

CREATE VIEW `近12月城市房价` AS
SELECT
    AVG(`lianjia_transaction`.`avgPrice`) AS `AVG(avgPrice)`,
    AVG(`lianjia_transaction`.`price`) AS `AVG(price)`,
    `lianjia_transaction`.`address1` AS `address1`
FROM `lianjia_transaction`
WHERE `lianjia_transaction`.`transactiondate` BETWEEN (NOW() - INTERVAL 12 MONTH) AND NOW()
GROUP BY `lianjia_transaction`.`address1`
ORDER BY `AVG(price)` DESC;

-- --------------------------------------------------------

--
-- View structure for `近12月薪资`
-- Per-city mean (`avgsalarys`) and standard deviation of `item`.`avgsalary`
-- for rows with non-zero `avgsalary` whose `createdate` lies between
-- 12 months before NOW() and NOW(); only cities with more than 300 such rows.
--
-- This view is created BEFORE `近12月生活压力` because that view selects from
-- it, and MySQL rejects CREATE VIEW when a referenced view does not exist yet.
--
-- DROP TABLE does not remove a view, so both statements are needed for the
-- script to be re-runnable.
DROP TABLE IF EXISTS `近12月薪资`;
DROP VIEW IF EXISTS `近12月薪资`;

CREATE VIEW `近12月薪资` AS
SELECT
    AVG(`item`.`avgsalary`) AS `avgsalarys`,
    STD(`item`.`avgsalary`) AS `STD(avgsalary)`,
    `item`.`city` AS `city`,
    COUNT(1) AS `counts`
FROM `item`
WHERE `item`.`avgsalary` <> 0
  AND `item`.`createdate` BETWEEN (NOW() - INTERVAL 12 MONTH) AND NOW()
GROUP BY `item`.`city`
HAVING `counts` > 300
ORDER BY `avgsalarys` DESC;

-- --------------------------------------------------------

--
-- View structure for `近12月生活压力`
-- Joins `近12月薪资` with `近12月城市房价` on city name and adds `gfyl`, the
-- ratio `avgsalarys` / `AVG(avgPrice)`. Cities without a matching price row
-- keep NULL price columns (LEFT JOIN), which makes `gfyl` NULL as well.
--
DROP TABLE IF EXISTS `近12月生活压力`;
DROP VIEW IF EXISTS `近12月生活压力`;

CREATE VIEW `近12月生活压力` AS
SELECT
    `a`.`avgsalarys` AS `avgsalarys`,
    `a`.`STD(avgsalary)` AS `STD(avgsalary)`,
    `a`.`city` AS `city`,
    `a`.`counts` AS `counts`,
    `b`.`AVG(avgPrice)` AS `AVG(avgPrice)`,
    `b`.`AVG(price)` AS `AVG(price)`,
    `b`.`address1` AS `address1`,
    (`a`.`avgsalarys` / `b`.`AVG(avgPrice)`) AS `gfyl`
FROM `近12月薪资` AS `a`
LEFT JOIN `近12月城市房价` AS `b`
    ON `a`.`city` = `b`.`address1`
ORDER BY (`a`.`avgsalarys` / `b`.`AVG(avgPrice)`) DESC;
14 changes: 7 additions & 7 deletions backup.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import pymysql
import datetime

from config import DBInfo

# Open the database connection with the shared settings from config/DBInfo.py.
# Keyword arguments are used because the meaning of positional arguments to
# pymysql.connect depends on the installed PyMySQL version.
db = pymysql.connect(host=DBInfo.dbhost, user=DBInfo.user, password=DBInfo.pwd, database=DBInfo.db)

try:
    # Create a cursor object with cursor().
    cursor = db.cursor()

    today = datetime.date.today()
    # Snapshot the rows of the `近12月生活压力` view that have a non-NULL `gfyl`
    # into a new table whose name is suffixed with today's date.
    cursor.execute("CREATE TABLE `近12月生活压力_" + str(today) + "` AS SELECT * FROM `近12月生活压力` WHERE gfyl is NOT null;")
finally:
    # Close the database connection even when the snapshot statement fails
    # (for example because today's table already exists).
    db.close()
24 changes: 14 additions & 10 deletions config/DBInfo.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
# coding:utf-8
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Connection settings; backup.py reads dbhost, user, pwd and db from here.
# NOTE(review): real credentials are committed in source control; consider
# loading them from the environment or an untracked file.
dbhost = "127.0.0.1"
port = '3306'
db = 'zhaopin'
pwd = '78667602'
user = 'root'

# Database settings: one engine, built from the values above.
engine = create_engine(
    "mysql+pymysql://" + user + ":" + pwd + "@" + dbhost + ":" + port + "/" + db + "?charset=utf8",
    max_overflow=20,  # maximum number of connections created beyond the pool size
    pool_size=10,  # connection pool size
    pool_timeout=30,  # maximum time to wait for a free connection from the pool before raising an error
    pool_recycle=-1  # how long before pooled connections are recycled (reset)
)

# Session factory bound to the engine above.
SessionFactory = sessionmaker(bind=engine)
9 changes: 7 additions & 2 deletions crontab
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# m h dom mon dow command
# Start the Lianjia crawler
0 16 * * * /home/jiangwei1995910/getAwayBSG/run.sh
# Start the Zhilian crawler
0 0 * * * /usr/bin/python3 /home/jiangwei1995910/getAwayBSG/main.py
# Back up the view data on the 11th of every month, once at 00:00.
# (The minute and hour fields must be fixed: "* * 11 * *" would start the
# backup every minute of that day.)
0 0 11 * * /usr/bin/python3 /home/jiangwei1995910/getAwayBSG/backup.py
# Report the server status every day at 08:00 and 13:00
0 8,13 * * * /usr/bin/python3 /home/jiangwei1995910/getAwayBSG/reportIP.py
# Ziroom crawler
0 8 * * * /usr/bin/python3 /home/jiangwei1995910/getAwayBSG/spider/ziroom.py
44 changes: 44 additions & 0 deletions db/ziroom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#coding:utf-8
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, String, Integer, Float, DateTime


# Base class for the declarative ORM models:
Base = declarative_base()


class Ziroom(Base):
    """ORM mapping for the `ziroom` table.

    The constructor takes a dict-like `data` and copies the entries whose keys
    match a mapped column onto the instance; any other keys are ignored.
    """

    # Table name:
    __tablename__ = 'ziroom'

    # Table structure:
    # NOTE(review): SQL.sql also declares a `ctime` column with
    # DEFAULT CURRENT_TIMESTAMP; it is not mapped here.
    id = Column(Integer, primary_key=True)
    price = Column(Integer())
    url = Column(String(255))
    iswhole = Column(Integer())
    # NOTE(review): SQL.sql declares `area` as varchar(10) - confirm which
    # type is intended.
    area = Column(Float())
    bedroom = Column(String(2))
    parlor = Column(String(2))
    district_name = Column(String(15))
    bizcircle_name = Column(String(15))

    def __init__(self, data):
        """Populate the mapped columns from `data`.

        :param data: mapping with a ``keys()`` method; only the keys listed in
            ``accepted`` below are copied, everything else is skipped.
        """
        accepted = (
            'id', 'price', 'url', 'iswhole', 'area',
            'bedroom', 'parlor', 'district_name', 'bizcircle_name',
        )
        for key in data.keys():
            if key in accepted:
                setattr(self, key, data[key])
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
requests
SQLAlchemy
scrapy
pymysql
pyquery
13 changes: 8 additions & 5 deletions spider/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,24 @@


def get(url):
    """Fetch `url` with a browser-like User-Agent and a 10 second timeout.

    If the first attempt fails with a `requests` error, the request is sent
    exactly once more; an error from that second attempt propagates to the
    caller.

    :param url: the URL to request.
    :return: the `requests` response object.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
    }
    # NOTE: the random 0-5 second delay that used to precede each request is
    # currently disabled.
    try:
        return requests.get(url, timeout=10, headers=header)
    except requests.RequestException:
        # Single retry for network-level failures only; unrelated errors
        # (e.g. KeyboardInterrupt) are no longer swallowed into a retry.
        return requests.get(url, timeout=10, headers=header)


# Zhilian Zhaopin job-list endpoint
def getList(cityid, kw, start, length):
    """Query the Zhilian job-search API and return the decoded JSON body.

    :param cityid: value sent as the `cityId` query parameter.
    :param kw: search keyword; percent-encoded before being put in the URL.
    :param start: value sent as the `start` query parameter.
    :param length: value sent as the `pageSize` query parameter.
    :return: the parsed JSON response, or ``{'code': 0}`` when the request or
        the JSON decoding fails.
    """
    # NOTE(review): the second positional argument of urllib's quote() is
    # `safe`, not an encoding - presumably harmless here; confirm the import.
    kw = quote(kw, 'utf-8')

    # '&pageSize=' needs its leading '&'; without it the query string starts
    # with "start=<start>pageSize=<length>" and both parameters are corrupted.
    url = 'https://fe-api.zhaopin.com/c/i/sou?start=' + str(start) + '&pageSize=' + str(length) + '&cityId=' + str(
        cityid) + '&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&sortType=publish&kw=' + str(
        kw) + '&kt=3&_v=0.17996222&x-zp-page-request-id=e8d2c03d3c4347a9b5edffa03367d90d-1547646999572-254944'
    try:
        return json.loads(get(url).text)
    except Exception:
        # Best effort: callers receive a sentinel instead of an exception.
        return {'code': 0}
Loading

0 comments on commit 37c26f8

Please sign in to comment.