-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[任务]: 开发 b 站爬虫,爬取 ioclub 账号下的新评论,转发到 qq 群 #35
Comments
api 方案
根据我们的需求,第二个方案似乎更好 但是有些问题:
机器人对接方案
方法 3 应该是最好的 补充现有 qq 机器人仓库: https://github.com/io-club/IOGAS-QQ |
官方开放平台好像没有提供我们需要的评论 api
可以用作参考,不过没有创作中心 api,所以还是要自己扒下来 好像没有帮助,还没仔细看,不过没有看到评论字样 我还看到了一个: https://nemo2011.github.io/bilibili-api/#/, 但是好像连评论 api 都没有 |
这是你的真实 SESSDATA 吗?为了你自己的账号安全,建议不要直接贴到这里。获取评论的接口感觉可以了,但是现在只是一个脚本,完全无法和 qq 机器人对接。
另外 @kksk-code
|
#成品代码
import requests
from datetime import datetime
class BilibiliCommentScraper:
    """Fetch comments of Bilibili videos through the web API and print them.

    Instance attributes:
        cookies: cookie dict sent with every request (a placeholder
            ``SESSDATA`` entry when none is supplied).
        headers: header dict sent with every request.
        timeout: seconds passed to ``requests.get`` so that a stalled
            connection cannot block the scraper forever.
    """

    def __init__(self, cookies=None, headers=None, timeout=10):
        """Store the request settings.

        Args:
            cookies: optional cookie dict; defaults to a placeholder SESSDATA.
            headers: optional header dict; defaults to placeholder values.
            timeout: request timeout in seconds (new, defaults to 10).
        """
        self.cookies = cookies or {
            'SESSDATA': 'yours'
        }
        self.headers = headers or {
            'User-Agent': 'yours',
            'Content-Type': 'application/json',
        }
        self.timeout = timeout

    def convert_timestamp_to_readable(self, ctime):
        """Format a Unix timestamp as ``YYYY-MM-DD HH:MM:SS`` (local time)."""
        return datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S')

    def _get_json(self, url, params):
        """GET ``url`` and return the decoded JSON body, or None unless HTTP 200.

        Network errors and invalid JSON raised by ``requests`` propagate to
        the caller.
        """
        response = requests.get(
            url,
            headers=self.headers,
            cookies=self.cookies,
            params=params,
            timeout=self.timeout,
        )
        if response.status_code != 200:
            return None
        return response.json()

    def get_oid_from_bv(self, bv_id):
        """Return the OID (aid) of the video with the given BV id, or None."""
        # Pass the id through ``params`` so requests URL-encodes it instead of
        # splicing raw user input into the query string.
        data = self._get_json(
            "https://api.bilibili.com/x/web-interface/view",
            {'bvid': bv_id},
        )
        if isinstance(data, dict) and data.get('code') == 0:
            return (data.get('data') or {}).get('aid')
        return None

    def get_comments(self, oid, page=1, ps=20):
        """Return the ``data`` object of one comment page, or {} on failure.

        Args:
            oid: video OID (aid).
            page: value sent as the ``next`` request parameter.
            ps: value sent as the ``ps`` request parameter.
        """
        params = {
            'type': 1,
            'oid': oid,
            'next': page,
            'ps': ps,
        }
        data = self._get_json("https://api.bilibili.com/x/v2/reply/main", params)
        payload = data.get('data') if isinstance(data, dict) else None
        # ``data`` may be null on API errors; only hand back a real page.
        if isinstance(payload, dict) and 'replies' in payload:
            return payload
        return {}

    def get_replies(self, reply_id, oid):
        """Return the list of replies under comment ``reply_id`` ([] if none)."""
        params = {
            'oid': oid,
            'type': 1,
            'rpid': reply_id
        }  # "mode" made no difference, so it is not sent
        data = self._get_json("https://api.bilibili.com/x/v2/reply/reply", params)
        payload = data.get('data') if isinstance(data, dict) else None
        if isinstance(payload, dict):
            # The field can be present but null; always return a list.
            return payload.get('replies') or []
        return []

    def _make_record(self, item, kind, sex):
        """Build the flat display record for one comment or reply."""
        ctime = item['ctime']
        return {
            'name': item['member']['uname'],
            'sex': sex,
            'content': item['content']['message'],
            'like': item['like'],
            'ctime': ctime,
            'readable_time': self.convert_timestamp_to_readable(ctime),
            'type': kind,
            'reply': True if kind == 'reply' else None,
        }

    def _flatten_thread(self, comment, kind):
        """Return records for ``comment`` followed by its inline replies."""
        records = [self._make_record(comment, kind, comment['member']['sex'])]
        # ``replies`` is null (not missing) when a comment has no replies.
        for reply in comment.get('replies') or []:
            records.append(self._make_record(reply, 'reply', 'unknown'))
        return records

    def print_comments_and_replies(self, bv_id, data, include_top=True):
        """Print the comments and replies of one page, oldest first.

        Args:
            bv_id: BV id shown in the header line.
            data: page object as returned by ``get_comments``.
            include_top: when False, pinned comments are skipped; used to
                avoid repeating them on every page of the same video.
        """
        print(f"\n视频 BV: {bv_id} 的评论:")
        all_comments = []
        # Pinned comments (and their replies) go first.
        if include_top:
            for top_reply in data.get('top_replies') or []:
                all_comments.extend(self._flatten_thread(top_reply, 'top'))
        # Then the regular comments with their replies.
        for comment in data.get('replies') or []:
            all_comments.extend(self._flatten_thread(comment, 'normal'))
        # Stable sort by timestamp: earlier comments are printed first.
        all_comments.sort(key=lambda record: record['ctime'])
        labels = {'top': '置顶评论', 'normal': '普通评论'}
        for comment in all_comments:
            if comment['type'] in labels:
                print(f"{labels[comment['type']]}: 昵称: {comment['name']}, 性别: {comment['sex']}, 评论: {comment['content']}, 点赞: {comment['like']}, 时间: {comment['readable_time']}")
            elif comment['type'] == 'reply':
                print(f" 回复: {comment['name']}, 内容: {comment['content']}, 点赞: {comment['like']}, 时间: {comment['readable_time']}")

    def get_multiple_videos_comments(self, bv_ids, total_pages=1):
        """Fetch and print ``total_pages`` pages of comments for each BV id."""
        for bv_id in bv_ids:
            print(f"\n正在获取视频 {bv_id} 的评论...")
            oid = self.get_oid_from_bv(bv_id)
            if not oid:
                print(f"无法获取视频 {bv_id} 的 OID")
                continue
            print(f"视频的 OID: {oid}")
            for page in range(1, total_pages + 1):
                print(f"正在获取第 {page} 页评论...")
                data = self.get_comments(oid, page)
                if data:
                    # Pinned comments come back with every page; show them once.
                    self.print_comments_and_replies(bv_id, data, include_top=(page == 1))
                else:
                    print(f"第 {page} 页没有评论或请求失败")
# 主函数
def main():
    """Read comma-separated BV ids from stdin and print each video's comments."""
    raw_ids = input('请输入一个视频的 BV 号: ').split(',')
    # Drop blank entries (empty input, trailing or doubled commas) so that no
    # request is sent for an empty BV id.
    bv_ids = [bv_id.strip() for bv_id in raw_ids if bv_id.strip()]
    # Create the scraper with its default settings and fetch the comments.
    scraper = BilibiliCommentScraper()
    scraper.get_multiple_videos_comments(bv_ids)


if __name__ == "__main__":
    main()
#实现了对视频评论的爬取,并按照时间排序输出(先出现的在上面,后出现的在下面) |
# 获取评论数据
def get_comments(oid, page=1, ps=20):
url = "https://api.bilibili.com/x/v2/reply/main"
params = {
...
'mode': 3 # 按热度排序
} 请问关于 mode 参数的文档在哪里?我在 bilibili-API-collect 里没有找到 |
我认为你的代码中每次分页都会有所有的置顶评论属于预期行为:
所以建议只有第一次打印置顶评论 |
一些测试的代码与代码各个部分在这个仓库中 |
如何认领
请直接在下方回复,我会把 issue 的 assignees 设置为你
认领者需要遵守以下要求
注意事项
需求描述
我们需要开发一个机器人同步 b 站评论信息,原因如下:
实现方式
文档资料
难度
简单 (什么都不会也能学习做)
能力要求
关闭 Issue 前请确认以下内容
The text was updated successfully, but these errors were encountered: