-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrape.py
41 lines (33 loc) · 1.02 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import re
from github import Github
from github import Auth
# GitHub API credential built from the GITHUB_TOKEN environment variable.
# The os.environ[...] lookup raises KeyError as soon as this module is loaded
# if the variable is not set.
AUTH_TOKEN = Auth.Token(os.environ["GITHUB_TOKEN"])
# Pattern for one README section delimited by a pair of identical
# "<!-- name -->" markers. Group 1 captures the name from the opening marker
# and the \1 backreference requires the closing marker to repeat it exactly;
# [\s\S]* spans everything (including newlines) in between. The leading ^
# anchors the opening marker to a line start when re.MULTILINE is passed.
REGEX = r"^<!-- (.*) -->[\s\S]*<!-- \1 -->"
def regex_replace(repo):
    """Return the regex source that locates *repo*'s section in the README.

    The section is the text enclosed by two ``<!-- repo -->`` marker lines.
    The pattern is meant to be used with ``re.MULTILINE`` so that both
    markers are anchored to the start of a line.

    Capture groups:
        1 -- the opening marker,
        2 -- everything between the two markers (may span many lines),
        3 -- the closing marker.

    Args:
        repo: Repository name exactly as written inside the markers,
            e.g. ``"owner/project"``.

    Returns:
        The pattern as a string, suitable for ``re.sub`` / ``re.search``.
    """
    # Repository names routinely contain regex metacharacters ("." most of
    # all, but also "+"); escape the name so it is only ever matched
    # literally instead of being interpreted as part of the pattern.
    marker = f"<!-- {re.escape(repo)} -->"
    return rf"^({marker})([\s\S]*)^({marker})"
def issue_text(issue):
    """Format *issue* as one Markdown bullet that links to the issue.

    Args:
        issue: Any object exposing ``title`` and ``html_url`` attributes.

    Returns:
        A string of the form ``* [<title>](<html_url>)`` with no trailing
        newline.
    """
    label = issue.title
    target = issue.html_url
    return f"* [{label}]({target})"
# GitHub API client used for every repository lookup below.
g = Github(auth=AUTH_TOKEN)

# Extract repos: every name that appears in a matching pair of
# "<!-- name -->" markers in README.md is treated as a repository to scrape.
# The encoding is pinned so the result does not depend on the platform's
# locale default.
with open("README.md", "r", encoding="utf-8") as f:
    text = f.read()

# With a single capture group re.findall returns a list of the captured
# names; it is empty (never None) when nothing matches.
repos = re.findall(REGEX, text, re.MULTILINE | re.UNICODE | re.DOTALL)
if not repos:
    print("No repos found")
    # Same exit status as the previous exit(-1), without relying on the
    # interactive `exit` helper being installed.
    raise SystemExit(-1)

# Scrape repos: replace the body between each repository's markers with a
# bullet list of its open "help wanted" issues.
for repo in repos:
    repo_object = g.get_repo(repo)
    issues = repo_object.get_issues(state="open", labels=["help wanted"])
    # Leading newline keeps the list off the opening marker's line; every
    # bullet is newline-terminated so the closing marker starts its own line.
    text_issues = "\n" + "".join(issue_text(issue) + "\n" for issue in issues)

    # A callable replacement is used on purpose: a replacement *string* is a
    # template in which backslashes are interpreted, so an issue title
    # containing "\" could raise re.error or inject group references.
    text = re.sub(
        regex_replace(repo),
        lambda found: found.group(1) + text_issues + found.group(3),
        text,
        flags=re.MULTILINE,
    )

    # The new content is fully computed before the file is opened for
    # writing, so a failure above can no longer leave README.md truncated.
    # Written after each repository so completed sections are kept even if a
    # later repository lookup fails.
    with open("README.md", "w", encoding="utf-8") as f:
        f.write(text)