-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathcinemaempoa.py
executable file
·149 lines (127 loc) · 4.89 KB
/
cinemaempoa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python
import argparse
import json
import os
import re
import shutil
from datetime import datetime
from bs4 import BeautifulSoup
from build import HtmlBuilder
from scrapers.capitolio import Capitolio
from scrapers.cinebancarios import CineBancarios
from scrapers.paulo_amorim import CinematecaPauloAmorim
from scrapers.sala_redencao import SalaRedencao
from utils import dump_utf8_json
# Rooms that have a scraper wired into this script, in the order they are scraped.
ALLOWED_ROOMS = ["capitolio", "sala-redencao", "cinebancarios", "paulo-amorim"]


def _build_parser():
    """Create the command-line parser for the cinemaempoa script."""
    parser = argparse.ArgumentParser(
        prog="cinemaempoa",
        description="Grab the schedule for Porto Alegre's finest features",
    )
    parser.add_argument(
        "-b",
        "--build",
        help="Builds scrapped json as an html file",
        action="store_true",
    )
    parser.add_argument(
        "--deploy",
        help="Saves generated html at docs/index.html - saves the old index file in YYYY-MM-DD.html format",
        action="store_true",
    )
    parser.add_argument(
        "--date",
        help="Runs the scrapper as if the current date is the given YYYY-MM-DD value",
        required=False,
    )

    # Input comes either from live scraping (--rooms) or from a saved file
    # (--json), never both.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "-r",
        "--rooms",
        nargs="+",
        help=f"Filter specific rooms. Available: {', '.join(ALLOWED_ROOMS)}",
        required=False,
    )
    group.add_argument(
        "-j", "--json", help="JSON filepath to build index.html from", required=False
    )
    return parser


def _scrape_rooms(rooms, scrape_date):
    """Run the scraper of every selected room and collect the results.

    Results are appended in a fixed order (capitolio, sala-redencao,
    cinebancarios, paulo-amorim) regardless of the order of *rooms*.
    *scrape_date* is only forwarded to the Sala Redenção scraper.
    """
    features = []
    if "capitolio" in rooms:
        features.append(
            {
                "url": "http://www.capitolio.org.br",
                "cinema": "Cinemateca Capitólio",
                "slug": "capitolio",
                "features": Capitolio().get_daily_features_json(),
            }
        )
    if "sala-redencao" in rooms:
        features.append(
            {
                "url": "https://www.ufrgs.br/difusaocultural/salaredencao/",
                "cinema": "Sala Redenção",
                "slug": "sala-redencao",
                "features": SalaRedencao(date=scrape_date).get_daily_features_json(),
            }
        )
    if "cinebancarios" in rooms:
        # Unlike the other rooms, this scraper's result is appended as-is:
        # no url/cinema/slug wrapper is added here.
        features.append(CineBancarios().get_daily_features_json())
    if "paulo-amorim" in rooms:
        features.append(
            {
                "url": "https://www.cinematecapauloamorim.com.br",
                "cinema": "Cinemateca Paulo Amorim",
                "slug": "paulo-amorim",
                "features": CinematecaPauloAmorim().get_daily_features_json(),
            }
        )
    return features


def _deploy_page(parser, page_html):
    """Write *page_html* to docs/index.html, archiving any previous index.

    When docs/index.html already exists it is renamed to ``docs/<date>.html``,
    where ``<date>`` is taken from the ``datetime`` attribute of the first
    ``<time>`` tag found in it. If that tag or a valid date is missing the
    script exits through ``parser.error`` without touching any file. When
    there is no previous index (first deploy) the new page is simply written.
    """
    os.makedirs("docs", exist_ok=True)
    if os.path.exists("docs/index.html"):
        # Only the <time datetime="..."> attribute is needed from the old page,
        # so undecodable bytes are replaced rather than aborting the deploy.
        with open(
            "docs/index.html", "r", encoding="utf-8", errors="replace"
        ) as index:
            index_soup = BeautifulSoup(index, "html.parser")
        time_tag = index_soup.find("time")
        datetime_attr = ""
        if time_tag is not None:
            datetime_attr = time_tag.get("datetime") or ""
        datetime_match = re.match(r"\d{4}-\d{2}-\d{1,2}", datetime_attr)
        if not datetime_match:
            parser.error(
                "Please check that your index.html has a <time> tag with a valid YYYY-MM-DD datetime attribute"
            )
        shutil.move("docs/index.html", f"docs/{datetime_match[0]}.html")
    with open("docs/index.html", "w", encoding="utf-8") as new_index:
        new_index.write(page_html)


def main(argv=None):
    """Run the cinemaempoa command-line tool.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Invalid argument combinations terminate the process through
    ``parser.error`` (``SystemExit`` with status 2). Depending on the flags
    the script prints the scraped JSON, prints the generated HTML, or writes
    the HTML to docs/index.html.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if args.date and not args.rooms:
        parser.error("Define rooms to scrape when using a custom date")
    if not args.rooms and not args.json:
        parser.error("Define program input with either --rooms or --json")
    if args.deploy and not args.build:
        parser.error("You need --build in order to deploy")

    if args.rooms:
        if not all(room in ALLOWED_ROOMS for room in args.rooms):
            parser.error(
                f"Invalid selected rooms. Available: {', '.join(ALLOWED_ROOMS)}"
            )
        scrape_date = args.date
        if scrape_date:
            if args.rooms != ["sala-redencao"]:
                parser.error("Only sala-redencao implements custom date scraping.")
            # Reject malformed dates up front instead of handing them to the
            # scraper; the documented format is YYYY-MM-DD.
            try:
                datetime.strptime(scrape_date, "%Y-%m-%d")
            except ValueError:
                parser.error(
                    f"Invalid --date value {scrape_date!r}. Expected YYYY-MM-DD"
                )

        features = _scrape_rooms(args.rooms, scrape_date)

        # NOTE(review): the dump is named after today's date even when --date
        # is given — confirm that this is intended.
        json_filename = os.path.join(
            "json", f"{datetime.now().strftime('%Y-%m-%d')}.json"
        )
        os.makedirs("json", exist_ok=True)
        # Explicit UTF-8: the payload contains non-ASCII cinema names, and the
        # platform default encoding is not guaranteed to handle them.
        with open(json_filename, "w", encoding="utf-8") as json_file:
            json_file.write(dump_utf8_json(features))
    else:
        # The checks above guarantee --json was given when --rooms was not.
        if not os.path.exists(args.json):
            parser.error(f"File {args.json} not found.")
        with open(args.json, "r", encoding="utf-8") as json_file:
            features = json.load(json_file)

    json_string = dump_utf8_json(features)
    if not args.build:
        print(json_string)
        return

    page_html = HtmlBuilder(json_string).create_page_from_json()
    if args.deploy:
        _deploy_page(parser, page_html)
    else:
        print(page_html)


if __name__ == "__main__":
    main()