-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathYouTube.py
71 lines (58 loc) · 2.3 KB
/
YouTube.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python3
import argparse
import json
from pathlib import Path
from typing import Any, Dict, List
from tqdm import tqdm
def load_data(file_path: Path) -> List[Dict[str, Any]]:
    """Load a YouTube watch-history JSON export and return the watched videos.

    Entries are handled as follows:

    * Entries without ``titleUrl`` (stories, deleted videos, YouTube Music
      visits) are skipped; any other entry without ``titleUrl`` is treated
      as an unknown format and rejected.
    * Entries with ``titleUrl`` but without ``subtitles`` (non-public videos,
      whose title is just the URL) are validated and skipped.
    * Every remaining entry is converted to a flat record.

    Args:
        file_path: Path to the ``watch-history.json`` file.

    Returns:
        One dict per kept entry with the keys ``title``, ``title_url``,
        ``time``, ``channel`` and ``channel_url``.

    Raises:
        AssertionError: If an entry does not match any of the known shapes.
        KeyError: If an entry lacks a field that its shape requires.
    """
    # JSON text is UTF-8 (RFC 8259); relying on the platform default encoding
    # would break on the Japanese titles wherever that default is not UTF-8.
    with open(file_path, encoding="utf-8") as f:
        raw_data: List[dict] = json.load(f)

    # Example entry (original author's note: "one-year span"):
    # {
    #     "header": "YouTube",
    #     "products": ["YouTube"],
    #     "subtitles": [{"name": "コカ・コーラ", "url": "https://www.youtube...bDY6Gi3WQ"}],
    #     "time": "2020-01-24T17:45:17.963Z",
    #     "title": "【アクエリアス】 TVCM 『見えない...F を視聴しました",
    #     "titleUrl": "https://www.youtube...w1ji5_CXI",
    # }

    # Trailing text that is cut off every kept title (8 characters).
    watched_suffix = " を視聴しました"
    # Exact titles of entries that carry no "titleUrl" and are safe to skip.
    skippable_titles = (
        "削除済みの動画を視聴しました",
        "YouTube Music にアクセスしました",
    )

    history: List[Dict[str, Any]] = []
    for page in tqdm(raw_data):
        if "titleUrl" not in page:
            title = page["title"]
            is_story = title.rsplit(" ", 1)[-1] == "のストーリーを視聴しました"
            if not (is_story or title in skippable_titles):
                # Raised explicitly (instead of ``assert False``) so that the
                # check is not stripped when Python runs with -O.
                raise AssertionError(
                    f"unexpected entry without titleUrl: {title!r}"
                )
            continue

        if "subtitles" not in page:
            # Non-public video: the title is the URL itself plus the suffix.
            assert page["title"] == f"{page['titleUrl']} を視聴しました", (
                f"unexpected entry without subtitles: {page['title']!r}"
            )
            continue

        assert page["header"] in ["YouTube", "YouTube Music"], (
            f"unexpected header: {page['header']!r}"
        )
        assert page["products"] == ["YouTube"], (
            f"unexpected products: {page['products']!r}"
        )
        assert len(page["subtitles"]) == 1, (
            f"expected exactly one subtitle, got {len(page['subtitles'])}"
        )

        channel = page["subtitles"][0]
        history.append(
            {
                "title": page["title"][: -len(watched_suffix)],
                "title_url": page["titleUrl"],
                "time": page["time"],
                "channel": channel["name"],
                "channel_url": channel["url"],
            }
        )

    # An empty export must not crash the summary line with a division by zero.
    ratio = len(history) / len(raw_data) if raw_data else 0.0
    print(f"{len(history)}[{ratio:.1%}]のデータが読み込まれました")
    return history
def main() -> None:
    """Parse the command line, echo the options as JSON, and load the history.

    ``--takeout-root`` (default ``Takeout``) is the directory under which
    ``YouTube/履歴/watch-history.json`` is looked up.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--takeout-root", type=str, default="Takeout")
    options = cli.parse_args()

    # Echo the effective options as a single JSON line.
    print(json.dumps(vars(options)))

    watch_history_path = Path(
        options.takeout_root, "YouTube", "履歴", "watch-history.json"
    )
    # The loaded records are not used further here.
    load_data(watch_history_path)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()