-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathget-all-missing-images.py
67 lines (54 loc) · 1.9 KB
/
get-all-missing-images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import fnmatch
import os
import lxml.html
# Report every image that is referenced by the HTML files linked from the
# navigation (*nav*html) files but is missing from the filesystem.
# =================================
# just some fancy colors for output
# ANSI escape sequences used by the output helpers below to colour text.
HEADER = "\033[95m"
OKBLUE = "\033[94m"   # informational messages
OKGREEN = "\033[92m"  # success messages
WARNING = "\033[93m"  # warnings
FAIL = "\033[91m"     # errors
ENDC = "\033[0m"      # resets all attributes; appended after every message
BOLD = "\033[1m"
def disable():
    """Turn off coloured output by blanking every colour escape sequence.

    The constants live at module level, so they must be declared ``global``;
    without that the assignments only create locals and the call has no
    effect on what the output helpers print.
    """
    global HEADER, OKBLUE, OKGREEN, WARNING, FAIL, ENDC, BOLD
    HEADER = ''
    OKBLUE = ''
    OKGREEN = ''
    WARNING = ''
    FAIL = ''
    ENDC = ''
    # BOLD is also an escape sequence, so it is cleared with the rest.
    BOLD = ''
def infog(msg):
    """Print *msg* wrapped in the OKGREEN colour sequence and the ENDC reset."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(OKGREEN + msg + ENDC)
def info(msg):
    """Print *msg* wrapped in the OKBLUE colour sequence and the ENDC reset."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(OKBLUE + msg + ENDC)
def warn(msg):
    """Print *msg* wrapped in the WARNING colour sequence and the ENDC reset."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(WARNING + msg + ENDC)
def err(msg):
    """Print *msg* prefixed with 'ERROR: ', wrapped in FAIL and the ENDC reset."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(FAIL + 'ERROR: ' + msg + ENDC)
# =================================
def _resolve_link(referrer, link):
    """Return the filesystem path that *link*, found in *referrer*, points to.

    Returns None when the link cannot be checked on disk: external URLs
    (anything with a scheme, or protocol-relative ``//host/...``) and links
    that consist only of a fragment or query.
    """
    # Only the path part names a file; '#anchor' and '?query' never do.
    target = link.strip().split('#', 1)[0].split('?', 1)[0]
    if not target or target.startswith('//'):
        return None
    # A colon before the first slash marks a URL scheme (http:, mailto:, data:).
    if ':' in target.split('/', 1)[0]:
        return None
    return os.path.join(os.path.dirname(referrer), target)


def main():
    """Report HTML files and images referenced from navigation files but missing.

    Walks the current directory for files matching ``*nav*html``, follows
    every ``<a href>`` in each of them, and for each linked HTML file that
    exists checks every ``<img src>``. Missing targets are reported via
    ``err``. Links are resolved relative to the file that contains them.
    """
    # nav_matches contains all found *nav*html files.
    nav_matches = []
    for root, _dirnames, filenames in os.walk('.'):
        for filename in fnmatch.filter(filenames, '*nav*html'):
            nav_matches.append(os.path.join(root, filename))

    # Check that every file linked from a nav file exists, then check its images.
    for nav_file in nav_matches:
        print('====== Analysing navigation HTML: ' + nav_file)
        # Several nav entries often point into the same file (different
        # anchors); check and report each target only once per nav file.
        checked = set()
        for href in lxml.html.parse(nav_file).xpath("//a/@href"):
            nav_html = _resolve_link(nav_file, href)
            if nav_html is None or nav_html in checked:
                continue
            checked.add(nav_html)
            if not os.path.isfile(nav_html):
                err('HTML file {0} in {1} does not exist!'.format(
                    nav_html, os.path.basename(nav_file)))
                continue
            for img_link in lxml.html.parse(nav_html).xpath("//img/@src"):
                img_file = _resolve_link(nav_html, img_link)
                if img_file is not None and not os.path.isfile(img_file):
                    err('Image file {0} in {1} does not exist!'.format(
                        img_file, nav_html))
# =================================
# Run the check only when executed as a script, not when imported.
if '__main__' == __name__:
    main()