forked from akkana/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcourseratext
executable file
·67 lines (51 loc) · 1.99 KB
/
courseratext
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env python
# Make a feedme-style directory of .html files from the .txt caption
# files for one week of a Coursera class.
# Usage: courseratext in_dir out_dir
# It will create the out_dir if it doesn't already exist.
import sys, os
def courseratext(in_dir, out_dir):
    """Convert the .txt caption files in in_dir into HTML pages in out_dir.

    Every file in in_dir whose name ends in '.txt' is rewritten as an
    .html file in out_dir, with paragraph breaks inserted heuristically.
    An index.html linking to each generated page is written to out_dir
    as well.  Files without a .txt extension are skipped, and a progress
    message is printed to stdout for every file seen.

    Parameters:
        in_dir:  directory containing the caption .txt files.
        out_dir: directory that receives the .html files and index.html;
                 it is created if it doesn't already exist.

    Raises:
        Whatever os.listdir, os.mkdir or open raise when a directory or
        file can't be read or written; nothing is caught here.
    """
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # Shared page preamble; %s is replaced by the page title.
    header = '''<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link rel="stylesheet" type="text/css" title="Feeds" href="../../feeds.css"/>
<title>%s</title>
</head>
<body>
'''

    # The parentheses matter: % binds tighter than +, so without them
    # in_dir would be appended after <body> instead of going into <title>.
    index_parts = [header % ('Coursera class index for ' + in_dir)]

    for fil in sorted(os.listdir(in_dir)):
        if not fil.endswith('.txt'):
            # Single-argument print() behaves the same on Python 2 and 3.
            print("Skipping " + fil)
            continue

        # Now we know the filename is something.txt.
        # We need the base for it, so we can also make .html.
        # Take some of the spaces out of the filename while we're at it.
        basefil = fil[:-4].replace(' - ', '-')
        htmlfil = basefil + '.html'

        # Put an HTML-ified version of the text file into the new directory.
        # The with blocks close both files even if reading or writing fails.
        with open(os.path.join(in_dir, fil)) as fp:
            with open(os.path.join(out_dir, htmlfil), 'w') as newfile:
                newfile.write(header % fil)

                # True when the previous line ended with a period.
                sentencebreak = False
                for line in fp:
                    # Coursera caption files have no paragraph breaks,
                    # making them hard to read. Add some, even though
                    # this will probably mean too many paragraph breaks
                    # rather than too few.
                    if sentencebreak and line[0].isupper():
                        newfile.write('\n<p>\n')
                    line = line.strip()
                    sentencebreak = line.endswith('.')
                    newfile.write(line + '\n')

        index_parts.append('<p><a href="%s">%s</a>' % (htmlfil, fil))
        print("Wrote " + basefil)

    with open(os.path.join(out_dir, 'index.html'), 'w') as indexfile:
        indexfile.write(''.join(index_parts))
if __name__ == "__main__":
    # Both directories are required; print the usage line and exit
    # non-zero rather than failing with an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit("Usage: %s in_dir out_dir" % os.path.basename(sys.argv[0]))
    courseratext(sys.argv[1], sys.argv[2])