-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbs_example.py
56 lines (45 loc) · 1.47 KB
/
bs_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from BeautifulSoup import BeautifulSoup
import urllib2
import codecs
import pprint
import unicode_magic as um
url = "http://en.wikipedia.org/wiki/Chinese_language"

# Download the article. The response is closed explicitly with try/finally
# (rather than ``with``) because Python 2 urllib2 responses are not context
# managers; previously the connection was never closed.
page = urllib2.urlopen(url)
try:
    raw_html = page.read()
finally:
    page.close()

# The bytes are decoded as UTF-8 and passed through
# unicode_magic.fix_bad_unicode before parsing, instead of handing the raw
# response straight to BeautifulSoup.
soup = BeautifulSoup(um.fix_bad_unicode(raw_html.decode("utf-8")))

# Take the last of (at most) the first three tables with class "wikitable".
raw_table = soup.findAll(name="table", attrs={"class": "wikitable"}, limit=3)[-1]
rows = raw_table.findAll("tr")

# ``table`` is a list of rows; row 0 holds the header texts and every later
# row holds the text of the first len(keys) <td> cells of one <tr>.
keys = [header.text for header in rows[0].findAll("th")]
table = [keys]
for row in rows[1:]:
    # Look the cells up once per row instead of once per column.
    cells = row.findAll("td")
    # Index explicitly so a row with fewer cells than headers still raises
    # IndexError rather than silently producing a ragged table.
    table.append([cells[col].text for col in range(len(keys))])
def dump(o, path="output.txt"):
    """Write the text *o* to a UTF-8 encoded file, replacing any existing file.

    Args:
        o: The (unicode) text to write.
        path: Destination file. Defaults to ``output.txt`` in the current
            working directory, which is where earlier callers expect it.
    """
    with codecs.open(path, "w", "utf-8") as out:
        out.write(o)
def pad(str, width=25, tab_size=4):
    """Return *str* followed by padding made of spaces and then tabs.

    The padding covers ``width - len(str)`` columns: one tab for every full
    ``tab_size`` columns and single spaces for the remainder (presumably so
    columns line up when tabs render ``tab_size`` wide). Text that is already
    ``width`` characters or longer is returned unchanged.

    Args:
        str: The text to pad. The name shadows the builtin but is kept so
            existing keyword callers continue to work.
        width: Target column width; defaults to the original 25.
        tab_size: Columns covered by one tab; defaults to the original 4.

    Returns:
        The padded text.
    """
    # Clamp at zero: a negative gap used to floor-divide into a negative tab
    # count and leave up to three stray spaces appended to over-long text.
    gap = max(0, width - len(str))
    # divmod floors explicitly, so the result is an integer tab count under
    # both Python 2 and true-division semantics.
    tabs, spaces = divmod(gap, tab_size)
    return str + ' ' * spaces + '\t' * tabs
# Serialize the table as one line per row with cells separated by single
# spaces, then write it to disk. Every row is included; the previous
# ``range(len(table)-1)`` bound silently dropped the final row.
blob = '\n'.join(' '.join(row) for row in table)
dump(blob)
def in_ipynb():
    """Return True when the script appears to run inside an IPython Notebook.

    The check reads ``get_ipython().config['IPKernelApp']['parent_appname']``
    and compares it with ``'ipython-notebook'``.

    Returns:
        bool: True only if that config value equals ``'ipython-notebook'``;
        False when ``get_ipython`` is not defined or the config keys are
        missing.
    """
    try:
        cfg = get_ipython().config
    except NameError:
        # get_ipython is not defined, e.g. under a plain Python interpreter.
        return False
    try:
        return cfg['IPKernelApp']['parent_appname'] == 'ipython-notebook'
    except KeyError:
        # An IPython session without this setting is not treated as a
        # notebook; previously the KeyError propagated to the caller.
        return False
# Call in_ipynb(): testing the bare function object is always truthy, which
# made the else branch unreachable.
if in_ipynb():
    def table_print():
        """Print ``table`` one row per line, cells separated by single spaces."""
        for row in table:
            # A single parenthesized argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(" ".join(row))
    table_print()
else:
    print("Unicode output suppressed when not in IPython Notebook")