forked from erwanor/engineering-blogs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_opml.rb
executable file
·132 lines (115 loc) · 3.29 KB
/
generate_opml.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env ruby
require 'builder'
require 'feedbag'
require 'json'
require 'net/http'
require 'nokogiri'
require 'uri'
# Name of the OPML file this script reads (for cached feed URLs) and writes.
OUTPUT_FILENAME = 'engineering_blogs.opml'
# Title used for the OPML <head> and for the top-level outline.
TITLE = 'Engineering Blogs'

# Grab name/url pairings from README.md.
# File.read opens, reads and closes the file in one call, so no handle is
# left open for the rest of the run.
contents = File.read('README.md')
# Each match is a two-element array: [name, web_url], taken from lines shaped
# like "* <name> <http...>".
matches = contents.scan(/\* (.*) (http.*)/)
# Record type for one blog entry: display name, site URL and feed URL.
# Passing the name as a String registers the class as Struct::Blog.
Struct.new('Blog', :name, :web_url, :rss_url)

# Blogs with a usable feed URL (filled in while the README entries are processed).
blogs = []
# Blogs that were skipped or for which no feed URL was found.
unavailable = []

# Names that are skipped outright: no feed lookup is attempted for them and
# they are recorded as unavailable.
fast_forwards = [
  'Baidu Research', 'Booking.com', 'Fynd', 'Graphcool', 'LinkedIn',
  'Medallia', 'OmniTI', 'Paperless Post', 'Pluralsight',
  'Prolific Interactive', 'Quora', 'Robert Elder Software', 'Simple',
  'SlideShare', 'SourceClear', 'Viget', 'Zalando', 'Zapier', 'Zynga',
  'Dave Beazley', 'Edan Kwan', 'Grzegorz Gajos', 'Joe Armstrong',
  'Kai Hendry', 'LiveOverflow'
]
# Path suffixes tried, in order, when Feedbag finds no feed at the blog's own URL.
FEED_PATH_SUFFIXES = ['/rss', '/feed', '/feeds', '/atom.xml', '/feed.xml', '/rss.xml', '.atom'].freeze

# Reads a previously generated OPML file and returns a Hash of
# htmlUrl => xmlUrl for every outline that carries an htmlUrl attribute.
# Returns an empty Hash when the file does not exist.
#
# The file is parsed once (and its handle closed by the block form) instead of
# once per blog, and lookups become plain Hash access, so URLs containing
# quotes can no longer break an interpolated XPath expression.
def load_known_feeds(path)
  return {} unless File.exist?(path)

  document = File.open(path) { |file| Nokogiri::XML(file) }
  document.xpath('//outline[@htmlUrl]').each_with_object({}) do |outline, feeds|
    html_url = outline['htmlUrl']
    # keep the first outline for a given htmlUrl
    feeds[html_url] = outline['xmlUrl'] unless feeds.key?(html_url)
  end
end

# Asks the Google Feed lookup endpoint for the feed URL of web_url.
# Returns the URL String, or nil when the response has none or the request or
# parsing fails; a failure is reported on stderr so the caller can fall back
# to Feedbag instead of aborting the whole run.
# NOTE(review): this Google endpoint appears to have been retired - confirm
# whether this step can be dropped entirely.
def google_feed_lookup(web_url)
  # escape the blog URL so characters such as '&', '#' or spaces cannot
  # corrupt the query string
  query = URI.encode_www_form_component(web_url)
  uri = URI.parse("http://ajax.googleapis.com/ajax/services/feed/lookup?v=1.0&q=#{query}")
  response = JSON.parse(Net::HTTP.get(uri))
  url = response.dig('responseData', 'url') if response.is_a?(Hash)
  url if url.is_a?(String)
rescue StandardError => e
  warn "Google feed lookup failed for #{web_url}: #{e.class}: #{e.message}"
  nil
end

# Uses Feedbag to discover a feed, first at web_url itself and then at each
# of FEED_PATH_SUFFIXES appended to it. Returns the first feed URL found, or nil.
def feedbag_lookup(web_url)
  base = web_url.chomp('/')
  candidates = [web_url] + FEED_PATH_SUFFIXES.map { |suffix| "#{base}#{suffix}" }
  candidates.each do |candidate|
    feed = Feedbag.find(candidate).first
    return feed if feed
  end
  nil
end

# for each blog URL, check if rss URL exists
known_feeds = load_known_feeds(OUTPUT_FILENAME)

matches.each do |name, web_url|
  if fast_forwards.include?(name)
    puts "#{name}: TEMP IGNORE"
    unavailable.push(Struct::Blog.new(name, web_url, nil))
    next
  end

  # if rss_url already in existing opml file, use that; otherwise, do a lookup
  rss_url = nil
  if known_feeds.key?(web_url)
    rss_url = known_feeds[web_url]
    puts "#{name}: ALREADY HAVE"
  end

  if rss_url.nil?
    puts "#{name}: GETTING"
    # Feedbag is the backup when the Google lookup yields nothing
    rss_url = google_feed_lookup(web_url) || feedbag_lookup(web_url)
  end

  if rss_url && !rss_url.empty?
    blogs.push(Struct::Blog.new(name, web_url, rss_url))
  else
    unavailable.push(Struct::Blog.new(name, web_url, rss_url))
  end
end
# Order both lists by name, ignoring the case of the letters after the first.
# sort_by! reorders the arrays in place; plain sort_by would return a new
# array and leave these lists in README order.
blogs.sort_by! { |b| b.name.capitalize }
unavailable.sort_by! { |b| b.name.capitalize }
# create and write to opml file
# The document has a <head> holding the title and a <body> with a single
# outline titled TITLE that contains one rss outline per entry in blogs.
xml = Builder::XmlMarkup.new(indent: 2)
xml.instruct! :xml, version: '1.0', encoding: 'UTF-8'
xml.tag!('opml', version: '1.0') do
  # head
  xml.tag!('head') do
    xml.title TITLE
  end
  # body
  xml.tag!('body') do
    xml.tag!('outline', text: TITLE, title: TITLE) do
      blogs.each do |blog|
        xml.tag!('outline', type: 'rss', text: blog.name, title: blog.name,
                            xmlUrl: blog.rss_url, htmlUrl: blog.web_url)
      end
    end
  end
end

# The block form closes the file even if the write raises.
File.open(OUTPUT_FILENAME, 'wb') { |output| output.write(xml.target!) }
# Summarise the run: how many feeds were written, followed by a list of the
# blogs (name and site URL) that ended up in the unavailable list.
divider = "==================================================="

puts "DONE: #{blogs.count} written to #{OUTPUT_FILENAME}"
puts "\nUnable to find an RSS feed for the following blogs:", divider
unavailable.each { |blog| puts "#{blog.name} | #{blog.web_url}" }
puts divider