-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnew-P39s.rb
executable file
·36 lines (27 loc) · 1.18 KB
/
new-P39s.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/env ruby
# frozen_string_literal: true
# Check a Wikipedia scraper outfile against what's currently in
# Wikidata, suggesting new P39s to add
require 'csv'
require 'pry'
require 'shellwords'
require_relative 'lib/inputfile'
# Pathname is used directly below, so require it explicitly rather than
# relying on another file having already loaded it.
require 'pathname'

# Two inputs are needed: the first argument is the Wikipedia scraper output
# and the last argument is the Wikidata query result. Fail early with a
# usage message instead of an obscure error further down.
abort "Usage: #{$PROGRAM_NAME} <wikipedia-scraper-output> <wikidata-term-members>" if ARGV.size < 2

wikipedia_file = Pathname.new(ARGV.first) # output of scraper
wikidata_file = Pathname.new(ARGV.last) # `wd sparql term-members.sparql`

# Make sure both inputs can actually be read before handing them on.
[wikipedia_file, wikidata_file].each do |input|
  abort "Cannot read input file: #{input}" unless input.readable?
end

wikipedia = InputFile::CSV.new(wikipedia_file)
wikidata = InputFile::JSON.new(wikidata_file)
# Walk every scraped Wikipedia row. For each row whose id has a higher tally
# in the Wikipedia data than in the Wikidata data (a missing Wikidata tally
# counts as 0), print an `add_P39.js` command line, unless Wikidata already
# holds an entry for that id with the same start (P580) or end (P582) value.
# Diagnostics are written with `warn`; the command itself with `puts`.
# TODO: check for anything in an overlapping range
command_fields = %i[id P580 P582 P1365 P1366 P1545 P5054]

wikipedia.data.each do |scraped|
  id = scraped[:id]
  next if id.to_s.empty?

  wp_count = wikipedia.tally[id]
  wd_count = wikidata.tally.fetch(id, 0)
  next unless wp_count > wd_count

  existing = wikidata.find(id)
  starts_on = scraped[:P580]
  ends_on = scraped[:P582]

  # Skip this row if anything already in Wikidata shares its start or end date
  already_known = existing.any? do |held|
    (starts_on == held[:P580]) || (ends_on == held[:P582])
  end
  next if already_known

  warn "\n#{scraped[:name]}: WP: #{wp_count} // WD: #{wd_count}"
  existing.each { |held| warn " WD has #{held[:P580]} – #{held[:P582]}" }
  warn " To add #{starts_on} - #{ends_on} call:"

  arguments = scraped.values_at(*command_fields).shelljoin
  puts "add_P39.js #{arguments}"
end