-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrunlit2ksam.pl
executable file
·94 lines (82 loc) · 4.32 KB
/
runlit2ksam.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use 5.016;
use autodie qw(:file);
use open qw(:utf8 :std);
use DBI;
use RDF::Trine;
use YAML::XS qw(LoadFile);
############################################################################################
# Query the SRDB database for signa with SOCH URIs; #
# Read in the YAML dump of Svensk runbibliografi, and find all the works with Libris URIs; #
# Iterate over one, and find all matches from the other; #
# Feed these results into an in-memory triplestore; #
# When finished, serialise the graph as Turtle, for later ingest into the UGC hub. #
############################################################################################
# Get configuration info.
# LoadFile() throws on I/O and parse errors instead of returning false, so the
# exception is trapped here and re-raised with the file name; $! is not
# meaningful for YAML errors and is therefore not reported.
my $config_file = '../config/config.yml';    # relative to the current working directory
my $conf_ref    = eval { LoadFile($config_file) };
die "Failed to read config file $config_file: $@" if $@;
die "Config file $config_file does not contain a non-empty mapping\n"
    unless ref $conf_ref eq 'HASH' && %{$conf_ref};
my %conf = %{$conf_ref};

# Build the DBI data source name from the `dsn` section of the configuration.
my $dsn = "DBI:$conf{dsn}{dbms}:database=$conf{dsn}{database};host=$conf{dsn}{hostname};port=$conf{dsn}{port}";

# RaiseError => 1 turns every DBI failure (including a failed connect) into an
# exception, so later DBI calls need no individual error checks.
# NOTE(review): the pg_* attributes follow DBD::Pg naming and presumably only
# take effect when $conf{dsn}{dbms} selects that driver — confirm against config.
my $dbh = DBI->connect($dsn, $conf{dsn}{username}, $conf{dsn}{password}, {RaiseError => 1, AutoCommit => 1, pg_enable_utf8 => 1, pg_server_prepare => 1});
# Get signa and their associated URIs.
# %signa maps "signum1 signum2" (the two columns joined by a single space) to
# an array ref of every URI recorded for that signum, in objectid order.
# Non-SOCH URIs are not excluded here; they are filtered out when the triples
# are generated.
my %signa;
my $sth = $dbh->prepare(q{
SELECT signum1, signum2, uri
FROM objects_signa_unique JOIN object_uri USING (objectid) JOIN uris USING (uriid)
ORDER BY objectid
});
$sth->execute();
while (my $row = $sth->fetchrow_arrayref) {
    my ($signum1, $signum2, $uri) = @{$row};
    push @{ $signa{ join(' ', $signum1, $signum2) } }, $uri;
}

# The database is not consulted again after this point, so release the
# statement and the connection now rather than holding them open for the rest
# of the run and relying on implicit cleanup at program exit.
$sth->finish;
$dbh->disconnect;
# Read the bibliography dump. LoadFile() is called in list context, so every
# YAML document in the file becomes one element (one work) of @lit; an empty
# result is treated as a failure.
my $lit_file = join('', $conf{path}, 'srb-lit.yml');
my @lit = LoadFile($lit_file) or die "Failed to read SRB-lit file: $!\n";
# In-memory triplestore that accumulates the generated statements until they
# are serialised.
my $store = RDF::Trine::Store->new('Memory');
my $model = RDF::Trine::Model->new($store);

# Namespace map: prefix => base URI. It is used both to mint nodes
# (e.g. $prefixes->librisxl(...)) and to declare prefixes in the Turtle output.
my $prefixes = RDF::Trine::NamespaceMap->new({
    bbr        => 'http://kulturarvsdata.se/raa/bbr/',
    fmis       => 'http://kulturarvsdata.se/raa/fmi/',
    gsm        => 'http://kulturarvsdata.se/GSM/objekt/',
    jm         => 'http://kulturarvsdata.se/JM/object/',
    ksam       => 'http://kulturarvsdata.se/ksamsok#',
    kulturen   => 'http://kulturarvsdata.se/Kulturen/objekt/',
    librisbib  => 'http://libris.kb.se/resource/bib/',
    librisauth => 'http://libris.kb.se/resource/auth/',
    librisxl   => 'https://libris.kb.se/',
    nomu       => 'http://kulturarvsdata.se/nomu/object/',
    shm        => 'http://kulturarvsdata.se/shm/object/',
    shmart     => 'http://kulturarvsdata.se/shm/art/',
    shminv     => 'http://kulturarvsdata.se/shm/inventory/',
    skansen    => 'http://kulturarvsdata.se/sk/object/',
    slm        => 'http://kulturarvsdata.se/SLM/item/',
    solm       => 'http://kulturarvsdata.se/S-OLM/object/',
    upmu       => 'http://kulturarvsdata.se/upmu/object/',
});

# Every generated triple shares this one predicate: ksam:isDescribedBy.
my $predicate = $prefixes->ksam('isDescribedBy');
# Link objects to the works that describe them.
# For every work that has both signa and URLs, and for every one of its signa
# that is known to the database, add one triple
#     <SOCH URI>  ksam:isDescribedBy  <https://libris.kb.se/ID>
# per (Libris URL of the work, SOCH URI of the signum) pair.
#
# The Libris IDs depend only on the work and the SOCH URIs only on the signum,
# so each is extracted once and reused instead of re-running the regexes inside
# the innermost loops. The set of triples and their insertion order are the
# same as with the straightforward four-level loop.
my $libris_re = qr{^https://libris\.kb\.se/(?<librisid>.+)$};
my $soch_re   = qr{^https?://kulturarvsdata\.se/};
my %soch_uris_for;    # signum => array ref of its SOCH URIs (filled on demand)

$model->begin_bulk_ops();
for my $record (@lit) {    # For each record (work)…
    # Skip the work if it has no signa or no URLs…
    next unless exists $record->{signa} && exists $record->{urls};

    my $libris_ids;        # Libris IDs of this work, extracted on first need
    for my $signum (@{ $record->{signa} }) {    # For each signum that work concerns…
        next unless exists $signa{$signum};     # Skip the signum if it does not have a URI…

        # Keep only the URLs that are Libris URIs, remembering just the ID part.
        $libris_ids //= [ map { $_ =~ $libris_re ? $+{librisid} : () } @{ $record->{urls} } ];
        last unless @{$libris_ids};    # No Libris URI: no signum of this work can yield a triple.

        # Keep only the URIs of this signum that are SOCH URIs.
        my $soch_uris = $soch_uris_for{$signum}
            //= [ grep { $_ =~ $soch_re } @{ $signa{$signum} } ];

        for my $librisid (@{$libris_ids}) {
            for my $soch_uri (@{$soch_uris}) {
                # Generate a triple and insert it.
                $model->add_statement(
                    RDF::Trine::Statement->new(
                        RDF::Trine::Node::Resource->new($soch_uri),
                        $predicate,
                        $prefixes->librisxl($librisid),
                    )
                );
            }
        }
    }
}
$model->end_bulk_ops();
say $model->size . ' triples stored!';
# Serialise the whole graph as Turtle, declaring the namespace prefixes, and
# write it next to the input data. open/close failures are raised by autodie.
my $serializer = RDF::Trine::Serializer->new('turtle', namespaces => $prefixes);
my $ttl_path   = join('', $conf{path}, 'srb-lit-soch.ttl');
open my $out, '>', $ttl_path;
$serializer->serialize_model_to_file($out, $model);
close $out;