-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
bb89e1a
commit 206448d
Showing
10 changed files
with
447 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Identity-style transform that adds @key attributes to tei:author, tei:persName
    and tei:placeName elements by looking their text up in the authority files.

    * Any existing @key attribute is removed from every element.
    * Existing document-level xml-model processing instructions are replaced by the
      two emitted in the root template.
    * Everything else is copied through unchanged.
-->
<xsl:stylesheet xmlns="http://www.tei-c.org/ns/1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:tei="http://www.tei-c.org/ns/1.0"
    xmlns:saxon="http://saxon.sf.net/"
    exclude-result-prefixes="xs"
    version="2.0">

    <!-- DON'T FORGET TO SET XSLT TRANSFORMER TO IGNORE THE SCHEMA (TO AVOID ADDING DEFAULT ATTRIBUTES) -->

    <xsl:output method="xml" encoding="UTF-8"/>

    <!-- A real line feed. A character reference is used so that attribute-value
         normalization cannot turn it into a space. -->
    <xsl:variable name="newline" select="'&#10;'"/>

    <!-- Authority entries. $works is only needed by the disabled msItem branch below. -->
    <xsl:variable name="works" select="document('../../authority/works_master.xml')//tei:TEI/tei:text/tei:body/tei:listBibl/tei:bibl"/>
    <xsl:variable name="people" select="document('../../authority/persons_master.xml')//tei:TEI/tei:text/tei:body/tei:listPerson/tei:person"/>
    <xsl:variable name="places" select="document('../../authority/places_master.xml')//tei:TEI/tei:text/tei:body/tei:listPlace/tei:place"/>

    <!-- Document node: write fresh xml-model PIs, then process the rest of the document. -->
    <xsl:template match="/">
        <xsl:value-of select="$newline"/>
        <xsl:processing-instruction name="xml-model"><xsl:text>href="https://raw.githubusercontent.com/bodleian/consolidated-tei-schema/master/msdesc.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"</xsl:text></xsl:processing-instruction>
        <xsl:value-of select="$newline"/>
        <xsl:processing-instruction name="xml-model"><xsl:text>href="https://raw.githubusercontent.com/bodleian/consolidated-tei-schema/master/msdesc.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"</xsl:text></xsl:processing-instruction>
        <xsl:value-of select="$newline"/>
        <!-- Filter on the node itself (self::), not on its children: skip only the old
             xml-model PIs and keep the root element plus any other top-level
             comments or processing instructions. -->
        <xsl:apply-templates select="node()[not(self::processing-instruction('xml-model'))]"/>
    </xsl:template>

    <!-- Every element: copy it, drop any old @key, and add a new @key when the
         element's text matches an authority entry. -->
    <xsl:template match="*">
        <xsl:copy>
            <xsl:apply-templates select="@*[not(name()='key')]"/> <!-- A very, very few already have keys, and they don't relate to anything, so strip them out. -->
            <xsl:choose>
                <!-- Don't do msItems
                <xsl:when test="self::tei:msItem">
                    <xsl:variable name="thisid" select="@xml:id"/>
                    <xsl:if test="$thisid = $works/tei:ref/@target">
                        <xsl:attribute name="key" select="$works[tei:ref/@target = $thisid]/@xml:id"/>
                    </xsl:if>
                </xsl:when>-->
                <xsl:when test="self::tei:author">
                    <!-- The person authority query builds author names from the text that is
                         NOT inside a nested persName, so the lookup value must be built the
                         same way or such authors can never match. -->
                    <xsl:variable name="thisval" select="normalize-space(string-join(.//text()[not(ancestor::tei:persName)], ' '))"/>
                    <!-- Scan the authority list once and reuse the result for test and value. -->
                    <xsl:variable name="matches" select="$people[tei:persName = $thisval]"/>
                    <xsl:if test="exists($matches)">
                        <xsl:attribute name="key" select="$matches/@xml:id"/>
                    </xsl:if>
                </xsl:when>
                <xsl:when test="self::tei:persName">
                    <xsl:variable name="thisval" select="normalize-space(string-join(.//text(), ' '))"/>
                    <xsl:variable name="matches" select="$people[tei:persName = $thisval]"/>
                    <xsl:if test="exists($matches)">
                        <xsl:attribute name="key" select="$matches/@xml:id"/>
                    </xsl:if>
                </xsl:when>
                <xsl:when test="self::tei:placeName">
                    <xsl:variable name="thisval" select="normalize-space(string-join(.//text(), ' '))"/>
                    <xsl:variable name="matches" select="$places[tei:placeName = $thisval]"/>
                    <xsl:if test="exists($matches)">
                        <xsl:attribute name="key" select="$matches/@xml:id"/>
                    </xsl:if>
                </xsl:when>
            </xsl:choose>
            <xsl:apply-templates/>
        </xsl:copy>
    </xsl:template>

    <!-- Attributes, comments and processing instructions are copied as they are. -->
    <xsl:template match="@*|comment()|processing-instruction()">
        <xsl:copy/>
    </xsl:template>

</xsl:stylesheet>
103 changes: 103 additions & 0 deletions
103
processing/batch_conversion/build-interim-person-authority-file.xquery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
(:
    Builds an interim person authority file: one TEI person entry per distinct
    name found in persName and author elements of the Hebrew and Genizah
    collections, each listing the catalogue records the name occurs in.
:)
declare namespace tei="http://www.tei-c.org/ns/1.0";
(: Bind the saxon prefix explicitly rather than relying on the processor to predeclare it. :)
declare namespace saxon="http://saxon.sf.net/";
declare option saxon:output "indent=yes";

(: Writes a message through trace() and returns a zero-length string, so that
   nothing is inserted into the generated XML. :)
declare function local:logging($level, $msg, $values)
{
    (: Trick XQuery into doing trace() to output message to STDERR but not insert it into the XML :)
    substring(trace('', concat(upper-case($level), ' ', $msg, ' ', string-join($values, ' '), ' ')), 0, 0)
};

<TEI xmlns="http://www.tei-c.org/ns/1.0">
    <teiHeader>
        <fileDesc>
            <titleStmt>
                <title>Title</title>
            </titleStmt>
            <publicationStmt>
                <p>Publication Information</p>
            </publicationStmt>
            <sourceDesc>
                <p>Information about the source</p>
            </sourceDesc>
        </fileDesc>
    </teiHeader>
    <text>
        <body>
            <listPerson>
{
    (: The xmlns declaration on the TEI constructor above makes the TEI namespace the
       default element namespace for everything inside it. The temporary person, name,
       file and ref elements built below are therefore TEI-namespaced, which is why the
       unprefixed steps name and ref in the de-duplication further down find them. :)

    (: Keys that are already covered by manually-maintained authority files. :)
    let $skipids := ()

    (: First build an in-memory nodeset temporarily storing names, IDs and the files they come from. :)
    let $hebrewpeople := (
        for $x in collection('../../collections/?select=*.xml;recurse=yes')//tei:persName[not(ancestor::tei:revisionDesc or ancestor::tei:respStmt)]
        return
            (: General comparison on @key: stays valid when $skipids holds several ids,
               where the value comparison eq would raise a type error. :)
            if ($x/@key = $skipids) then
                ( (: This @key is in one of the manually-maintained authority files, so don't include it in the generated list :) )
            else
                <person>
                    <name>{ normalize-space(string-join($x//text(), ' ')) }</name>
                    <file>{ base-uri($x) }</file>
                    <ref>/catalog/{ $x/ancestor::tei:TEI/@xml:id/data() }|{ ($x/ancestor::tei:TEI/tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:msDesc/tei:msIdentifier/tei:idno)[1]/text() }</ref>
                </person>
    )

    (: NOTE: In the Hebrew catalogue, persName when used inside author marks up a translated variant of the same name. :)
    let $hebrewauthors := (
        for $x in collection('../../collections/?select=*.xml;recurse=yes')//tei:author
        return
            if ($x/@key = $skipids) then
                ( (: This @key is in one of the manually-maintained authority files, so don't include it in the generated list :) )
            else
                <person>
                    <name>{ normalize-space(string-join($x//text()[not(ancestor::tei:persName)], ' ')) }</name>
                    <file>{ base-uri($x) }</file>
                    <ref>/catalog/{ $x/ancestor::tei:TEI/@xml:id/data() }|{ ($x/ancestor::tei:TEI/tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:msDesc/tei:msIdentifier/tei:idno)[1]/text() }</ref>
                </person>
    )

    let $genizahpeople := (
        for $x in collection('../../../genizah-mss/collections/?select=*.xml;recurse=yes')//tei:persName[not(ancestor::tei:revisionDesc or ancestor::tei:respStmt)]
        return
            if ($x/@key = $skipids) then
                ( (: This @key is in one of the manually-maintained authority files, so don't include it in the generated list :) )
            else
                <person>
                    <name>{ normalize-space(string-join($x//text(), ' ')) }</name>
                    <file>{ base-uri($x) }</file>
                    <ref>/catalog/{ $x/ancestor::tei:TEI/@xml:id/data() }|{ ($x/ancestor::tei:TEI/tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:msDesc/tei:msIdentifier/tei:idno)[1]/text() }</ref>
                </person>
    )

    let $allpeople := ($hebrewpeople, $hebrewauthors, $genizahpeople)
    (: NOTE: No author TEI elements used in Genizah :)

    (: One output entry per distinct name, sorted by name, with the distinct
       catalogue references of every occurrence of that name.
       NOTE(review): $pos is the position in the distinct-values() result, taken
       before "order by" is applied, so the person_N ids do not follow the sorted
       output order. Left unchanged so that generated ids stay the same. :)
    let $dedupedpeople := (
        for $t at $pos in distinct-values($allpeople/name/text())
        order by $t
        return
            <person xml:id="{ concat('person_', $pos) }">
                <persName type="display">{ $t }</persName>
                {
                    for $s in distinct-values($allpeople[name = $t]/ref/text())
                    order by $s
                    return
                        <ref>{ $s }</ref>
                }
            </person>
    )

    return $dedupedpeople
}
            </listPerson>
        </body>
    </text>
</TEI>
|
||
|
||
|
||
|
||
|
95 changes: 95 additions & 0 deletions
95
processing/batch_conversion/build-interim-place-authority-file.xquery
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
(:
    Builds an interim place authority file: one TEI place entry per distinct
    placeName text found in the Hebrew and Genizah collections, each listing the
    catalogue records the name occurs in and the keyword scheme(s) it came from.
:)
declare namespace tei="http://www.tei-c.org/ns/1.0";
(: Bind the saxon prefix explicitly rather than relying on the processor to predeclare it. :)
declare namespace saxon="http://saxon.sf.net/";
declare option saxon:output "indent=yes";

(: Writes a message through trace() and returns a zero-length string, so that
   nothing is inserted into the generated XML. :)
declare function local:logging($level, $msg, $values)
{
    (: Trick XQuery into doing trace() to output message to STDERR but not insert it into the XML :)
    substring(trace('', concat(upper-case($level), ' ', $msg, ' ', string-join($values, ' '), ' ')), 0, 0)
};

<TEI xmlns="http://www.tei-c.org/ns/1.0">
    <teiHeader>
        <fileDesc>
            <titleStmt>
                <title>Title</title>
            </titleStmt>
            <publicationStmt>
                <p>Publication Information</p>
            </publicationStmt>
            <sourceDesc>
                <p>Information about the source</p>
            </sourceDesc>
        </fileDesc>
    </teiHeader>
    <text>
        <body>
            <listPlace>
{
    (:
        TODO:
            Pick up on @role attributes, if any
            Batch convert the TEI to add @key to all persNames then use that here (blow away existing ones - there's only about a dozen, and they contain spaces)
        NOTE(review): the TODO above says persNames; presumably placeNames is meant in this file. Confirm.
    :)

    (: The xmlns declaration on the TEI constructor above makes the TEI namespace the
       default element namespace for everything inside it. The temporary place, name,
       file, ref and scheme elements built below are therefore TEI-namespaced, which is
       why the unprefixed steps name, ref and scheme further down find them. :)

    (: Keys that are already covered by manually-maintained authority files. TODO: populate. :)
    let $skipids := ()

    (: Collections to harvest, in the order their entries are gathered: Hebrew first, then Genizah. :)
    let $sources := (
        '../../collections/?select=*.xml;recurse=yes',
        '../../../genizah-mss/collections/?select=*.xml;recurse=yes'
    )

    (: First build an in-memory nodeset temporarily storing names, IDs and the files they come from. :)
    let $allplaces := (
        for $source in $sources,
            $x in collection($source)//tei:placeName
        return
            (: General comparison on @key: stays valid when $skipids holds several ids,
               where the value comparison eq would raise a type error. :)
            if ($x/@key = $skipids) then
                ( (: This @key is in one of the manually-maintained authority files, so don't include it in the generated list :) )
            else
                <place>
                    <name>{ normalize-space(string-join($x//text(), ' ')) }</name>
                    <file>{ base-uri($x) }</file>
                    <ref>/catalog/{ $x/ancestor::tei:TEI/@xml:id/data() }|{ ($x/ancestor::tei:TEI/tei:teiHeader/tei:fileDesc/tei:sourceDesc/tei:msDesc/tei:msIdentifier/tei:idno)[1]/text() }</ref>
                    <scheme>{ ($x/ancestor::tei:keywords/@scheme/data(), 'bodl')[1] }</scheme>
                </place>
    )

    (: One output entry per distinct name, sorted by name, with the distinct
       catalogue references and schemes of every occurrence of that name.
       NOTE(review): $pos is the position in the distinct-values() result, taken
       before "order by" is applied, so the place_N ids do not follow the sorted
       output order. Left unchanged so that generated ids stay the same. :)
    let $dedupedplaces := (
        for $t at $pos in distinct-values($allplaces/name/text())
        order by $t
        return
            <place xml:id="{ concat('place_', $pos) }">
                <placeName type="index" source="{ string-join(distinct-values($allplaces[name = $t]/scheme/text()), ' ') }">{ $t }</placeName>
                {
                    for $s in distinct-values($allplaces[name = $t]/ref/text())
                    order by $s
                    return
                        <ref>{ $s }</ref>
                }
            </place>
    )

    return $dedupedplaces
}
            </listPlace>
        </body>
    </text>
</TEI>
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.