Skip to content

Commit

Permalink
Tries to improve the Volumes query
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed Mar 17, 2024
1 parent 37fb4ca commit c197b17
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 10 deletions.
3 changes: 2 additions & 1 deletion ceurws/models/dblp2.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ class Proceeding(SQLModel, table=True):
"""
A proceeding indexed in DBLP with additional details.
"""
proceeding:str = Field(primary_key=True)
dblp_publication_id: Optional[str]
volume_number: int = Field(primary_key=True)
volume_number: int = Field(index=True)
title: str
dblp_event_id: Optional[str] = None

Expand Down
24 changes: 23 additions & 1 deletion ceurws/resources/queries/dblp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,29 @@
?gnd_blank litre:hasLiteralValue ?_gnd_id.
}
}GROUP BY ?dblp_author_id
# Lists the dblp proceedings published in "CEUR Workshop Proceedings",
# grouped into one result row per (proceeding, volume number) pair.
'CEUR-WS-Volumes':
  sparql: |
    # CEUR-WS-Proceedings query
    # Tim Holzheim 2022
    # refactored WF 2024-03-17
    #
    # Result columns:
    #   ?proceeding     the dblp proceeding resource
    #   ?volume_number  value of dblp:publishedInSeriesVolume
    #   ?title          one (sampled) dblp:title, if any
    #   ?editor_urls    the distinct dblp:editedBy values joined with ";"
    #   ?dblp_event_id  one (sampled) dblp:listedOnTocPage value, if any
    #
    PREFIX datacite: <http://purl.org/spar/datacite/>
    PREFIX dblp: <https://dblp.org/rdf/schema#>
    PREFIX litre: <http://purl.org/spar/literal/>
    SELECT DISTINCT
      ?proceeding
      ?volume_number
      (SAMPLE(?_title) as ?title)
      # DISTINCT is required here: the OPTIONAL patterns below are joined
      # as a cross product, so without it every editor would be repeated
      # once per additional title / TOC page match of the same proceeding.
      (GROUP_CONCAT(DISTINCT ?_editor; SEPARATOR=";") as ?editor_urls)
      (SAMPLE(?_dblp_event_id) as ?dblp_event_id)
    WHERE{
      ?proceeding dblp:publishedIn "CEUR Workshop Proceedings".
      ?proceeding dblp:publishedInSeriesVolume ?volume_number.
      OPTIONAL{?proceeding dblp:title ?_title .}
      OPTIONAL{?proceeding dblp:editedBy ?_editor}
      OPTIONAL{?proceeding dblp:listedOnTocPage ?_dblp_event_id}
    }
    GROUP BY ?proceeding ?volume_number
'CEUR-WS-Papers':
sparql: |
PREFIX datacite: <http://purl.org/spar/datacite/>
Expand Down
18 changes: 10 additions & 8 deletions tests/test_dblp2.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ def setUp(self, debug=True, profile=True):
#self.endpointUrl = "http://dblp.wikidata.dbis.rwth-aachen.de/api/dblp"
self.endpoint_url="https://qlever.cs.uni-freiburg.de/api/dblp"
self.sparql=SPARQL(self.endpoint_url)
self.force_query=True
path = os.path.dirname(__file__)
path = os.path.dirname(path)
qYamlFile = f"{path}/ceurws/resources/queries/dblp.yaml"
if os.path.isfile(qYamlFile):
self.qm = QueryManager(lang="sparql", queriesPath=qYamlFile)
self.sql_db=SqlDB("/tmp/ceurws.db",debug=False)
self.qYamlFile = os.path.join(os.path.dirname(path), 'ceurws/resources/queries/dblp.yaml')
if os.path.isfile(self.qYamlFile):
self.qm = QueryManager(lang="sparql", queriesPath=self.qYamlFile)
self.db_path = "/tmp/ceurws.db"
if self.force_query and os.path.isfile(self.db_path):
os.remove(self.db_path)
self.sql_db = SqlDB(self.db_path, debug=False)

def test_dblp_caches(self):
"""
Expand All @@ -31,15 +34,14 @@ def test_dblp_caches(self):
from ceurws.models.dblp2 import Paper,Scholar, Proceeding, Authorship, Editorship

caches=[
Cached(Proceeding,self.sparql,sql_db=self.sql_db,query_name="CEUR-WS all Volumes",debug=self.debug),
Cached(Proceeding,self.sparql,sql_db=self.sql_db,query_name="CEUR-WS-Volumes",debug=self.debug),
Cached(Scholar,self.sparql,sql_db=self.sql_db,query_name="CEUR-WS-Scholars",debug=self.debug),
Cached(Paper,self.sparql,sql_db=self.sql_db,query_name="CEUR-WS-Papers",debug=self.debug),
Cached(Editorship, self.sparql, sql_db=self.sql_db, query_name="CEUR-WS-Editorship", debug=self.debug),
Cached(Authorship, self.sparql, sql_db=self.sql_db, query_name="CEUR-WS-Authorship", debug=self.debug)
]
force_query=True
for cache in caches:
cache.fetch_or_query(self.qm,force_query=force_query)
cache.fetch_or_query(self.qm,force_query=self.force_query)
#paper_cache.get_lod(self.qm)
#paper_cache.store()

0 comments on commit c197b17

Please sign in to comment.