Add workflow that checks the Qleverfiles (#78)

For each of the Qleverfiles in `src/qlever/Qleverfiles`, check whether the main commands (`get-data`, `index`, `start`) work, without actually executing them (just run `qlever ... --show`). In particular, this catches the case where one of these Qleverfiles cannot be parsed properly.
ad-freiburg · Oct 19, 2024 · 22eff54 · 22eff54
1 parent a583b10
commit 22eff54
Show file tree

Hide file tree

Showing 8 changed files with 67 additions and 17 deletions.
diff --git a/.github/workflows/qleverfiles-check.yml b/.github/workflows/qleverfiles-check.yml
@@ -0,0 +1,50 @@
+name: Qleverfiles check
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+  merge_group:
+
+jobs:
+  qleverfiles-check:
+    runs-on: ${{matrix.os}}
+    strategy:
+      fail-fast: true
+      matrix:
+        os: [ubuntu-24.04]
+
+    steps:
+      - name: Checkout the repository for the qlever script
+        uses: actions/checkout@v3
+        with:
+          path: qlever-control
+
+      - name: Install the script locally
+        working-directory: ${{github.workspace}}/qlever-control
+        run: |
+          # python3 -m pip install --upgrade pip setuptools wheel
+          # python3 --version
+          # pip3 --version
+          # pip3 show setuptools wheel
+          pip install -e .
+
+      - name: Check that all the files in `src/qlever/Qleverfiles` parse.
+        working-directory: ${{github.workspace}}/qlever-control
+        run: |
+          for QLEVERFILE in src/qlever/Qleverfiles/Qleverfile.*; do
+            echo
+            echo -e "\x1b[1;34mChecking ${QLEVERFILE}\x1b[0m"
+            echo
+            NAME=${QLEVERFILE##*.}
+            rm -f Qleverfile
+            qlever setup-config $NAME
+            qlever get-data --show
+            qlever index --show
+            qlever start --show
+            qlever ui --show
+            echo
+            echo -e "\x1b[34mAll checks passed for ${QLEVERFILE}\x1b[0m"
+            echo
+          done
diff --git a/src/qlever/Qleverfiles/Qleverfile.dblp-plus b/src/qlever/Qleverfiles/Qleverfile.dblp-plus
@@ -9,12 +9,12 @@
 [data]
 NAME              = dblp-plus
 GET_DATA_CMD      = wget -nc -O dblp.ttl.gz https://dblp.org/rdf/dblp.ttl.gz
-INDEX_DESCRIPTION = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
+DESCRIPTION       = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
 TEXT_DESCRIPTION  = All literals, search with FILTER KEYWORDS(?text, "...")
 
 [index]
 INPUT_FILES       = dblp.ttl.gz affiliations.nt affiliations.additions.nt citations.nt
-CAT_INPUT_FILES   = zcat -f ${RDF_FILES}
+CAT_INPUT_FILES   = zcat -f ${INPUT_FILES}
 SETTINGS_JSON     = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [ "<https://w3id.org", "<https://doi.org", "<http://dx.doi.org" ] }
 TEXT_INDEX        = from_literals
 

diff --git a/src/qlever/Qleverfiles/Qleverfile.default b/src/qlever/Qleverfiles/Qleverfile.default
@@ -31,7 +31,7 @@ SETTINGS_JSON   = { "num-triples-per-batch": 1000000 }
 # URL parameter `access_token`. It should not be easily guessable, unless you
 # don't mind others to get privileged access to your server.
 [server]
-PORT         =
+PORT         = 8888
 ACCESS_TOKEN = 
 
 # Use SYSTEM = docker to run QLever inside a docker container; the Docker image

diff --git a/src/qlever/Qleverfiles/Qleverfile.fbeasy b/src/qlever/Qleverfiles/Qleverfile.fbeasy
@@ -13,13 +13,13 @@ TEXT_DESCRIPTION  = Sentences from Wikipedia that mention at least one Freebase
 
 [index]
 INPUT_FILES     = fbeasy.nt
-CAT_INPUT_FILES = cat ${RDF_FILES}
+CAT_INPUT_FILES = cat ${INPUT_FILES}
 SETTINGS_JSON   = { "ascii-prefixes-only": true, "num-triples-per-batch": 10000000 }
 
 [server]
-PORT                        = 7003
-ACCESS_TOKEN                = ${data:NAME}_12631403
-MEMORY_FOR_QUERIES          = 5G
+PORT               = 7003
+ACCESS_TOKEN       = ${data:NAME}
+MEMORY_FOR_QUERIES = 5G
 
 [runtime]
 SYSTEM = docker

diff --git a/src/qlever/Qleverfiles/Qleverfile.freebase b/src/qlever/Qleverfiles/Qleverfile.freebase
@@ -12,12 +12,12 @@ DESCRIPTION  = RDF data from ${DATA_URL}, latest (and final) version from 09.08.
 
 [index]
 INPUT_FILES     = freebase-rdf-latest.gz
-CAT_INPUT_FILES = zcat ${RDF_FILES}
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
 SETTINGS_JSON   = { "languages-internal": [ "en" ], "prefixes-external": ["<"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 10000000 }
 
 [server]
 PORT               = 7002
-ACCESS_TOKEN       = ${data:NAME}_12631403
+ACCESS_TOKEN       = ${data:NAME}
 MEMORY_FOR_QUERIES = 10G
 
 [runtime]

diff --git a/src/qlever/Qleverfiles/Qleverfile.osm-planet b/src/qlever/Qleverfiles/Qleverfile.osm-planet
@@ -11,7 +11,7 @@
 NAME         = osm-planet
 DATA_URL     = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
 GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
-VERSION      = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y")
+VERSION      = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
 DESCRIPTION  = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
 
 [index]

diff --git a/src/qlever/Qleverfiles/Qleverfile.vvz b/src/qlever/Qleverfiles/Qleverfile.vvz
@@ -14,13 +14,13 @@ TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
 
 [index]
 INPUT_FILES     = vvz.ttl
-CAT_INPUT_FILES = cat ${FILE_NAMES}
-SETTINGS_JSON   = { "ascii-prefixes-only": true, "num-triples-per-batch": 1000000 }
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON   = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
 TEXT_INDEX      = from_literals
 
 [server]
 PORT               = 7041
-ACCESS_TOKEN       = ${data:NAME}_8736426534
+ACCESS_TOKEN       = ${data:NAME}
 MEMORY_FOR_QUERIES = 10G
 
 [runtime]

diff --git a/src/qlever/Qleverfiles/Qleverfile.yago-4 b/src/qlever/Qleverfiles/Qleverfile.yago-4
@@ -16,14 +16,14 @@ DESCRIPTION  = "Full dump from https://yago-knowledge.org/downloads/yago-4, vers
 
 [index]
 INPUT_FILES     = yago-wd-*.nt.gz
-CAT_INPUT_FILES = zcat ${FILE_NAMES}
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
 SETTINGS_JSON   = { "languages-internal": ["en"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
 STXXL_MEMORY    = 10G
 
 [server]
-PORT                        = 9004
-ACCESS_TOKEN                = ${DB}_2347348732
-MEMORY_FOR_QUERIES          = 30G
+PORT               = 9004
+ACCESS_TOKEN       = ${data:NAME}
+MEMORY_FOR_QUERIES = 30G
 
 [runtime]
 SYSTEM = docker