Skip to content

Commit

Permalink
Merge branch 'ad-freiburg:main' into improve_code_for_stop
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonL22 authored Dec 23, 2024
2 parents 695e59c + c9894be commit 3df9c0b
Show file tree
Hide file tree
Showing 41 changed files with 2,367 additions and 659 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run the unit-test suite with pytest on every push / PR against main.
# NOTE(review): indentation was reconstructed from a flattened scrape of
# this workflow; structure follows standard GitHub Actions conventions.
name: Unit Tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  unit_tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so YAML does not read 3.10 as the float 3.1.
        python-version: ["pypy3.9", "pypy3.10", "3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{matrix.python-version}}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          # Install the package itself, then the test tooling.
          python -m pip install .
          pip install pytest pytest-cov
      - name: Test with pytest
        run: |
          pytest -v
51 changes: 51 additions & 0 deletions .github/workflows/qleverfiles-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Smoke-test every shipped Qleverfile: install the qlever CLI from this
# checkout, then run each command in --show mode to verify the file parses.
# NOTE(review): indentation was reconstructed from a flattened scrape of
# this workflow; structure follows standard GitHub Actions conventions.
name: Qleverfiles check

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  merge_group:

jobs:
  qleverfiles-check:
    runs-on: ${{matrix.os}}
    strategy:
      fail-fast: true
      matrix:
        os: [ubuntu-24.04]

    steps:
      - name: Checkout the repository for the qlever script
        # NOTE(review): actions/checkout@v3 here vs @v4 in pytest.yml —
        # consider upgrading for consistency (v3 runs on an older Node).
        uses: actions/checkout@v3
        with:
          path: qlever-control

      - name: Install the script locally
        working-directory: ${{github.workspace}}/qlever-control
        run: |
          # python3 -m pip install --upgrade pip setuptools wheel
          # python3 --version
          # pip3 --version
          # pip3 show setuptools wheel
          pip install -e .

      - name: Check that all the files in `src/qlever/Qleverfiles` parse.
        working-directory: ${{github.workspace}}/qlever-control
        run: |
          export QLEVER_ARGCOMPLETE_ENABLED=1
          for QLEVERFILE in src/qlever/Qleverfiles/Qleverfile.*; do
            echo
            echo -e "\x1b[1;34mChecking ${QLEVERFILE}\x1b[0m"
            echo
            # Config name is the Qleverfile suffix (e.g. "dblp").
            NAME=${QLEVERFILE##*.}
            rm -f Qleverfile
            qlever setup-config $NAME
            # --show only prints the commands, so no data is downloaded.
            qlever get-data --show
            qlever index --show
            qlever start --show
            qlever ui --show
            echo
            echo -e "\x1b[34mAll checks passed for ${QLEVERFILE}\x1b[0m"
            echo
          done
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "qlever"
description = "Script for using the QLever SPARQL engine."
version = "0.5.6"
version = "0.5.14"
authors = [
{ name = "Hannah Bast", email = "[email protected]" }
]
Expand Down Expand Up @@ -35,3 +35,8 @@ package-data = { "qlever" = ["Qleverfiles/*"] }

[tool.pytest.ini_options]
pythonpath = ["src"]

[tool.ruff]
line-length = 79
[tool.ruff.lint]
extend-select = ["I"]
24 changes: 14 additions & 10 deletions src/qlever/Qleverfiles/Qleverfile.dblp
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
# Qleverfile for DBLP, use with https://github.com/ad-freiburg/qlever-control
# Qleverfile for DBLP, use with QLever CLI (`pip install qlever`)
#
# qlever get-data # takes ~3 mins (downloads .ttl.gz file of size ~3 GB)
# qlever index # takes ~4 mins (on an AMD Ryzen 9 5900X)
# qlever start # takes a few seconds
# qlever get-data # ~1 min, ~5 GB compressed, 1.3 B triples
# qlever index # ~30 min, ~20 GB RAM, ~25 GB index size on disk
# qlever start # ~3 s, adjust MEMORY_FOR_QUERIES as needed
#
# Measured on an AMD Ryzen 9 5950X with 128 GB RAM, and NVMe SSD (25.10.2024)

[data]
NAME = dblp
GET_DATA_URL = https://dblp.org/rdf/dblp.ttl.gz
GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL} 2>&1 | tee ${data:NAME}.download-log.txt
DATA_TARFILE = dblp_KG_with_associated_data.tar
GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}
GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE}
VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
DESCRIPTION = DBLP computer science bibliography, data from ${GET_DATA_URL} (version ${VERSION})
DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL} (version ${VERSION})
FORMAT = ttl

[index]
INPUT_FILES = dblp.ttl.gz
CAT_INPUT_FILES = zcat ${INPUT_FILES}
SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
INPUT_FILES = *.gz
MULTI_INPUT_JSON = { "cmd": "zcat {}", "for-each": "*.gz" }
SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 5000000, "prefixes-external": [""] }

[server]
PORT = 7015
Expand Down
4 changes: 2 additions & 2 deletions src/qlever/Qleverfiles/Qleverfile.dblp-plus
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
[data]
NAME = dblp-plus
GET_DATA_CMD = wget -nc -O dblp.ttl.gz https://dblp.org/rdf/dblp.ttl.gz
INDEX_DESCRIPTION = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
DESCRIPTION = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")

[index]
INPUT_FILES = dblp.ttl.gz affiliations.nt affiliations.additions.nt citations.nt
CAT_INPUT_FILES = zcat -f ${RDF_FILES}
CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [ "<https://w3id.org", "<https://doi.org", "<http://dx.doi.org" ] }
TEXT_INDEX = from_literals

Expand Down
2 changes: 1 addition & 1 deletion src/qlever/Qleverfiles/Qleverfile.default
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ SETTINGS_JSON = { "num-triples-per-batch": 1000000 }
# URL parameter `access_token`. It should not be easily guessable, unless you
# don't mind others to get privileged access to your server.
[server]
PORT =
PORT = 8888
ACCESS_TOKEN =

# Use SYSTEM = docker to run QLever inside a docker container; the Docker image
Expand Down
8 changes: 4 additions & 4 deletions src/qlever/Qleverfiles/Qleverfile.fbeasy
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ TEXT_DESCRIPTION = Sentences from Wikipedia that mention at least one Freebase

[index]
INPUT_FILES = fbeasy.nt
CAT_INPUT_FILES = cat ${RDF_FILES}
CAT_INPUT_FILES = cat ${INPUT_FILES}
SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 10000000 }

[server]
PORT = 7003
ACCESS_TOKEN = ${data:NAME}_12631403
MEMORY_FOR_QUERIES = 5G
PORT = 7003
ACCESS_TOKEN = ${data:NAME}
MEMORY_FOR_QUERIES = 5G

[runtime]
SYSTEM = docker
Expand Down
4 changes: 2 additions & 2 deletions src/qlever/Qleverfiles/Qleverfile.freebase
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ DESCRIPTION = RDF data from ${DATA_URL}, latest (and final) version from 09.08.

[index]
INPUT_FILES = freebase-rdf-latest.gz
CAT_INPUT_FILES = zcat ${RDF_FILES}
CAT_INPUT_FILES = zcat ${INPUT_FILES}
SETTINGS_JSON = { "languages-internal": [ "en" ], "prefixes-external": ["<"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 10000000 }

[server]
PORT = 7002
ACCESS_TOKEN = ${data:NAME}_12631403
ACCESS_TOKEN = ${data:NAME}
MEMORY_FOR_QUERIES = 10G

[runtime]
Expand Down
2 changes: 1 addition & 1 deletion src/qlever/Qleverfiles/Qleverfile.imdb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ACCESS_TOKEN = ${data:NAME}
MEMORY_FOR_QUERIES = 5G

[runtime]
SYSTEM = native
SYSTEM = docker
IMAGE = docker.io/adfreiburg/qlever:latest

[ui]
Expand Down
30 changes: 30 additions & 0 deletions src/qlever/Qleverfiles/Qleverfile.orkg
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Qleverfile for ORKG, use with the QLever CLI (`pip install qlever`)
#
# qlever get-data # Get the dataset
# qlever index # Build index data structures
# qlever start # Start the server

# Dataset download: a single Turtle dump fetched from the ORKG API.
[data]
NAME = orkg
GET_DATA_URL = https://orkg.org/api/rdf/dump
# curl -LR follows redirects and preserves the remote timestamp, which
# VERSION below reads back via `date -r`; falls back to NO_DATE if absent.
GET_DATA_CMD = curl -LR -o ${NAME}.ttl ${GET_DATA_URL} 2>&1 | tee ${NAME}.download-log.txt
VERSION = $$(date -r ${NAME}.ttl +%d.%m.%Y || echo "NO_DATE")
DESCRIPTION = The Open Research Knowledge Graph (ORKG) (data from ${GET_DATA_URL}, version ${VERSION})

# Index build: uncompressed Turtle, so a plain `cat` feeds the indexer.
[index]
INPUT_FILES = ${data:NAME}.ttl
CAT_INPUT_FILES = cat ${INPUT_FILES}
SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }

# Server settings; ACCESS_TOKEN grants privileged access, so NAME alone
# is guessable — presumably fine for a local/demo setup, verify for prod.
[server]
PORT = 7053
ACCESS_TOKEN = ${data:NAME}
MEMORY_FOR_QUERIES = 10G
CACHE_MAX_SIZE = 5G

# Run QLever inside Docker using the official image.
[runtime]
SYSTEM = docker
IMAGE = docker.io/adfreiburg/qlever:latest

# QLever UI configuration preset to use for this dataset.
[ui]
UI_CONFIG = orkg
2 changes: 1 addition & 1 deletion src/qlever/Qleverfiles/Qleverfile.osm-planet
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
NAME = osm-planet
DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y")
VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
DESCRIPTION = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)

[index]
Expand Down
Loading

0 comments on commit 3df9c0b

Please sign in to comment.