Skip to content

Commit

Permalink
Add Qleverfile for osm-planet + more goodies
Browse files Browse the repository at this point in the history
1. A random alphanumerical string is now appended automatically to the
   ACCESS_TOKEN when the Qleverfile is copied to the cwd

2. Values can now be of the form $$(...), in which case the ... part is
   evaluated and the value becomes the result of that evaluation
  • Loading branch information
Hannah Bast committed Mar 10, 2024
1 parent 7dc784a commit b4d7daf
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 13 deletions.
38 changes: 38 additions & 0 deletions src/qlever/Qleverfiles/Qleverfile.osm-planet
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Qleverfile for OSM Planet, use with the qlever script (pip install qlever)
#
# qlever get-data # takes ~50 mins to download .ttl.bz2 file of ~ 300 GB
# qlever index # takes ~12 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
# qlever start # takes a few seconds
#
# For the OSM data of a single country, do `qlever setup-config osm-country`
# and edit the Qleverfile to specify the country.

[data]
NAME = osm-planet
DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y")
DESCRIPTION = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)

[index]
INPUT_FILES = ${data:NAME}.ttl.bz2
CAT_INPUT_FILES = lbzcat -f -n 2 ${INPUT_FILES}
WITH_TEXT = false
STXXL_MEMORY = 20G
SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }

[server]
PORT = 7007
ACCESS_TOKEN = ${data:NAME}
MEMORY_FOR_QUERIES = 90G
CACHE_MAX_SIZE = 40G
CACHE_MAX_SIZE_SINGLE_ENTRY = 30G
TIMEOUT = 300s

[runtime]
SYSTEM = docker
IMAGE = docker.io/adfreiburg/qlever:latest

[ui]
UI_PORT = 7000
UI_CONFIG = osm-planet
6 changes: 5 additions & 1 deletion src/qlever/commands/setup-config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from qlever.command import QleverCommand
from qlever.log import log
from qlever.util import get_random_string


class SetupConfigCommand(QleverCommand):
Expand Down Expand Up @@ -35,7 +36,10 @@ def execute(self, args) -> bool:
# Construct the command line and show it.
qleverfile_path = (self.qleverfiles_path
/ f"Qleverfile.{args.config_name}")
setup_config_cmd = f"cp -a {qleverfile_path} Qleverfile"
setup_config_cmd = (
f"cat {qleverfile_path}"
f" | sed -E 's/(^ACCESS_TOKEN.*)/\\1_{get_random_string(12)}/'"
f"> Qleverfile")
self.show(setup_config_cmd, only_show=args.show)
if args.show:
return False
Expand Down
10 changes: 5 additions & 5 deletions src/qlever/commands/start.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def should_have_qleverfile(self) -> bool:
return True

def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
return {"data": ["name", "index_description", "text_description"],
return {"data": ["name", "description", "text_description"],
"server": ["server_binary", "port", "access_token",
"memory_for_queries", "cache_max_size",
"cache_max_size_single_entry",
Expand Down Expand Up @@ -163,8 +163,8 @@ def execute(self, args) -> bool:

# Set the access token if specified.
access_arg = f"--data-urlencode \"access-token={args.access_token}\""
if args.index_description:
desc = args.index_description
if args.description:
desc = args.description
curl_cmd = (f"curl -Gs http://localhost:{port}/api"
f" --data-urlencode \"index-description={desc}\""
f" {access_arg} > /dev/null")
Expand All @@ -174,9 +174,9 @@ def execute(self, args) -> bool:
except Exception as e:
log.error(f"Setting the index description failed ({e})")
if args.text_description:
desc = args.text_description
text_desc = args.text_description
curl_cmd = (f"curl -Gs http://localhost:{port}/api"
f" --data-urlencode \"text-description={desc}\""
f" --data-urlencode \"text-description={text_desc}\""
f" {access_arg} > /dev/null")
log.debug(curl_cmd)
try:
Expand Down
38 changes: 33 additions & 5 deletions src/qlever/qleverfile.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from __future__ import annotations

import re
import socket
import subprocess
from configparser import ConfigParser, ExtendedInterpolation

from qlever.containerize import Containerize
from qlever.log import log


class QleverfileException(Exception):
Expand Down Expand Up @@ -43,12 +46,13 @@ def arg(*args, **kwargs):
data_args["get_data_cmd"] = arg(
"--get-data-cmd", type=str, required=True,
help="The command to get the data")
data_args["index_description"] = arg(
"--index-description", type=str, required=True,
help="A concise description of the indexed dataset")
data_args["description"] = arg(
"--description", type=str, required=True,
help="A concise description of the dataset")
data_args["text_description"] = arg(
"--text-description", type=str, default=None,
help="A description of the indexed text if any")
help="A concice description of the addtional text data"
" if any")

index_args["input_files"] = arg(
"--input-files", type=str, required=True,
Expand Down Expand Up @@ -200,12 +204,36 @@ def read(qleverfile_path):
"""

# Read the Qleverfile.
config = ConfigParser(interpolation=ExtendedInterpolation())
defaults = {"random": "83724324hztz", "version": "01.01.01"}
config = ConfigParser(interpolation=ExtendedInterpolation(),
defaults=defaults)
try:
config.read(qleverfile_path)
except Exception as e:
raise QleverfileException(f"Error parsing {qleverfile_path}: {e}")

# Iterate over all sections and options and check if there are any
# values of the form $$(...) that need to be replaced.
for section in config.sections():
for option in config[section]:
value = config[section][option]
match = re.match(r"^\$\((.*)\)$", value)
if match:
try:
value = subprocess.check_output(
match.group(1), shell=True, text=True,
stderr=subprocess.STDOUT)
except Exception as e:
log.info("")
log.error(f"Error evaluating {value} for option "
f"{section}.{option.upper()} in "
f"{qleverfile_path}:")
log.info("")
log.info(e.output if hasattr(e, "output") else e)
exit(1)
config[section][option] = value
log.info(f"Set {section}.{option} to {value}")

# Make sure that all the sections are there.
for section in ["data", "index", "server", "runtime", "ui"]:
if section not in config:
Expand Down
17 changes: 15 additions & 2 deletions src/qlever/util.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from __future__ import annotations

import random
import re
import string
import subprocess
from datetime import date, datetime
from pathlib import Path

from qlever.log import log
import re
from datetime import datetime, date


def get_total_file_size(patterns: list[str]) -> int:
Expand Down Expand Up @@ -94,3 +97,13 @@ def show_table_line(pid, user, start_time, rss, cmdline):
except Exception as e:
log.error(f"Could not get process info: {e}")
return False


def get_random_string(length: int) -> str:
    """
    Helper function that returns a randomly chosen alphanumerical string
    (ASCII letters and digits) of the given length. A length of zero or
    less yields the empty string.

    The characters are drawn from the operating system's cryptographically
    secure random source (via `secrets`), because the result is used as a
    suffix for access tokens and therefore must not be predictable.
    """
    # Imported locally so that this function does not depend on a change to
    # the module-level import block.
    import secrets

    # NOTE: Do not seed the global `random` generator here. Seeding with a
    # `datetime` object raises a TypeError on Python >= 3.11, makes the
    # result guessable from the creation time, and silently reseeds the
    # generator for every other user of `random` in the process.
    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))

0 comments on commit b4d7daf

Please sign in to comment.