Skip to content

Commit

Permalink
Remove _GB or _gb from VARIABLES or option names
Browse files Browse the repository at this point in the history
For example, instead of MEMORY_FOR_QUERIES_GB = 30, now write MEMORY_FOR_QUERIES
= 30G. See ad-freiburg/qlever#1067
  • Loading branch information
Hannah Bast committed Dec 9, 2023
1 parent 840a478 commit 0cd9506
Show file tree
Hide file tree
Showing 13 changed files with 73 additions and 68 deletions.
10 changes: 5 additions & 5 deletions Qleverfiles/Qleverfile.dblp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 10000
WITH_TEXT_INDEX = from_literals

[server]
PORT = 7015
ACCESS_TOKEN = ${data:NAME}_7643543846
MEMORY_FOR_QUERIES_GB = 30
CACHE_MAX_SIZE_GB = 5
WITH_TEXT_INDEX = from_literals
PORT = 7015
ACCESS_TOKEN = ${data:NAME}_7643543846
MEMORY_FOR_QUERIES = 30G
CACHE_MAX_SIZE = 5G
WITH_TEXT_INDEX = from_literals

[docker]
USE_DOCKER = false
Expand Down
10 changes: 5 additions & 5 deletions Qleverfiles/Qleverfile.dblp-plus
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100
WITH_TEXT_INDEX = from_literals

[server]
PORT = 7027
ACCESS_TOKEN = ${data:NAME}_169238202
MEMORY_FOR_QUERIES_GB = 20
CACHE_MAX_SIZE_GB = 10
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 2
PORT = 7027
ACCESS_TOKEN = ${data:NAME}_169238202
MEMORY_FOR_QUERIES = 20G
CACHE_MAX_SIZE = 10G
CACHE_MAX_SIZE_SINGLE_ENTRY = 2G

[docker]
USE_DOCKER = false
Expand Down
10 changes: 5 additions & 5 deletions Qleverfiles/Qleverfile.fbeasy
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ FILE_NAMES = fbeasy.nt
CAT_FILES = cat ${RDF_FILES}

[server]
PORT = 7003
ACCESS_TOKEN = ${data:NAME}_12631403
MEMORY_FOR_QUERIES_GB = 10
CACHE_MAX_SIZE_GB = 5
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 2
PORT = 7003
ACCESS_TOKEN = ${data:NAME}_12631403
MEMORY_FOR_QUERIES = 10G
CACHE_MAX_SIZE = 5G
CACHE_MAX_SIZE_SINGLE_ENTRY = 2G

[docker]
USE_DOCKER = false
Expand Down
10 changes: 5 additions & 5 deletions Qleverfiles/Qleverfile.freebase
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ FILE_NAMES = freebase-rdf-latest.gz
CAT_FILES = zcat ${RDF_FILES}

[server]
PORT = 7002
ACCESS_TOKEN = ${data:NAME}_12631403
MEMORY_FOR_QUERIES_GB = 30
CACHE_MAX_SIZE_GB = 20
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
PORT = 7002
ACCESS_TOKEN = ${data:NAME}_12631403
MEMORY_FOR_QUERIES = 30G
CACHE_MAX_SIZE = 20G
CACHE_MAX_SIZE_SINGLE_ENTRY = 5G

[docker]
USE_DOCKER = false
Expand Down
8 changes: 4 additions & 4 deletions Qleverfiles/Qleverfile.imdb
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ WITH_TEXT_INDEX = from_literals
SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }

[server]
PORT = 7029
ACCESS_TOKEN = ${data:NAME}_1234567890
MEMORY_FOR_QUERIES_GB = 5
CACHE_MAX_SIZE_GB = 3
PORT = 7029
ACCESS_TOKEN = ${data:NAME}_1234567890
MEMORY_FOR_QUERIES = 5G
CACHE_MAX_SIZE = 3G

[docker]
USE_DOCKER = false
Expand Down
8 changes: 4 additions & 4 deletions Qleverfiles/Qleverfile.olympics
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ CAT_FILES = cat ${FILE_NAMES}
SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }

[server]
PORT = 7019
ACCESS_TOKEN = ${data:NAME}_7643543846
MEMORY_FOR_QUERIES_GB = 5
CACHE_MAX_SIZE_GB = 2
PORT = 7019
ACCESS_TOKEN = ${data:NAME}_7643543846
MEMORY_FOR_QUERIES = 5G
CACHE_MAX_SIZE = 2G

[docker]
USE_DOCKER = false
Expand Down
16 changes: 8 additions & 8 deletions Qleverfiles/Qleverfile.osm-country
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,19 @@ PBF = ${DB}.pbf
RDF_FILES = "${DB}.ttl.bz2"
CAT_FILES = "bzcat ${RDF_FILES}"
WITH_TEXT = false
STXXL_MEMORY_GB = 10
STXXL_MEMORY = 10G
SETTINGS_JSON = '{ "prefixes-external": [ "\"LINESTRING(", "\"MULTIPOLYGON(", "\"POLYGON(" ], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }'
GET_DATA_CMD = "wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${DB}.*.bz2; ( time /local/data/osm2rdf/build/apps/osm2rdf ${PBF} -o ${DB}.ttl --cache . --write-geometric-relation-statistics ) 2>&1 | tee ${DB}.osm2rdf-log.txt; rm -f spatial-*"
INDEX_DESCRIPTION = "OSM ${COUNTRY^}, dump from $(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6) with ogc:contains"

# Server settings
HOSTNAME = $(hostname -f)
SERVER_PORT = 7025
ACCESS_TOKEN = ${DB}_%RANDOM%
MEMORY_FOR_QUERIES = 20
CACHE_MAX_SIZE_GB = 10
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
CACHE_MAX_NUM_ENTRIES = 100
HOSTNAME = $(hostname -f)
SERVER_PORT = 7025
ACCESS_TOKEN = ${DB}_%RANDOM%
MEMORY_FOR_QUERIES = 20G
CACHE_MAX_SIZE = 10G
CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
CACHE_MAX_NUM_ENTRIES = 100

# QLever binaries
QLEVER_BIN_DIR = %QLEVER_BIN_DIR%
Expand Down
4 changes: 2 additions & 2 deletions Qleverfiles/Qleverfile.pubchem
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ INDEX_DESCRIPTION = PubChem RDF from ${GET_DATA_URL}, version 29.10.2023 (all fo
FILE_NAMES = nt/*.nt.gz nt/*/*.nt.gz nt/*/*/*.nt.gz
CAT_FILES = zcat ${FILE_NAMES}
WITH_TEXT_INDEX = false
STXXL_MEMORY_GB = 10
STXXL_MEMORY = 10G
SETTINGS_JSON = '{ "languages-internal": [""], "prefixes-external": [ "<http://rdf.ncbi.nlm.nih.gov/pubchem/" ], "ascii-prefixes-only": true, "num-triples-per-batch": 1000000 }'


[server]
PORT = 7023
ACCESS_TOKEN = ${NAME}_310129823
MEMORY_FOR_QUERIES_GB = 50
MEMORY_FOR_QUERIES = 50G

[docker]
USE_DOCKER = false
Expand Down
8 changes: 4 additions & 4 deletions Qleverfiles/Qleverfile.scientists
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ DB = scientists
RDF_FILES = "${DB}.nt"
CAT_FILES = "cat ${RDF_FILES}"
WITH_TEXT_INDEX = from_text_records_and_literals
STXXL_MEMORY_GB = 1
STXXL_MEMORY = 1G
SETTINGS_JSON = '{ "ascii-prefixes-only": true, "num-triples-per-batch": 100000 }'
GET_DATA_CMD = "wget https://github.com/ad-freiburg/qlever/raw/master/e2e/scientist-collection.zip && unzip -j scientist-collection.zip && rm -f scientist-collection.zip"
INDEX_DESCRIPTION = "Scientist collection from QLever's end-to-end test, see https://github.com/ad-freiburg/qlever/tree/master/e2e"
Expand All @@ -22,9 +22,9 @@ TEXT_DESCRIPTION = "Literals (use FILTER CONTAINS) and Wikipedia articles (use
HOSTNAME = $(hostname -f)
SERVER_PORT = 7020
ACCESS_TOKEN = ${DB}_%RANDOM%
MEMORY_FOR_QUERIES = 5
CACHE_MAX_SIZE_GB = 2
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 1
MEMORY_FOR_QUERIES = 5G
CACHE_MAX_SIZE = 2G
CACHE_MAX_SIZE_SINGLE_ENTRY = 1G
CACHE_MAX_NUM_ENTRIES = 100

# QLever binaries
Expand Down
8 changes: 4 additions & 4 deletions Qleverfiles/Qleverfile.uniprot
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ EXTRACT_PREFIXES = "for F in ${RDF_FILES}; do xzcat \$F | head -1000 | \grep ^@
CAT_FILES = "xzcat -f ${DB}.prefix-definitions ${RDF_FILES}"
WITH_TEXT_INDEX = false
PSO_AND_POS_ONLY = true
STXXL_MEMORY_GB = 80
STXXL_MEMORY = 80G
SETTINGS_JSON = '{ "languages-internal": ["en"], "prefixes-external": [ "<http://purl.uniprot.org/uniprot/", "<http://purl.uniprot.org/uniparc/", "<http://purl.uniprot.org/uniref/", "<http://purl.uniprot.org/isoforms/", "<http://purl.uniprot.org/range/", "<http://purl.uniprot.org/position/", "<http://purl.uniprot.org/refseq/", "<http://purl.uniprot.org/embl-cds/", "<http://purl.uniprot.org/EMBL", "<http://purl.uniprot.org/PATRIC", "<http://purl.uniprot.org/SEED", "<http://purl.uniprot.org/gi", "<http://rdf.ebi.ac.uk/resource", "<http://purl.uniprot.org/SHA-384" ], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-partial-vocab": 20000000 }'
GET_DATA_CMD = 'echo "Please read https://github.com/ad-freiburg/qlever/wiki/Using-QLever-for-UniProt"'
INDEX_DESCRIPTION = "Complete UniProt data from https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf, version ${DATE}"
Expand All @@ -21,9 +21,9 @@ INDEX_DESCRIPTION = "Complete UniProt data from https://ftp.uniprot.org/pub/data
HOSTNAME = $(hostname -f)
SERVER_PORT = 7018
ACCESS_TOKEN = ${DB}_%RANDOM%
MEMORY_FOR_QUERIES = 80
CACHE_MAX_SIZE_GB = 50
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 10
MEMORY_FOR_QUERIES = 80G
CACHE_MAX_SIZE = 50G
CACHE_MAX_SIZE_SINGLE_ENTRY = 10G
CACHE_MAX_NUM_ENTRIES = 100

# QLever binaries
Expand Down
6 changes: 3 additions & 3 deletions Qleverfiles/Qleverfile.wikidata
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ FILE_NAMES = latest-lexemes.ttl.bz2 latest-all.ttl.bz2
CAT_FILES = bzcat ${FILE_NAMES}
SETTINGS_JSON = { "languages-internal": ["en"], "prefixes-external": [ "<http://www.wikidata.org/entity/statement", "<http://www.wikidata.org/value", "<http://www.wikidata.org/reference" ], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
WITH_TEXT_INDEX = false
STXXL_MEMORY_GB = 10
STXXL_MEMORY = 10G

[server]
PORT = 7001
ACCESS_TOKEN = ${data:NAME}_372483264
MEMORY_FOR_QUERIES_GB = 50
CACHE_MAX_SIZE_GB = 30
MEMORY_FOR_QUERIES = 50G
CACHE_MAX_SIZE = 30G

[docker]
USE_DOCKER = true
Expand Down
16 changes: 8 additions & 8 deletions Qleverfiles/Qleverfile.yago-4
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ DB = yago-4
RDF_FILES = "yago-wd-*.nt.gz"
CAT_FILES = "zcat ${RDF_FILES}"
WITH_TEXT_INDEX = false
STXXL_MEMORY_GB = 10
STXXL_MEMORY = 10G
SETTINGS_JSON = '{ "languages-internal": ["en"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }'

# Get the data
Expand All @@ -21,13 +21,13 @@ GET_DATA_CMD = "wget -nc https://yago-knowledge.org/data/yago4/full/2020-02
INDEX_DESCRIPTION = "Full dump from https://yago-knowledge.org/downloads/yago-4, version 12.03.2020"

# Server settings
HOSTNAME = $(hostname -f)
SERVER_PORT = 9004
ACCESS_TOKEN = ${DB}_2347348732
MEMORY_FOR_QUERIES = 30
CACHE_MAX_SIZE_GB = 20
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
CACHE_MAX_NUM_ENTRIES = 100
HOSTNAME = $(hostname -f)
SERVER_PORT = 9004
ACCESS_TOKEN = ${DB}_2347348732
MEMORY_FOR_QUERIES = 30G
CACHE_MAX_SIZE = 20G
CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
CACHE_MAX_NUM_ENTRIES = 100

# QLever binaries
QLEVER_BIN_DIR = /local/data/qlever/qlever-code/build
Expand Down
27 changes: 16 additions & 11 deletions qlever
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ class Actions:
"port": "7000",
"binary": "ServerMain",
"num_threads": "8",
"cache_max_size_gb": "5",
"cache_max_size_gb_single_entry": "1",
"cache_max_size": "5G",
"cache_max_size_single_entry": "1G",
"cache_max_num_entries": "100",
"with_text_index": "false",
"only_pso_and_pos_permutations": "false",
Expand Down Expand Up @@ -439,8 +439,8 @@ class Actions:
if index_config['with_text_index'] in \
["from_literals", "from_text_records_and_literals"]:
cmdline += " --text-words-from-literals"
if 'stxxl_memory_gb' in index_config:
cmdline += f" --stxxl-memory-gb {index_config['stxxl_memory_gb']}"
if 'stxxl_memory' in index_config:
cmdline += f" --stxxl-memory {index_config['stxxl_memory']}"
cmdline += f" | tee {self.name}.index-log.txt"

# If the total file size is larger than 10 GB, set ulimit (such that a
Expand Down Expand Up @@ -544,9 +544,9 @@ class Actions:
f" -i {self.name}"
f" -j {server_config['num_threads']}"
f" -p {server_config['port']}"
f" -m {server_config['memory_for_queries_gb']}"
f" -c {server_config['cache_max_size_gb']}"
f" -e {server_config['cache_max_size_gb_single_entry']}"
f" -m {server_config['memory_for_queries']}"
f" -c {server_config['cache_max_size']}"
f" -e {server_config['cache_max_size_single_entry']}"
f" -k {server_config['cache_max_num_entries']}")
if server_config['access_token']:
cmdline += f" -a {server_config['access_token']}"
Expand Down Expand Up @@ -799,9 +799,11 @@ class Actions:

# Helper lambda that finds the next line matching the given `regex`,
# starting from `current_line`, and extracts the time. Returns a tuple
# of the time and the regex match object. If a matchine is found,
# of the time and the regex match object. If a match is found,
# `current_line` is updated to the line after the match. Otherwise,
# `current_line` is not changed.
# `current_line` will be one beyond the last line, unless
# `line_is_optional` is true, in which case it will be the same as
# when the function was entered.
def find_next_line(regex, line_is_optional=False):
nonlocal lines
nonlocal current_line
Expand Down Expand Up @@ -840,14 +842,17 @@ class Actions:
break
_, perm_info = find_next_line(r"INFO:\s*Writing meta data for"
r" ([A-Z]+ and [A-Z]+)", True)
if perm_info is None:
break
# if perm_info is None:
# break
perm_begin_and_info.append((perm_begin, perm_info))
convert_end = (perm_begin_and_info[0][0] if
len(perm_begin_and_info) > 0 else None)
normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
text_begin, _ = find_next_line(r"INFO:\s*Adding text index", True)
text_end, _ = find_next_line(r"INFO:\s*DocsDB done", True)
# print("DEBUG:", len(perm_begin_and_info), perm_begin_and_info)
# print("DEBUG:", overall_begin)
# print("DEBUG:", normal_end)

# Check whether at least the first phase is done.
if overall_begin is None:
Expand Down

0 comments on commit 0cd9506

Please sign in to comment.