From dd74d182a9f6d0f548eab5c2f97a1c840798998f Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sun, 8 Dec 2024 13:55:51 +0100 Subject: [PATCH] Extend `query` command by option `--pin-to-cache` (#99) Also, make the query a positional argument (instead of an argument of the option `--query`), but leave it optional and with a default value, like before. That way one can now write more naturally something like this: `qlever query "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"` or, with pinning to the cache: `qlever query --pin-to-cache "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }"` --- src/qlever/commands/query.py | 91 +++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 26 deletions(-) diff --git a/src/qlever/commands/query.py b/src/qlever/commands/query.py index 979cd5a5..6b3acbf0 100644 --- a/src/qlever/commands/query.py +++ b/src/qlever/commands/query.py @@ -18,39 +18,79 @@ def __init__(self): pass def description(self) -> str: - return ("Send a query to a SPARQL endpoint") + return "Send a query to a SPARQL endpoint" def should_have_qleverfile(self) -> bool: return False - def relevant_qleverfile_arguments(self) -> dict[str: list[str]]: - return {"server": ["port"]} + def relevant_qleverfile_arguments(self) -> dict[str : list[str]]: + return {"server": ["port", "access_token"]} def additional_arguments(self, subparser) -> None: - subparser.add_argument("--query", type=str, - default="SELECT * WHERE { ?s ?p ?o } LIMIT 10", - help="SPARQL query to send") - subparser.add_argument("--sparql-endpoint", type=str, - help="URL of the SPARQL endpoint") - subparser.add_argument("--accept", type=str, - choices=["text/tab-separated-values", - "text/csv", - "application/sparql-results+json", - "application/sparql-results+xml", - "application/qlever-results+json"], - default="text/tab-separated-values", - help="Accept header for the SPARQL query") - subparser.add_argument("--no-time", action="store_true", - default=False, - help="Do not print the (end-to-end) time taken") + subparser.add_argument( + "query", + type=str, + nargs="?", + default="SELECT * WHERE { ?s ?p ?o } LIMIT 10", + help="SPARQL query to send", + ) + subparser.add_argument( + "--pin-to-cache", + action="store_true", + default=False, + help="Pin the query to the cache", + ) + subparser.add_argument( + "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint" + ) + subparser.add_argument( + "--accept", + type=str, + choices=[ + "text/tab-separated-values", + "text/csv", + "application/sparql-results+json", + "application/sparql-results+xml", + "application/qlever-results+json", + ], + default="text/tab-separated-values", + help="Accept header for the SPARQL query", + ) + subparser.add_argument( + "--no-time", + action="store_true", + default=False, + help="Do not print the (end-to-end) time taken", + ) def execute(self, args) -> bool: + # When pinning to the cache, set `send=0` and request media type + # `application/qlever-results+json` so that we get the result size. + # Also, we need to provide the access token. + if args.pin_to_cache: + args.accept = "application/qlever-results+json" + curl_cmd_additions = ( + f" --data pinresult=true --data send=0" + f" --data access-token=" + f"{shlex.quote(args.access_token)}" + f" | jq .resultsize | numfmt --grouping" + f" | xargs -I {{}} printf" + f' "Result pinned to cache,' + f' number of rows: {{}}\\n"' + ) + else: + curl_cmd_additions = "" + # Show what the command will do. - sparql_endpoint = (args.sparql_endpoint if args.sparql_endpoint - else f"localhost:{args.port}") - curl_cmd = (f"curl -s {sparql_endpoint}" - f" -H \"Accept: {args.accept}\"" - f" --data-urlencode query={shlex.quote(args.query)}") + sparql_endpoint = ( + args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}" + ) + curl_cmd = ( + f"curl -s {sparql_endpoint}" + f' -H "Accept: {args.accept}"' + f" --data-urlencode query={shlex.quote(args.query)}" + f"{curl_cmd_additions}" + ) self.show(curl_cmd, only_show=args.show) if args.show: return True @@ -62,8 +102,7 @@ def execute(self, args) -> bool: time_msecs = round(1000 * (time.time() - start_time)) if not args.no_time and args.log_level != "NO_LOG": log.info("") - log.info(f"Query processing time (end-to-end):" - f" {time_msecs:,d} ms") + log.info(f"Query processing time (end-to-end):" f" {time_msecs:,d} ms") except Exception as e: if args.log_level == "DEBUG": traceback.print_exc()