From a6751ba7d0cd43f77e6b81a566929dec2b7d91db Mon Sep 17 00:00:00 2001
From: tanmay-9
Date: Sat, 25 Jan 2025 01:37:28 +0100
Subject: [PATCH] Incorporate changes to example-queries from extract-queries commit

---
 src/qlever/commands/example_queries.py | 80 +++++++++++++++++++-------
 1 file changed, 60 insertions(+), 20 deletions(-)

diff --git a/src/qlever/commands/example_queries.py b/src/qlever/commands/example_queries.py
index dca1f554..50b438ff 100644
--- a/src/qlever/commands/example_queries.py
+++ b/src/qlever/commands/example_queries.py
@@ -9,6 +9,7 @@
 from pathlib import Path
 from typing import Any
 
+from rdflib import Graph
 from ruamel.yaml import YAML
 from ruamel.yaml.scalarstring import LiteralScalarString
 from termcolor import colored
@@ -191,8 +192,7 @@ def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
             return query_pretty_printed.rstrip()
         except Exception:
             log.error(
-                "Failed to pretty-print query, "
-                "returning original query: {e}"
+                "Failed to pretty-print query, returning original query: {e}"
             )
             return query.rstrip()
 
@@ -206,7 +206,7 @@ def sparql_query_type(self, query: str) -> str:
         return "UNKNOWN"
 
     @staticmethod
-    def parse_queries_file(queries_file: str) -> dict:
+    def parse_queries_file(queries_file: str) -> dict[str, list[dict[str, str]]]:
         """
         Parse a YAML file and validate its structure.
         """
@@ -225,9 +225,11 @@ def parse_queries_file(queries_file: str) -> dict:
         # Validate the structure
         if not isinstance(data, dict) or "queries" not in data:
             log.error(error_msg)
+            return {}
 
         if not isinstance(data["queries"], list):
             log.error(error_msg)
+            return {}
 
         for item in data["queries"]:
             if (
@@ -236,6 +238,7 @@ def parse_queries_file(queries_file: str) -> dict:
                 or "sparql" not in item
             ):
                 log.error(error_msg)
+                return {}
 
         return data
 
@@ -250,7 +253,9 @@ def get_example_queries(
         # yaml file case -> convert to tsv (description \t query)
         if queries_file is not None:
             queries_data = self.parse_queries_file(queries_file)
-            queries = queries_data["queries"]
+            queries = queries_data.get("queries")
+            if queries is None:
+                return []
             example_query_lines = [
                 f"{query['query']}\t{query['sparql']}" for query in queries
             ]
@@ -278,12 +283,21 @@ def execute(self, args) -> bool:
             log.error("Cannot have both --remove-offset-and-limit and --limit")
             return False
 
+        if args.generate_output_file:
+            if args.output_basename is None or args.backend_name is None:
+                log.error(
+                    "Both --output-basename and --backend-name parameters"
+                    " must be passed when --generate-output-file is passed"
+                )
+                return False
+            args.accept = "AUTO"
+
         # If `args.accept` is `application/sparql-results+json` or
         # `application/qlever-results+json` or `AUTO`, we need `jq`.
-        if (
-            args.accept == "application/sparql-results+json"
-            or args.accept == "application/qlever-results+json"
-            or args.accept == "AUTO"
+        if args.accept in (
+            "application/sparql-results+json",
+            "application/qlever-results+json",
+            "AUTO",
         ):
             try:
                 subprocess.run(
@@ -311,6 +325,8 @@ def execute(self, args) -> bool:
             not args.sparql_endpoint
             or args.sparql_endpoint.startswith("https://qlever")
         )
+        if args.generate_output_file:
+            is_qlever = is_qlever or "qlever" in args.backend_name.lower()
         if args.clear_cache == "yes" and not is_qlever:
             log.warning("Clearing the cache only works for QLever")
             args.clear_cache = "no"
@@ -345,6 +361,7 @@ def execute(self, args) -> bool:
         if args.show:
             return True
 
+        # Get the example queries either from queries_file or get_queries_cmd
         example_query_lines = (
             self.get_example_queries(get_queries_cmd=get_queries_cmd)
             if args.queries_file is None
@@ -454,10 +471,22 @@ def execute(self, args) -> bool:
             # queries and `application/sparql-results+json` for all others.
             accept_header = args.accept
             if accept_header == "AUTO":
-                if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
+                if query_type == "DESCRIBE":
                     accept_header = "text/turtle"
+                elif query_type == "CONSTRUCT":
+                    accept_header = (
+                        "application/qlever-results+json"
+                        if is_qlever and args.generate_output_file
+                        else "text/turtle"
+                    )
                 else:
                     accept_header = "application/sparql-results+json"
+                    if args.generate_output_file:
+                        accept_header = (
+                            "application/qlever-results+json"
+                            if is_qlever
+                            else "text/tab-separated-values"
+                        )
 
             # Launch query.
             try:
@@ -469,8 +498,7 @@ def execute(self, args) -> bool:
                 )
                 log.debug(curl_cmd)
                 result_file = (
-                    f"qlever.example_queries.result."
-                    f"{abs(hash(curl_cmd))}.tmp"
+                    f"qlever.example_queries.result.{abs(hash(curl_cmd))}.tmp"
                 )
                 start_time = time.time()
                 http_code = run_curl_command(
@@ -528,7 +556,7 @@ def get_json_error_msg(e: Exception) -> dict[str, str]:
                     result_size = run_command(
                         f"sed 1d {result_file}", return_output=True
                     )
-                elif args.accept == "application/qlever-results+json":
+                elif accept_header == "application/qlever-results+json":
                     try:
                         # sed cmd to get the number between 2nd and 3rd double_quotes
                         result_size = run_command(
@@ -642,11 +670,13 @@ def get_json_error_msg(e: Exception) -> dict[str, str]:
                 )
                 yaml_record = self.get_record_for_yaml(
                     query=description,
-                    sparql=self.get_pretty_printed_query(query, True),
+                    sparql=self.pretty_printed_query(
+                        query, args.show_prefixes
+                    ),
                     client_time=time_seconds,
                     result=results_for_yaml,
                     result_size=result_length,
-                    is_qlever=is_qlever,
+                    accept_header=accept_header,
                 )
                 yaml_records["queries"].append(yaml_record)
 
@@ -722,7 +752,7 @@ def get_record_for_yaml(
         client_time: float,
         result: str | dict[str, str],
         result_size: int | None,
-        is_qlever: bool,
+        accept_header: str,
     ) -> dict[str, Any]:
         """
         Construct a dictionary with query information for yaml file
@@ -742,9 +772,9 @@ def get_record_for_yaml(
             else result_size
         )
         headers, results = self.get_query_results(
-            result, result_size, is_qlever
+            result, result_size, accept_header
         )
-        if is_qlever:
+        if accept_header == "application/qlever-results+json":
             runtime_info_cmd = (
                 f"jq 'if .runtimeInformation then"
                 f" .runtimeInformation else"
@@ -761,23 +791,33 @@ def get_record_for_yaml(
         return record
 
     def get_query_results(
-        self, result_file: str, result_size: int, is_qlever: bool
+        self, result_file: str, result_size: int, accept_header: str
     ) -> tuple[list[str], list[list[str]]]:
         """
         Return headers and results as a tuple
         """
-        if not is_qlever:
+        if accept_header == "text/tab-separated-values":
== "text/tab-separated-values": get_result_cmd = f"sed -n '1,{result_size + 1}p' {result_file}" results_str = run_command(get_result_cmd, return_output=True) results = results_str.splitlines() headers = [header for header in results[0].split("\t")] results = [result.split("\t") for result in results[1:]] return headers, results - else: + elif accept_header == "application/qlever-results+json": get_result_cmd = f"jq '{{headers: .selected, results: .res[0:{result_size}]}}' {result_file}" results_str = run_command(get_result_cmd, return_output=True) results_json = json.loads(results_str) return results_json["headers"], results_json["results"] + else: # text/turtle + graph = Graph() + graph.parse(result_file, format="turtle") + headers = ["?subject", "?predicate", "?object"] + results = [] + for i, (s, p, o) in enumerate(graph): + if i >= result_size: + break + results.append([str(s), str(p), str(o)]) + return headers, results @staticmethod def write_query_data_to_yaml(