From a6751ba7d0cd43f77e6b81a566929dec2b7d91db Mon Sep 17 00:00:00 2001
From: tanmay-9
Date: Sat, 25 Jan 2025 01:37:28 +0100
Subject: [PATCH] Incorporate changes to example-queries from extract-queries commit

---
 src/qlever/commands/example_queries.py | 80 +++++++++++++++++++-------
 1 file changed, 60 insertions(+), 20 deletions(-)

diff --git a/src/qlever/commands/example_queries.py b/src/qlever/commands/example_queries.py
index dca1f554..50b438ff 100644
--- a/src/qlever/commands/example_queries.py
+++ b/src/qlever/commands/example_queries.py
@@ -9,6 +9,7 @@
 from pathlib import Path
 from typing import Any
 
+from rdflib import Graph
 from ruamel.yaml import YAML
 from ruamel.yaml.scalarstring import LiteralScalarString
 from termcolor import colored
@@ -191,8 +192,7 @@ def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
             return query_pretty_printed.rstrip()
         except Exception:
             log.error(
-                "Failed to pretty-print query, "
-                "returning original query: {e}"
+                "Failed to pretty-print query, returning original query: {e}"
             )
             return query.rstrip()
 
@@ -206,7 +206,7 @@ def sparql_query_type(self, query: str) -> str:
         return "UNKNOWN"
 
     @staticmethod
-    def parse_queries_file(queries_file: str) -> dict:
+    def parse_queries_file(queries_file: str) -> dict[str, list[dict[str, str]]]:
         """
         Parse a YAML file and validate its structure.
         """
@@ -225,9 +225,11 @@ def parse_queries_file(queries_file: str) -> dict:
         # Validate the structure
         if not isinstance(data, dict) or "queries" not in data:
             log.error(error_msg)
+            return {}
 
         if not isinstance(data["queries"], list):
             log.error(error_msg)
+            return {}
 
         for item in data["queries"]:
             if (
@@ -236,6 +238,7 @@ def parse_queries_file(queries_file: str) -> dict:
                 or "sparql" not in item
             ):
                 log.error(error_msg)
+                return {}
 
         return data
 
@@ -250,7 +253,9 @@ def get_example_queries(
         # yaml file case -> convert to tsv (description \t query)
         if queries_file is not None:
             queries_data = self.parse_queries_file(queries_file)
-            queries = queries_data["queries"]
+            queries = queries_data.get("queries")
+            if queries is None:
+                return []
             example_query_lines = [
                 f"{query['query']}\t{query['sparql']}" for query in queries
             ]
@@ -278,12 +283,21 @@ def execute(self, args) -> bool:
             log.error("Cannot have both --remove-offset-and-limit and --limit")
             return False
 
+        if args.generate_output_file:
+            if args.output_basename is None or args.backend_name is None:
+                log.error(
+                    "Both --output-basename and --backend-name parameters"
+                    " must be passed when --generate-output-file is passed"
+                )
+                return False
+            args.accept = "AUTO"
+
         # If `args.accept` is `application/sparql-results+json` or
         # `application/qlever-results+json` or `AUTO`, we need `jq`.
-        if (
-            args.accept == "application/sparql-results+json"
-            or args.accept == "application/qlever-results+json"
-            or args.accept == "AUTO"
+        if args.accept in (
+            "application/sparql-results+json",
+            "application/qlever-results+json",
+            "AUTO",
         ):
             try:
                 subprocess.run(
@@ -311,6 +325,8 @@ def execute(self, args) -> bool:
             not args.sparql_endpoint
             or args.sparql_endpoint.startswith("https://qlever")
         )
+        if args.generate_output_file:
+            is_qlever = is_qlever or "qlever" in args.backend_name.lower()
         if args.clear_cache == "yes" and not is_qlever:
             log.warning("Clearing the cache only works for QLever")
             args.clear_cache = "no"
@@ -345,6 +361,7 @@ def execute(self, args) -> bool:
         if args.show:
             return True
 
+        # Get the example queries either from queries_file or get_queries_cmd
         example_query_lines = (
             self.get_example_queries(get_queries_cmd=get_queries_cmd)
             if args.queries_file is None
@@ -454,10 +471,22 @@ def execute(self, args) -> bool:
             # queries and `application/sparql-results+json` for all others.
             accept_header = args.accept
             if accept_header == "AUTO":
-                if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
+                if query_type == "DESCRIBE":
                     accept_header = "text/turtle"
+                elif query_type == "CONSTRUCT":
+                    accept_header = (
+                        "application/qlever-results+json"
+                        if is_qlever and args.generate_output_file
+                        else "text/turtle"
+                    )
                 else:
                     accept_header = "application/sparql-results+json"
+                    if args.generate_output_file:
+                        accept_header = (
+                            "application/qlever-results+json"
+                            if is_qlever
+                            else "text/tab-separated-values"
+                        )
 
             # Launch query.
             try:
@@ -469,8 +498,7 @@ def execute(self, args) -> bool:
                 )
                 log.debug(curl_cmd)
                 result_file = (
-                    f"qlever.example_queries.result."
-                    f"{abs(hash(curl_cmd))}.tmp"
+                    f"qlever.example_queries.result.{abs(hash(curl_cmd))}.tmp"
                 )
                 start_time = time.time()
                 http_code = run_curl_command(
@@ -528,7 +556,7 @@ def get_json_error_msg(e: Exception) -> dict[str, str]:
                     result_size = run_command(
                         f"sed 1d {result_file}", return_output=True
                     )
-                elif args.accept == "application/qlever-results+json":
+                elif accept_header == "application/qlever-results+json":
                     try:
                         # sed cmd to get the number between 2nd and 3rd double_quotes
                         result_size = run_command(
@@ -642,11 +670,13 @@ def get_json_error_msg(e: Exception) -> dict[str, str]:
                 )
                 yaml_record = self.get_record_for_yaml(
                     query=description,
-                    sparql=self.get_pretty_printed_query(query, True),
+                    sparql=self.pretty_printed_query(
+                        query, args.show_prefixes
+                    ),
                     client_time=time_seconds,
                     result=results_for_yaml,
                     result_size=result_length,
-                    is_qlever=is_qlever,
+                    accept_header=accept_header,
                 )
                 yaml_records["queries"].append(yaml_record)
 
@@ -722,7 +752,7 @@ def get_record_for_yaml(
         client_time: float,
         result: str | dict[str, str],
         result_size: int | None,
-        is_qlever: bool,
+        accept_header: str,
     ) -> dict[str, Any]:
         """
         Construct a dictionary with query information for yaml file
@@ -742,9 +772,9 @@ def get_record_for_yaml(
             else result_size
         )
         headers, results = self.get_query_results(
-            result, result_size, is_qlever
+            result, result_size, accept_header
         )
-        if is_qlever:
+        if accept_header == "application/qlever-results+json":
             runtime_info_cmd = (
                 f"jq 'if .runtimeInformation then"
                 f" .runtimeInformation else"
@@ -761,23 +791,33 @@ def get_record_for_yaml(
         return record
 
     def get_query_results(
-        self, result_file: str, result_size: int, is_qlever: bool
+        self, result_file: str, result_size: int, accept_header: str
     ) -> tuple[list[str], list[list[str]]]:
         """
         Return headers and results as a tuple
         """
-        if not is_qlever:
+        if accept_header == "text/tab-separated-values":
== "text/tab-separated-values": get_result_cmd = f"sed -n '1,{result_size + 1}p' {result_file}" results_str = run_command(get_result_cmd, return_output=True) results = results_str.splitlines() headers = [header for header in results[0].split("\t")] results = [result.split("\t") for result in results[1:]] return headers, results - else: + elif accept_header == "application/qlever-results+json": get_result_cmd = f"jq '{{headers: .selected, results: .res[0:{result_size}]}}' {result_file}" results_str = run_command(get_result_cmd, return_output=True) results_json = json.loads(results_str) return results_json["headers"], results_json["results"] + else: # text/turtle + graph = Graph() + graph.parse(result_file, format="turtle") + headers = ["?subject", "?predicate", "?object"] + results = [] + for i, (s, p, o) in enumerate(graph): + if i >= result_size: + break + results.append([str(s), str(p), str(o)]) + return headers, results @staticmethod def write_query_data_to_yaml(