Commit

Some refactor
marc-maynou committed Feb 27, 2024
1 parent 1378a88 commit b3ee6b3
Showing 36 changed files with 474 additions and 189 deletions.

4 changes: 4 additions & 0 deletions Modules/IntentSpecification2WorkflowGenerator/api/api_main.py
@@ -104,6 +104,10 @@ def download_knime():
plan_graph = Graph().parse(data=request.json.get("graph", ""), format='turtle')
ontology = Graph().parse(data=request.json.get('ontology', ''), format='turtle')

plan_graph.print()
print("--------------")
# ontology.print()

file_path = os.path.join(files_folder, f'{uuid.uuid4()}.ttl')
plan_graph.serialize(file_path, format='turtle')

Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -10,7 +10,7 @@
def add_dataset_info(dataset_path, graph, label):
dataset_node = ab.term(path.basename(dataset_path))
graph.add((dataset_node, RDF.type, dmop.TabularDataset))
- dataset = pd.read_csv(dataset_path)
+ dataset = pd.read_csv(dataset_path, encoding='latin', delimiter=";")
add_csv_info(dataset_path, dataset, dataset_node, graph)
add_column_info(dataset_path, dataset, dataset_node, graph, label)

@@ -17,9 +17,12 @@
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.stream.Collectors;

import static edu.upc.essi.dtim.nextiabs.utils.DF_MMtoRDFS.productionRulesDataframe_to_RDFS;
import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

/**
* Generates an RDFS-compliant representation of a CSV file schema
@@ -58,15 +61,15 @@ public Graph bootstrapSchema(Boolean generateMetadata) {
G_target.addTriple(createIRI(name), RDF.type, DataFrame_MM.DataFrame);
G_target.addTripleLiteral(createIRI(name), RDFS.label, name);
parser.getHeaderNames().forEach(h -> {
- String h2 = h.replace("\"", "").trim();
+ String h2 = reformatName(h);
G_target.addTriple(createIRI(h2),RDF.type,DataFrame_MM.Data);
G_target.addTripleLiteral(createIRI(h2), RDFS.label,h2 );
G_target.addTriple(createIRI(name),DataFrame_MM.hasData,createIRI(h2));
G_target.addTriple(createIRI(h2),DataFrame_MM.hasDataType,DataFrame_MM.String);
});

- String select = parser.getHeaderNames().stream().map(a -> a + " AS `" + a.replace(".","_") + "`").collect(Collectors.joining(", "));
- wrapper = "SELECT " + select + " FROM " + name;
+ String select = parser.getHeaderNames().stream().map(a -> a + " AS " + reformatName(a)).collect(Collectors.joining(", "));
+ wrapper = "SELECT " + select + " FROM `" + name + "`";

//TODO: implement metadata
// if(generateMetadata)
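Both the sanitized aliases and the backtick-quoted table name rely on the new Utils.reformatName helper introduced by this commit (shown further down). A minimal sketch of the wrapper string the new code yields, assuming a hypothetical CSV source named iris with headers "sepal length" and "petal width":

import java.util.List;
import java.util.stream.Collectors;

import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

public class WrapperDemo {
    public static void main(String[] args) {
        List<String> headers = List.of("sepal length", "petal width");
        String name = "iris";
        // Aliases are now sanitized, camel-cased names instead of backtick-quoted raw headers
        String select = headers.stream()
                .map(a -> a + " AS " + reformatName(a))
                .collect(Collectors.joining(", "));
        // The table name, in contrast, is now backtick-quoted
        String wrapper = "SELECT " + select + " FROM `" + name + "`";
        System.out.println(wrapper);
        // Prints: SELECT sepal length AS sepalLength, petal width AS petalWidth FROM `iris`
    }
}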
@@ -14,10 +14,7 @@
import org.apache.commons.compress.utils.Lists;
import org.apache.commons.lang3.tuple.Pair;

- import javax.json.Json;
- import javax.json.JsonArray;
- import javax.json.JsonObject;
- import javax.json.JsonValue;
+ import javax.json.*;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
@@ -27,6 +24,7 @@
import java.util.stream.Collectors;

import static edu.upc.essi.dtim.nextiabs.utils.DF_MMtoRDFS.productionRulesDataframe_to_RDFS;
import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

public class JSONBootstrap extends DataSource implements IBootstrap<Graph>, BootstrapODIN {
// Using DataFrame_MM and without Jena
@@ -67,7 +65,7 @@ public Graph bootstrapSchema(Boolean generateMetadata) {

String SELECT = attributesSWJ.entrySet().stream().map( p -> {
if (p.getKey().equals(p.getValue().getKey())) return p.getValue().getPath() + " AS `" + p.getKey() + "`";
- return p.getValue().getPath() + " AS `" + p.getValue().getLabel() + "`";
+ return p.getValue().getPath() + " AS " + reformatName(p.getValue().getLabel());
}).collect(Collectors.joining(", "));


@@ -76,8 +74,8 @@ public Graph bootstrapSchema(Boolean generateMetadata) {
// System.out.println(s.getLeft() + " ---- " + s.getRight());
// }

- String LATERAL = lateralViews.stream().map(p -> "LATERAL VIEW explode(" + p.getLeft() + ") AS " + p.getRight()).collect(Collectors.joining("\n"));
- wrapper = "SELECT " + SELECT + " FROM " + name + " " + LATERAL;
+ String LATERAL = lateralViews.stream().map(p -> "LATERAL VIEW explode(" + p.getLeft() + ") AS " + reformatName(p.getRight())).collect(Collectors.joining("\n"));
+ wrapper = "SELECT " + SELECT + " FROM `" + name + "` " + LATERAL;

//generateMetadata();

@@ -128,9 +126,21 @@ private void Document(String path, String D) {
throw new RuntimeException("File not found");
}

G_source.addTriple(createIRI(D), RDF.type, DataFrame_MM.DataSource);
- Object(Json.createReader(fis).readValue().asJsonObject(),new JSON_Aux(D,"",""));
+ // Step 1: Read data from fis and store it in a JSON array
+ JsonReader reader = Json.createReader(fis);
+ JsonArray jsonArray = reader.readArray();
+ reader.close();
+
+ // Step 2: Extract the JSON object from the JSON array
+ if (jsonArray.size() > 0) {
+ JsonObject jsonObject = jsonArray.getJsonObject(0); // Assuming jsonArray contains only one object
+ System.out.println(jsonObject);
+
+ // Step 3: Call the object method with the JSON object
+ Object(jsonObject, new JSON_Aux(D, "", ""));
+ } else {
+ throw new RuntimeException("Empty JSON array");
+ }
}

private void DataType(JsonValue D, JSON_Aux p) {
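The reworked Document method now assumes the source file contains a JSON array wrapping a single object rather than a bare object. A minimal sketch of the new parsing path, using a hypothetical inline document in place of the FileInputStream:

import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonObject;
import javax.json.JsonReader;
import java.io.StringReader;

public class JsonArrayDemo {
    public static void main(String[] args) {
        // The file content must now be an array, e.g. [ {...} ], not a bare object {...}
        String content = "[{\"id\": 1, \"name\": \"Alice\"}]";
        JsonReader reader = Json.createReader(new StringReader(content));
        JsonArray jsonArray = reader.readArray();
        reader.close();
        if (jsonArray.isEmpty()) {
            throw new RuntimeException("Empty JSON array");
        }
        // Only the first object is bootstrapped, mirroring the assumption in Document()
        JsonObject jsonObject = jsonArray.getJsonObject(0);
        System.out.println(jsonObject.getString("name")); // Alice
    }
}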
@@ -18,6 +18,7 @@
import java.util.List;

import static edu.upc.essi.dtim.nextiabs.utils.DF_MMtoRDFS.productionRulesDataframe_to_RDFS;
import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

/**
* Generates an instance of a DataFrame_Metamodel representation of a postgresSQL database
@@ -58,10 +59,10 @@ public Graph bootstrapSchema(Boolean generateMetadata) {

List<String> columns = new LinkedList<>();
for (Pair<String, String> col: tableData.getColumns()) {
- columns.add(col.getLeft() + " AS `" + col.getLeft() + "`");
+ columns.add(col.getLeft() + " AS " + reformatName(col.getLeft()));
}
String columnNames = String.join(", ", columns);
wrapper = "SELECT " + columnNames + " FROM " + tableName;
wrapper = "SELECT " + columnNames + " FROM `" + tableName + "`";

if (generateMetadata) {
generateMetadata();
@@ -0,0 +1,37 @@
package edu.upc.essi.dtim.nextiabs.utils;

import java.util.Arrays;
import java.util.Objects;

public class Utils {
public static String reformatName(String tableName) {
// Trim whitespaces at the end and the beginning
tableName = tableName.trim();
// Remove characters that might cause problems
String pattern = "[!@#$%^&*)-+=\\[\\]{}\\\\|;:'\"<>,.?/]";
tableName = tableName.replaceAll(pattern, "");
// Convert the string to camel case
tableName = toCamelCase(tableName);

return tableName;
}

public static String toCamelCase(String inputString) {
StringBuilder camelCaseString = new StringBuilder();
String[] words = inputString.split("\\s+|(?<=\\()|(?=\\))"); // Split the input string by whitespace or parenthesis
words = Arrays.stream(words).filter(str -> !Objects.equals(str, "(")).toArray(String[]::new);
words = Arrays.stream(words).map(str -> str.replace("(", "")).toArray(String[]::new);

for (int i = 0; i < words.length; i++) {
String word = words[i];
if (i == 0) {
camelCaseString.append(word.toLowerCase()); // Convert the first word to lowercase
} else {
camelCaseString.append(Character.toUpperCase(word.charAt(0))); // Capitalize the first letter
camelCaseString.append(word.substring(1).toLowerCase()); // Convert the rest to lowercase
}
}

return camelCaseString.toString();
}
}
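
To make the helper's behavior concrete, a few hypothetical inputs and the outputs they produce under the rules above (trim, strip the punctuation pattern, then camel-case; note that '(' is deliberately absent from the strip pattern and is instead consumed by the split in toCamelCase):

// Assumes this class sits in edu.upc.essi.dtim.nextiabs.utils next to Utils
public class UtilsDemo {
    public static void main(String[] args) {
        System.out.println(Utils.reformatName("Sepal Length (cm)")); // sepalLengthCm
        System.out.println(Utils.reformatName("  unit.price  "));    // unitprice: the dot is stripped before camel-casing
        System.out.println(Utils.toCamelCase("average rainfall"));   // averageRainfall
    }
}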
@@ -208,19 +208,19 @@ else if (repo.getRepositoryType().equals("RelationalJDBCRepository")) {

// TODO: extend this to different formats and zones
@Override
- public String materialize(Dataset dataset, String zone, String format) {
- String csvFilePath = Paths.get(dataStorePath, "tmp", dataset.getUUID() + ".csv").toString();
+ public String materialize(String UUID, String zone, String format) {
+ String csvFilePath = Paths.get(dataStorePath, "tmp", UUID + ".csv").toString();
// As of now, we assume that it is always a csv
// String extension = "." + format;
try {
- ResultSet rs = stmt.executeQuery("SELECT * FROM " + zone + "_" + dataset.getUUID());
+ ResultSet rs = stmt.executeQuery("SELECT * FROM " + zone + "_" + UUID);
try (FileWriter writer = new FileWriter(csvFilePath)) {
// Header
int columnCount = rs.getMetaData().getColumnCount();
for (int i = 1; i <= columnCount; i++) {
writer.append(rs.getMetaData().getColumnName(i));
if (i < columnCount) {
- writer.append(",");
+ writer.append(";");
}
}
writer.append("\n");
@@ -230,7 +230,7 @@ public String materialize(Dataset dataset, String zone, String format) {
Object value = rs.getObject(i);
writer.append(value != null ? value.toString() : "null");
if (i < columnCount) {
- writer.append(",");
+ writer.append(";");
}
}
writer.append("\n");
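The delimiter flip from ',' to ';' apparently pairs with the delimiter=";" now passed to pd.read_csv earlier in this commit: free-text values can contain commas, which would break a comma-separated export. A minimal sketch of the convention, with hypothetical values:

import java.io.StringWriter;

public class DelimiterDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical two-column result, written the way materialize() now writes it
        String[] header = {"id", "description"};
        String[] row = {"1", "blue, with stripes"}; // free text may contain commas
        StringWriter writer = new StringWriter();
        writer.append(String.join(";", header)).append("\n");
        writer.append(String.join(";", row)).append("\n");
        System.out.print(writer);
        // id;description
        // 1;blue, with stripes
    }
}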
@@ -72,7 +72,7 @@ public void removeFromExploitationZone(String tableName) {
}

@Override
- public String materialize(Dataset dataset, String zone, String format) {
+ public String materialize(String UUID, String zone, String format) {
return null;
}

@@ -64,7 +64,7 @@ else if (d.getClass().equals(SQLDataset.class)) {
}
// we use the name because the wrapper is expecting the sql table to have the name of the dataset
assert df != null;
- df.createOrReplaceTempView(d.getDatasetName());
+ df.createOrReplaceTempView("`" + d.getDatasetName() + "`");
return spark.sql(d.getWrapper());
}

@@ -117,7 +117,7 @@ public String storeTemporalFile(InputStream inputFile, String newFileDirectory)
return storeTemporalFile(dataStorePath + "tmp", inputFile, newFileDirectory);
}

- public abstract String materialize(Dataset dataset, String zone, String format);
+ public abstract String materialize(String UUID, String zone, String format);

// ---------------- Others

@@ -8,7 +8,7 @@ public class CalculateQuality {
public CalculateQuality(Double l, double s) {this.l = l; this.s = s;}

public double calculateQualityDiscrete(double c, double k){
- if (c == 1 && k == 1) return 1.0;
+ // if (c == 1 && k == 1) return 1.0;
for (double i = 0; i<l; ++i) {
if ((c >= 1-(i/l)) && (k >= Math.pow(0.5, i))) {
return (l-i+1)/l;
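With the c == 1 && k == 1 shortcut commented out, every input now walks the discrete scale in the loop. A hypothetical invocation, assuming l = 4 levels (the s field is not used by this method):

public class QualityDemo {
    public static void main(String[] args) {
        CalculateQuality quality = new CalculateQuality(4.0, 0.5); // hypothetical l and s
        // First level satisfied is i = 1 (c >= 0.75, k >= 0.5), so the score is (4 - 1 + 1) / 4
        System.out.println(quality.calculateQualityDiscrete(0.8, 0.6)); // 1.0
        // First level satisfied is i = 2 (c >= 0.5, k >= 0.25), so the score is (4 - 2 + 1) / 4
        System.out.println(quality.calculateQualityDiscrete(0.6, 0.3)); // 0.75
    }
}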
@@ -5,10 +5,14 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;

import java.io.File;
import java.util.List;

@RestController
@@ -101,4 +105,29 @@ public ResponseEntity<String> materializeDataProduct(@PathVariable("dataProductI
String pathOfMaterializedDataProduct = dataProductService.materializeDataProduct(dataProductID);
return new ResponseEntity<>(pathOfMaterializedDataProduct, HttpStatus.OK);
}

/**
* Downloads a (temporal) DataProduct's content as a CSV file.
*
* @param dataProductUUID The UUID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download.
*/
@PostMapping("/project/{projectID}/download-temporal-data-product/{dataProductUUID}")
public ResponseEntity<FileSystemResource> downloadTemporalDataProduct(@PathVariable("dataProductUUID") String dataProductUUID) {
logger.info("Downloading data product");
return dataProductService.downloadTemporalDataProduct(dataProductUUID);
}

/**
* Downloads a DataProduct's content as a CSV file.
*
* @param dataProductID The ID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download.
*/
@PostMapping("/project/{projectID}/data-product/{dataProductID}/download")
public ResponseEntity<FileSystemResource> downloadDataProduct(@PathVariable("dataProductID") String dataProductID) {
logger.info("Downloading data product");
return dataProductService.downloadDataProduct(dataProductID);
}

}
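
For context, a hedged client-side sketch of calling the new download endpoint, assuming ODIN listens on http://localhost:8080 and using hypothetical project and data product IDs:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Path;

public class DownloadDemo {
    public static void main(String[] args) throws Exception {
        // The endpoint is a POST that answers with the CSV content as an attachment
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/project/p1/data-product/dp1/download"))
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
        HttpResponse<Path> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofFile(Path.of("file.csv")));
        System.out.println("CSV saved to " + response.body());
    }
}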
@@ -11,8 +11,13 @@
import edu.upc.essi.dtim.odin.projects.ProjectService;
import edu.upc.essi.dtim.odin.projects.pojo.Project;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;

import java.io.File;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
@@ -136,18 +141,62 @@ public List<DataProduct> getDataProductsOfProject(String projectID) {
return project.getDataProducts();
}

- // ---------------- Other operations
+ // ------------ Download/materialize operations

/**
- * Materializes a data product into a CSV file, mainly to be ingested by the intent generation pipeline.
+ * Materializes a data product into a CSV file.
*
- * @param dataProductID The ID of the data product to be materialized
- * @return If the task was successful return a ResponseEntity with an OK HTTP code.
+ * @param dataProductID The ID of the data product to be materialized
+ * @return If the task was successful returns the path where the materialized file resides
*/
public String materializeDataProduct(String dataProductID) {
// Own function to get the data product
DataProduct dp = ormDataResource.findById(DataProduct.class, dataProductID);
DataLayerInterface dataLayerInterFace = new DataLayerImpl(appConfig);
- return dataLayerInterFace.materialize(dp, "exp", "csv");
+ return dataLayerInterFace.materialize(dp.getUUID(), "exp", "csv");
}

/**
* Downloads a temporal data product; that is, one obtained just after executing a query over the data and before the
* data product has been stored in ODIN.
*
* @param dataProductUUID The UUID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download
*/
public ResponseEntity<FileSystemResource> downloadTemporalDataProduct(String dataProductUUID) {
DataLayerInterface dataLayerInterFace = new DataLayerImpl(appConfig);
String pathOfMaterializedDataProduct = dataLayerInterFace.materialize(dataProductUUID, "tmp_exp", "csv");
return downloadCSVFile(pathOfMaterializedDataProduct);
}

/**
* Downloads a data product.
*
* @param dataProductID The ID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download
*/
public ResponseEntity<FileSystemResource> downloadDataProduct(String dataProductID) {
String pathOfMaterializedDataProduct = materializeDataProduct(dataProductID);
return downloadCSVFile(pathOfMaterializedDataProduct);
}

/**
* Downloads a CSV file generated from a data product.
*
* @param pathOfMaterializedDataProduct Path of the data product (in CSV format) to download
* @return If the task was successful returns a ResponseEntity with the file to download
*/
public ResponseEntity<FileSystemResource> downloadCSVFile(String pathOfMaterializedDataProduct) {
// Create a FileSystemResource to represent the CSV file
FileSystemResource file = new FileSystemResource(new File(pathOfMaterializedDataProduct));

// Set headers to trigger file download
HttpHeaders headers = new HttpHeaders();
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=file.csv");

// Set the content type
headers.setContentType(MediaType.parseMediaType("text/csv"));

// Return ResponseEntity with the file content and headers
return ResponseEntity.ok().headers(headers).body(file);
}
}