Commit

Some refactor
marc-maynou committed Feb 27, 2024
1 parent 1378a88 commit b3ee6b3
Showing 36 changed files with 474 additions and 189 deletions.

4 changes: 4 additions & 0 deletions Modules/IntentSpecification2WorkflowGenerator/api/api_main.py
@@ -104,6 +104,10 @@ def download_knime():
plan_graph = Graph().parse(data=request.json.get("graph", ""), format='turtle')
ontology = Graph().parse(data=request.json.get('ontology', ''), format='turtle')

plan_graph.print()
print("--------------")
# ontology.print()

file_path = os.path.join(files_folder, f'{uuid.uuid4()}.ttl')
plan_graph.serialize(file_path, format='turtle')

Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -10,7 +10,7 @@
def add_dataset_info(dataset_path, graph, label):
dataset_node = ab.term(path.basename(dataset_path))
graph.add((dataset_node, RDF.type, dmop.TabularDataset))
- dataset = pd.read_csv(dataset_path)
+ dataset = pd.read_csv(dataset_path, encoding='latin', delimiter=";")
add_csv_info(dataset_path, dataset, dataset_node, graph)
add_column_info(dataset_path, dataset, dataset_node, graph, label)

@@ -17,9 +17,12 @@
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;
import java.util.stream.Collectors;

import static edu.upc.essi.dtim.nextiabs.utils.DF_MMtoRDFS.productionRulesDataframe_to_RDFS;
import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

/**
* Generates an RDFS-compliant representation of a CSV file schema
@@ -58,15 +61,15 @@ public Graph bootstrapSchema(Boolean generateMetadata) {
G_target.addTriple(createIRI(name), RDF.type, DataFrame_MM.DataFrame);
G_target.addTripleLiteral(createIRI(name), RDFS.label, name);
parser.getHeaderNames().forEach(h -> {
- String h2 = h.replace("\"", "").trim();
+ String h2 = reformatName(h);
G_target.addTriple(createIRI(h2),RDF.type,DataFrame_MM.Data);
G_target.addTripleLiteral(createIRI(h2), RDFS.label,h2 );
G_target.addTriple(createIRI(name),DataFrame_MM.hasData,createIRI(h2));
G_target.addTriple(createIRI(h2),DataFrame_MM.hasDataType,DataFrame_MM.String);
});

- String select = parser.getHeaderNames().stream().map(a -> a + " AS `" + a.replace(".","_") + "`").collect(Collectors.joining(", "));
- wrapper = "SELECT " + select + " FROM " + name;
+ String select = parser.getHeaderNames().stream().map(a -> a + " AS " + reformatName(a)).collect(Collectors.joining(", "));
+ wrapper = "SELECT " + select + " FROM `" + name + "`";

//TODO: implement metadata
// if(generateMetadata)
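Both the sanitized aliases and the backtick-quoted table name rely on the new Utils.reformatName helper introduced by this commit (shown further down). A minimal sketch of the wrapper string the new code yields, assuming a hypothetical CSV source named iris with headers "sepal length" and "petal width":

import java.util.List;
import java.util.stream.Collectors;

import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

public class WrapperDemo {
    public static void main(String[] args) {
        List<String> headers = List.of("sepal length", "petal width");
        String name = "iris";
        // Aliases are now sanitized, camel-cased names instead of backtick-quoted raw headers
        String select = headers.stream()
                .map(a -> a + " AS " + reformatName(a))
                .collect(Collectors.joining(", "));
        // The table name, in contrast, is now backtick-quoted
        String wrapper = "SELECT " + select + " FROM `" + name + "`";
        System.out.println(wrapper);
        // Prints: SELECT sepal length AS sepalLength, petal width AS petalWidth FROM `iris`
    }
}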
@@ -14,10 +14,7 @@
import org.apache.commons.compress.utils.Lists;
import org.apache.commons.lang3.tuple.Pair;

- import javax.json.Json;
- import javax.json.JsonArray;
- import javax.json.JsonObject;
- import javax.json.JsonValue;
+ import javax.json.*;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
@@ -27,6 +24,7 @@
import java.util.stream.Collectors;

import static edu.upc.essi.dtim.nextiabs.utils.DF_MMtoRDFS.productionRulesDataframe_to_RDFS;
import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

public class JSONBootstrap extends DataSource implements IBootstrap<Graph>, BootstrapODIN {
// Using DataFrame_MM and without Jena
@@ -67,7 +65,7 @@ public Graph bootstrapSchema(Boolean generateMetadata) {

String SELECT = attributesSWJ.entrySet().stream().map( p -> {
if (p.getKey().equals(p.getValue().getKey())) return p.getValue().getPath() + " AS `" + p.getKey() + "`";
- return p.getValue().getPath() + " AS `" + p.getValue().getLabel() + "`";
+ return p.getValue().getPath() + " AS " + reformatName(p.getValue().getLabel());
}).collect(Collectors.joining(", "));


@@ -76,8 +74,8 @@ public Graph bootstrapSchema(Boolean generateMetadata) {
// System.out.println(s.getLeft() + " ---- " + s.getRight());
// }

- String LATERAL = lateralViews.stream().map(p -> "LATERAL VIEW explode(" + p.getLeft() + ") AS " + p.getRight()).collect(Collectors.joining("\n"));
- wrapper = "SELECT " + SELECT + " FROM " + name + " " + LATERAL;
+ String LATERAL = lateralViews.stream().map(p -> "LATERAL VIEW explode(" + p.getLeft() + ") AS " + reformatName(p.getRight())).collect(Collectors.joining("\n"));
+ wrapper = "SELECT " + SELECT + " FROM `" + name + "` " + LATERAL;

//generateMetadata();

@@ -128,9 +126,21 @@ private void Document(String path, String D) {
throw new RuntimeException("File not found");
}

G_source.addTriple(createIRI(D), RDF.type, DataFrame_MM.DataSource);
- Object(Json.createReader(fis).readValue().asJsonObject(),new JSON_Aux(D,"",""));
+ // Step 1: Read data from fis and store it in a JSON array
+ JsonReader reader = Json.createReader(fis);
+ JsonArray jsonArray = reader.readArray();
+ reader.close();
+
+ // Step 2: Extract the JSON object from the JSON array
+ if (jsonArray.size() > 0) {
+ JsonObject jsonObject = jsonArray.getJsonObject(0); // Assuming jsonArray contains only one object
+ System.out.println(jsonObject);
+
+ // Step 3: Call the object method with the JSON object
+ Object(jsonObject, new JSON_Aux(D, "", ""));
+ } else {
+ throw new RuntimeException("Empty JSON array");
+ }
}

private void DataType(JsonValue D, JSON_Aux p) {
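The reworked Document method now assumes the source file contains a JSON array wrapping a single object rather than a bare object. A minimal sketch of the new parsing path, using a hypothetical inline document in place of the FileInputStream:

import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonObject;
import javax.json.JsonReader;
import java.io.StringReader;

public class JsonArrayDemo {
    public static void main(String[] args) {
        // The file content must now be an array, e.g. [ {...} ], not a bare object {...}
        String content = "[{\"id\": 1, \"name\": \"Alice\"}]";
        JsonReader reader = Json.createReader(new StringReader(content));
        JsonArray jsonArray = reader.readArray();
        reader.close();
        if (jsonArray.isEmpty()) {
            throw new RuntimeException("Empty JSON array");
        }
        // Only the first object is bootstrapped, mirroring the assumption in Document()
        JsonObject jsonObject = jsonArray.getJsonObject(0);
        System.out.println(jsonObject.getString("name")); // Alice
    }
}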
@@ -18,6 +18,7 @@
import java.util.List;

import static edu.upc.essi.dtim.nextiabs.utils.DF_MMtoRDFS.productionRulesDataframe_to_RDFS;
import static edu.upc.essi.dtim.nextiabs.utils.Utils.reformatName;

/**
* Generates an instance of a DataFrame_Metamodel representation of a postgresSQL database
@@ -58,10 +59,10 @@ public Graph bootstrapSchema(Boolean generateMetadata) {

List<String> columns = new LinkedList<>();
for (Pair<String, String> col: tableData.getColumns()) {
- columns.add(col.getLeft() + " AS `" + col.getLeft() + "`");
+ columns.add(col.getLeft() + " AS " + reformatName(col.getLeft()));
}
String columnNames = String.join(", ", columns);
wrapper = "SELECT " + columnNames + " FROM " + tableName;
wrapper = "SELECT " + columnNames + " FROM `" + tableName + "`";

if (generateMetadata) {
generateMetadata();
@@ -0,0 +1,37 @@
package edu.upc.essi.dtim.nextiabs.utils;

import java.util.Arrays;
import java.util.Objects;

public class Utils {
public static String reformatName(String tableName) {
// Trim whitespaces at the end and the beginning
tableName = tableName.trim();
// Remove characters that might cause problems
String pattern = "[!@#$%^&*)-+=\\[\\]{}\\\\|;:'\"<>,.?/]";
tableName = tableName.replaceAll(pattern, "");
// Convert the string to camel case
tableName = toCamelCase(tableName);

return tableName;
}

public static String toCamelCase(String inputString) {
StringBuilder camelCaseString = new StringBuilder();
String[] words = inputString.split("\\s+|(?<=\\()|(?=\\))"); // Split the input string by whitespace or parenthesis
words = Arrays.stream(words).filter(str -> !Objects.equals(str, "(")).toArray(String[]::new);
words = Arrays.stream(words).map(str -> str.replace("(", "")).toArray(String[]::new);

for (int i = 0; i < words.length; i++) {
String word = words[i];
if (i == 0) {
camelCaseString.append(word.toLowerCase()); // Convert the first word to lowercase
} else {
camelCaseString.append(Character.toUpperCase(word.charAt(0))); // Capitalize the first letter
camelCaseString.append(word.substring(1).toLowerCase()); // Convert the rest to lowercase
}
}

return camelCaseString.toString();
}
}
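
To make the helper's behavior concrete, a few hypothetical inputs and the outputs they produce under the rules above (trim, strip the punctuation pattern, then camel-case; note that '(' is deliberately absent from the strip pattern and is instead consumed by the split in toCamelCase):

// Assumes this class sits in edu.upc.essi.dtim.nextiabs.utils next to Utils
public class UtilsDemo {
    public static void main(String[] args) {
        System.out.println(Utils.reformatName("Sepal Length (cm)")); // sepalLengthCm
        System.out.println(Utils.reformatName("  unit.price  "));    // unitprice: the dot is stripped before camel-casing
        System.out.println(Utils.toCamelCase("average rainfall"));   // averageRainfall
    }
}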
@@ -208,19 +208,19 @@ else if (repo.getRepositoryType().equals("RelationalJDBCRepository")) {

// TODO: extend this to different formats and zones
@Override
- public String materialize(Dataset dataset, String zone, String format) {
- String csvFilePath = Paths.get(dataStorePath, "tmp", dataset.getUUID() + ".csv").toString();
+ public String materialize(String UUID, String zone, String format) {
+ String csvFilePath = Paths.get(dataStorePath, "tmp", UUID + ".csv").toString();
// As of now, we assume that it is always a csv
// String extension = "." + format;
try {
- ResultSet rs = stmt.executeQuery("SELECT * FROM " + zone + "_" + dataset.getUUID());
+ ResultSet rs = stmt.executeQuery("SELECT * FROM " + zone + "_" + UUID);
try (FileWriter writer = new FileWriter(csvFilePath)) {
// Header
int columnCount = rs.getMetaData().getColumnCount();
for (int i = 1; i <= columnCount; i++) {
writer.append(rs.getMetaData().getColumnName(i));
if (i < columnCount) {
- writer.append(",");
+ writer.append(";");
}
}
writer.append("\n");
@@ -230,7 +230,7 @@ public String materialize(Dataset dataset, String zone, String format) {
Object value = rs.getObject(i);
writer.append(value != null ? value.toString() : "null");
if (i < columnCount) {
- writer.append(",");
+ writer.append(";");
}
}
writer.append("\n");
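The delimiter flip from ',' to ';' apparently pairs with the delimiter=";" now passed to pd.read_csv earlier in this commit: free-text values can contain commas, which would break a comma-separated export. A minimal sketch of the convention, with hypothetical values:

import java.io.StringWriter;

public class DelimiterDemo {
    public static void main(String[] args) throws Exception {
        // Hypothetical two-column result, written the way materialize() now writes it
        String[] header = {"id", "description"};
        String[] row = {"1", "blue, with stripes"}; // free text may contain commas
        StringWriter writer = new StringWriter();
        writer.append(String.join(";", header)).append("\n");
        writer.append(String.join(";", row)).append("\n");
        System.out.print(writer);
        // id;description
        // 1;blue, with stripes
    }
}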
@@ -72,7 +72,7 @@ public void removeFromExploitationZone(String tableName) {
}

@Override
- public String materialize(Dataset dataset, String zone, String format) {
+ public String materialize(String UUID, String zone, String format) {
return null;
}

@@ -64,7 +64,7 @@ else if (d.getClass().equals(SQLDataset.class)) {
}
// we use the name because the wrapper is expecting the sql table to have the name of the dataset
assert df != null;
- df.createOrReplaceTempView(d.getDatasetName());
+ df.createOrReplaceTempView("`" + d.getDatasetName() + "`");
return spark.sql(d.getWrapper());
}

@@ -117,7 +117,7 @@ public String storeTemporalFile(InputStream inputFile, String newFileDirectory)
return storeTemporalFile(dataStorePath + "tmp", inputFile, newFileDirectory);
}

- public abstract String materialize(Dataset dataset, String zone, String format);
+ public abstract String materialize(String UUID, String zone, String format);

// ---------------- Others

@@ -8,7 +8,7 @@ public class CalculateQuality {
public CalculateQuality(Double l, double s) {this.l = l; this.s = s;}

public double calculateQualityDiscrete(double c, double k){
- if (c == 1 && k == 1) return 1.0;
+ // if (c == 1 && k == 1) return 1.0;
for (double i = 0; i<l; ++i) {
if ((c >= 1-(i/l)) && (k >= Math.pow(0.5, i))) {
return (l-i+1)/l;
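With the c == 1 && k == 1 shortcut commented out, every input now walks the discrete scale in the loop. A hypothetical invocation, assuming l = 4 levels (the s field is not used by this method):

public class QualityDemo {
    public static void main(String[] args) {
        CalculateQuality quality = new CalculateQuality(4.0, 0.5); // hypothetical l and s
        // First level satisfied is i = 1 (c >= 0.75, k >= 0.5), so the score is (4 - 1 + 1) / 4
        System.out.println(quality.calculateQualityDiscrete(0.8, 0.6)); // 1.0
        // First level satisfied is i = 2 (c >= 0.5, k >= 0.25), so the score is (4 - 2 + 1) / 4
        System.out.println(quality.calculateQualityDiscrete(0.6, 0.3)); // 0.75
    }
}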
@@ -5,10 +5,14 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;

import java.io.File;
import java.util.List;

@RestController
@@ -101,4 +105,29 @@ public ResponseEntity<String> materializeDataProduct(@PathVariable("dataProductI
String pathOfMaterializedDataProduct = dataProductService.materializeDataProduct(dataProductID);
return new ResponseEntity<>(pathOfMaterializedDataProduct, HttpStatus.OK);
}

/**
* Downloads a (temporal) DataProduct's content as a CSV file.
*
* @param dataProductUUID The UUID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download.
*/
@PostMapping("/project/{projectID}/download-temporal-data-product/{dataProductUUID}")
public ResponseEntity<FileSystemResource> downloadTemporalDataProduct(@PathVariable("dataProductUUID") String dataProductUUID) {
logger.info("Downloading data product");
return dataProductService.downloadTemporalDataProduct(dataProductUUID);
}

/**
* Downloads a DataProduct's content as a CSV file.
*
* @param dataProductID The ID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download.
*/
@PostMapping("/project/{projectID}/data-product/{dataProductID}/download")
public ResponseEntity<FileSystemResource> downloadDataProduct(@PathVariable("dataProductID") String dataProductID) {
logger.info("Downloading data product");
return dataProductService.downloadDataProduct(dataProductID);
}

}
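
For context, a hedged client-side sketch of calling the new download endpoint, assuming ODIN listens on http://localhost:8080 and using hypothetical project and data product IDs:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Path;

public class DownloadDemo {
    public static void main(String[] args) throws Exception {
        // The endpoint is a POST that answers with the CSV content as an attachment
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/project/p1/data-product/dp1/download"))
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
        HttpResponse<Path> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofFile(Path.of("file.csv")));
        System.out.println("CSV saved to " + response.body());
    }
}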
@@ -11,8 +11,13 @@
import edu.upc.essi.dtim.odin.projects.ProjectService;
import edu.upc.essi.dtim.odin.projects.pojo.Project;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;

import java.io.File;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
@@ -136,18 +141,62 @@ public List<DataProduct> getDataProductsOfProject(String projectID) {
return project.getDataProducts();
}

- // ---------------- Other operations
+ // ------------ Download/materialize operations

/**
- * Materializes a data product into a CSV file, mainly to be ingested by the intent generation pipeline.
+ * Materializes a data product into a CSV file.
*
- * @param dataProductID The ID of the data product to be materialized
- * @return If the task was successful return a ResponseEntity with an OK HTTP code.
+ * @param dataProductID The ID of the data product to be materialized
+ * @return If the task was successful returns the path where the materialized file resides
*/
public String materializeDataProduct(String dataProductID) {
// Own function to get the data product
DataProduct dp = ormDataResource.findById(DataProduct.class, dataProductID);
DataLayerInterface dataLayerInterFace = new DataLayerImpl(appConfig);
- return dataLayerInterFace.materialize(dp, "exp", "csv");
+ return dataLayerInterFace.materialize(dp.getUUID(), "exp", "csv");
}

/**
* Downloads a temporal data product; that is, one obtained just after executing a query over the data and before the
* data product has been stored in ODIN.
*
* @param dataProductUUID The UUID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download
*/
public ResponseEntity<FileSystemResource> downloadTemporalDataProduct(String dataProductUUID) {
DataLayerInterface dataLayerInterFace = new DataLayerImpl(appConfig);
String pathOfMaterializedDataProduct = dataLayerInterFace.materialize(dataProductUUID, "tmp_exp", "csv");
return downloadCSVFile(pathOfMaterializedDataProduct);
}

/**
* Downloads a data product.
*
* @param dataProductID The ID of the data product to be downloaded
* @return If the task was successful returns a ResponseEntity with the file to download
*/
public ResponseEntity<FileSystemResource> downloadDataProduct(String dataProductID) {
String pathOfMaterializedDataProduct = materializeDataProduct(dataProductID);
return downloadCSVFile(pathOfMaterializedDataProduct);
}

/**
* Downloads a CSV file generated from a data product.
*
* @param pathOfMaterializedDataProduct Path of the data product (in CSV format) to download
* @return If the task was successful returns a ResponseEntity with the file to download
*/
public ResponseEntity<FileSystemResource> downloadCSVFile(String pathOfMaterializedDataProduct) {
// Create a FileSystemResource to represent the CSV file
FileSystemResource file = new FileSystemResource(new File(pathOfMaterializedDataProduct));

// Set headers to trigger file download
HttpHeaders headers = new HttpHeaders();
headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=file.csv");

// Set the content type
headers.setContentType(MediaType.parseMediaType("text/csv"));

// Return ResponseEntity with the file content and headers
return ResponseEntity.ok().headers(headers).body(file);
}
}