Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/dtim-upc/ODIN
Browse files Browse the repository at this point in the history
  • Loading branch information
marc-maynou committed Aug 4, 2024
2 parents 17379b5 + 04ae9a7 commit cdc4a87
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 108 deletions.
100 changes: 56 additions & 44 deletions Modules/NextiaJD/src/main/java/edu/upc/essi/dtim/NextiaJD/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ public static void main(String[] args) {
// "AcquisitionID", "AcquisitionID");

// santos();
tus();
// tus();
// santosBig();
// nextiaJD();
nextiaJD();
// tusBig();
// scalability();
// d3l();
Expand All @@ -70,10 +70,12 @@ public static void scalability() {
try {
PredictQuality pq = new PredictQuality();

for (int i = 1; i <= 20; ++i) {
pq.calculateDistancesAttVsFolder("dummy_value", "file_" + i + "_profile.csv", "C:\\Users\\marc.maynou\\Desktop\\scalability\\sizes\\size_100_kb\\profiles");
System.out.println("Query column " + i + " out of " + 20);
long startTime = System.currentTimeMillis();
for (int i = 1; i <= 100; ++i) {
pq.calculateDistancesAttVsFolder("dummy_value", "file_" + i + "_profile.csv", "C:\\Users\\34601\\Desktop\\1_gb\\profiles");
System.out.println("Query column " + i + " out of " + 100);
}
System.out.println("Execution time: " + ((System.currentTimeMillis() - startTime) / 1000.0) + " seconds");
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand All @@ -85,7 +87,7 @@ public static void santos() {
PredictQuality pq = new PredictQuality();

List<Pair<String,String>> listOfQueryColumns = new LinkedList<>();
try (CSVReader reader = new CSVReader(new FileReader("C:\\Work\\NextiaJD\\other_datasets\\santos_benchmark_small\\santos_small_benchmark_groundtruth.csv"))) {
try (CSVReader reader = new CSVReader(new FileReader("C:\\Projects\\benchmarks\\santos_small\\santos_small_benchmark_groundtruth.csv"))) {
String[] headerLine = reader.readNext();
String[] line;
while ((line = reader.readNext()) != null) {
Expand All @@ -101,46 +103,48 @@ public static void santos() {

int counter = 1;

long startTime = System.currentTimeMillis();
for (Pair<String, String> pair: listOfQueryColumns) {
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Work\\NextiaJD\\other_datasets\\santos_benchmark_small\\profiles_short");
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Projects\\benchmarks\\santos_small\\profiles");
System.out.println("Query column " + counter + " out of " + listOfQueryColumns.size());
counter++;
}
System.out.println("Execution time: " + ((System.currentTimeMillis() - startTime) / 1000.0) + " seconds");
} catch (Exception e) {
throw new RuntimeException(e);
}
}

public static void tus() {
try {
generateAllProfilesOfAllDataInAFolder("C:\\Work\\NextiaJD\\other_datasets\\tus_small\\csvfiles", "C:\\Work\\NextiaJD\\other_datasets\\tus_small\\profiles");
// Connection conn = DuckDB.getConnection();
// PredictQuality pq = new PredictQuality();
//
// List<Pair<String,String>> listOfQueryColumns = new LinkedList<>();
// try (CSVReader reader = new CSVReader(new FileReader("C:\\Work\\NextiaJD\\other_datasets\\tus_small\\TUS_benchmark_relabeled_groundtruth.csv"))) {
// String[] headerLine = reader.readNext();
// String[] line;
// while ((line = reader.readNext()) != null) {
// String dataset = line[1];
// String attribute = line[4]; // the attribute name is the same for the two columns
// if (!listOfQueryColumns.contains(Pair.of(dataset, attribute))) {
// listOfQueryColumns.add(Pair.of(dataset, attribute));
// }
// }
// } catch (IOException e) {
// e.printStackTrace();
// }
//
// int counter = 1;
//
// for (Pair<String, String> pair: listOfQueryColumns) {
// if (counter < 20) {
// pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Work\\NextiaJD\\other_datasets\\tus_small\\profiles_short");
// System.out.println("Query column " + counter + " out of " + listOfQueryColumns.size());
// }
// counter++;
// }
// generateAllProfilesOfAllDataInAFolder("C:\\Work\\NextiaJD\\other_datasets\\tus\\tus_small\\csvfiles", "C:\\Work\\NextiaJD\\other_datasets\\tus\\tus_small\\profiles");
Connection conn = DuckDB.getConnection();
PredictQuality pq = new PredictQuality();

List<Pair<String,String>> listOfQueryColumns = new LinkedList<>();
try (CSVReader reader = new CSVReader(new FileReader("C:\\Projects\\benchmarks\\tus_small\\TUS_benchmark_relabeled_groundtruth.csv"))) {
String[] headerLine = reader.readNext();
String[] line;
while ((line = reader.readNext()) != null) {
String dataset = line[1];
String attribute = line[4]; // the attribute name is the same for the two columns
if (!listOfQueryColumns.contains(Pair.of(dataset, attribute))) {
listOfQueryColumns.add(Pair.of(dataset, attribute));
}
}
} catch (IOException e) {
e.printStackTrace();
}

int counter = 1;

long startTime = System.currentTimeMillis();
for (Pair<String, String> pair: listOfQueryColumns) {
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Projects\\benchmarks\\tus_small\\profiles");
System.out.println("Query column " + counter + " out of " + listOfQueryColumns.size());
counter++;
}
System.out.println("Execution time: " + ((System.currentTimeMillis() - startTime) / 1000.0) + " seconds");
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand All @@ -152,7 +156,7 @@ public static void santosBig() {
PredictQuality pq = new PredictQuality();

List<Pair<String,String>> listOfQueryColumns = new LinkedList<>();
try (CSVReader reader = new CSVReader(new FileReader("C:\\Work\\NextiaJD\\other_datasets\\santos_benchmark_big\\santos_big_benchmark_groundtruth.csv"))) {
try (CSVReader reader = new CSVReader(new FileReader("C:\\Projects\\benchmarks\\santos_big\\real_data_lake_benchmark_query_tables.csv"))) {
String[] headerLine = reader.readNext();
String[] line;
while ((line = reader.readNext()) != null) {
Expand All @@ -167,11 +171,14 @@ public static void santosBig() {
}
int counter = 1;

long startTime = System.currentTimeMillis();

for (Pair<String, String> pair: listOfQueryColumns) {
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Work\\NextiaJD\\other_datasets\\santos_benchmark_big\\profiles_short");
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Projects\\benchmarks\\santos_big\\profiles");
System.out.println("Query column " + counter + " out of " + listOfQueryColumns.size());
++counter;
}
System.out.println("Execution time: " + ((System.currentTimeMillis() - startTime) / 1000.0) + " seconds");
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand All @@ -182,7 +189,7 @@ public static void nextiaJD() {
PredictQuality pq = new PredictQuality();

List<Pair<String,String>> listOfQueryColumns = new LinkedList<>();
try (CSVReader reader = new CSVReader(new FileReader("C:\\Work\\NextiaJD\\nextia\\ground_truth_validate.csv"))) {
try (CSVReader reader = new CSVReader(new FileReader("C:\\Projects\\benchmarks\\nextia\\ground_truth_validate.csv"))) {
String[] headerLine = reader.readNext();
String[] line;
while ((line = reader.readNext()) != null) {
Expand All @@ -196,13 +203,14 @@ public static void nextiaJD() {
e.printStackTrace();
}
int counter = 1;

long startTime = System.currentTimeMillis();

for (Pair<String, String> pair: listOfQueryColumns) {
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Work\\NextiaJD\\nextia\\profiles");
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Projects\\benchmarks\\nextia\\profiles");
System.out.println("Query column " + counter + " out of " + listOfQueryColumns.size());
++counter;
}
System.out.println("Execution time: " + ((System.currentTimeMillis() - startTime) / 1000.0) + " seconds");
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand All @@ -213,7 +221,7 @@ public static void tusBig() {
PredictQuality pq = new PredictQuality();

List<Pair<String,String>> listOfQueryColumns = new LinkedList<>();
try (CSVReader reader = new CSVReader(new FileReader("C:\\Work\\NextiaJD\\other_datasets\\tus_big\\TUS_large_candidate_queries_sample.csv"))) {
try (CSVReader reader = new CSVReader(new FileReader("C:\\Projects\\benchmarks\\tus_big\\TUS_large_candidate_queries_sample.csv"))) {
String[] headerLine = reader.readNext();
String[] line;
while ((line = reader.readNext()) != null) {
Expand All @@ -227,12 +235,14 @@ public static void tusBig() {
e.printStackTrace();
}

long startTime = System.currentTimeMillis();
int counter = 0;
for (Pair<String, String> pair: listOfQueryColumns) {
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Work\\NextiaJD\\other_datasets\\tus_big\\profiles_short");
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft().replace(".csv", "_profile.csv"), "C:\\Projects\\benchmarks\\tus_big\\profiles");
System.out.println("Query column " + counter + " out of " + listOfQueryColumns.size());
++counter;
}
System.out.println("Execution time: " + ((System.currentTimeMillis() - startTime) / 1000.0) + " seconds");
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand All @@ -244,7 +254,7 @@ public static void d3l() {
PredictQuality pq = new PredictQuality();

List<Pair<String,String>> listOfQueryColumns = new LinkedList<>();
try (CSVReader reader = new CSVReader(new FileReader("C:\\Work\\NextiaJD\\other_datasets\\D3L\\d3l_ground_truth_sample.csv"))) {
try (CSVReader reader = new CSVReader(new FileReader("C:\\Projects\\benchmarks\\d3l\\d3l_ground_truth_sample.csv"))) {
String[] headerLine = reader.readNext();
String[] line;
while ((line = reader.readNext()) != null) {
Expand All @@ -260,11 +270,13 @@ public static void d3l() {

int counter = 1;

long startTime = System.currentTimeMillis();
for (Pair<String, String> pair: listOfQueryColumns) {
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft() + "_profile.csv", "C:\\Work\\NextiaJD\\other_datasets\\D3L\\profiles_short");
pq.calculateDistancesAttVsFolder(pair.getRight(), pair.getLeft() + "_profile.csv", "C:\\Projects\\benchmarks\\d3l\\profiles");
System.out.println("Query column " + counter + " out of " + listOfQueryColumns.size());
counter++;
}
System.out.println("Execution time: " + ((System.currentTimeMillis() - startTime) / 1000.0) + " seconds");
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand Down
Loading

0 comments on commit cdc4a87

Please sign in to comment.