From 11859822680640b7243a2ff799ef092e219bb106 Mon Sep 17 00:00:00 2001 From: Pouryafard75 Date: Tue, 3 Sep 2024 18:17:35 -0400 Subject: [PATCH] Blame: Experiment with both options (considering and ignoring whitespaces) --- .../blame/benchmark/BlameDiffer.java | 55 +++++---- .../blame/benchmark/BlameDifferDriver.java | 12 +- .../benchmark/BlameDifferOneWithMany.java | 34 ++++++ .../blame/benchmark/BlameDifferResult.java | 30 +++++ .../blame/benchmark/BlamerFactory.java | 2 +- .../codetracker/blame/impl/CliGitBlame.java | 110 +++++++++--------- 6 files changed, 152 insertions(+), 91 deletions(-) create mode 100644 src/main/java/org/codetracker/blame/benchmark/BlameDifferOneWithMany.java create mode 100644 src/main/java/org/codetracker/blame/benchmark/BlameDifferResult.java diff --git a/src/main/java/org/codetracker/blame/benchmark/BlameDiffer.java b/src/main/java/org/codetracker/blame/benchmark/BlameDiffer.java index e1579c0f6b8..9545f22ae2a 100644 --- a/src/main/java/org/codetracker/blame/benchmark/BlameDiffer.java +++ b/src/main/java/org/codetracker/blame/benchmark/BlameDiffer.java @@ -7,20 +7,14 @@ import org.eclipse.jgit.lib.Repository; import java.util.*; -import java.util.function.Predicate; +import java.util.function.BiPredicate; /* Created by pourya on 2024-07-14*/ public class BlameDiffer { - private final EnumSet blamerFactories; - private EnumMap> blameResults; - private List content; - private Predicate emptyLinesCondition; - private Repository repository; - private String commitId; - private String filePath; - private final Map codeElementMap = new LinkedHashMap<>(); - private int legitSize; + protected final EnumSet blamerFactories; + protected LineNumberToCommitIDRecordManager benchmarkRecordManager; + protected BiPredicate> emptyLinesCondition = (lineNumber, content) -> content.get(lineNumber-1).trim().isEmpty(); public BlameDiffer(EnumSet blamerFactories){ this.blamerFactories = blamerFactories; @@ -38,30 +32,37 @@ private EnumMap> runBlamers(Repository repo return results; } - private static void verify(EnumMap> blameResults) { - if (blameResults.size() != 2) + protected boolean verify(EnumMap> results) { + if (results.size() != 2) throw new RuntimeException("BlameDiffer only works with two blamers"); + return true; } - public Map> diff(Repository repository, String commitId, String filePath) throws Exception { - this.repository = repository; - this.commitId = commitId; - this.filePath = filePath; - this.blameResults = runBlamers(repository, commitId, filePath); - this.content = Utils.getFileContentByCommit(repository, commitId, filePath); - this.emptyLinesCondition = lineNumber -> content.get(lineNumber-1).trim().isEmpty(); - verify(blameResults); - LineNumberToCommitIDRecordManager benchmarkRecordManager = new LineNumberToCommitIDRecordManager(); + public final BlameDifferResult diff(Repository repository, String commitId, String filePath) throws Exception { + EnumMap> blameResults = prepareResults(repository, commitId, filePath); + List content = Utils.getFileContentByCommit(repository, commitId, filePath); + benchmarkRecordManager = new LineNumberToCommitIDRecordManager(); benchmarkRecordManager.diff(blameResults); Map> table = benchmarkRecordManager.getRegistry(); - table.entrySet().removeIf(entry -> emptyLinesCondition.test(entry.getKey())); - legitSize = table.size(); + table.entrySet().removeIf(entry -> emptyLinesCondition.test(entry.getKey(), content)); + int legitSize = table.size(); + table = process(repository, commitId, filePath, table); + return new BlameDifferResult(table, makeCodeElementMap(table.keySet(), repository, commitId, filePath), legitSize); + } + + protected Map> process(Repository repository, String commitId, String filePath, Map> table) { table.entrySet().removeIf(entry -> entry.getValue().values().stream().distinct().count() == 1); - makeCodeElementMap(table.keySet()); return table; } - void makeCodeElementMap(Set lineNumbers){ + private EnumMap> prepareResults(Repository repository, String commitId, String filePath) throws Exception { + EnumMap> blameResults = runBlamers(repository, commitId, filePath); + verify(blameResults); + return blameResults; + } + + Map makeCodeElementMap(Set lineNumbers, Repository repository, String commitId, String filePath){ + Map codeElementMap = new LinkedHashMap<>(); for (Integer lineNumber : lineNumbers) { try { codeElementMap.put(lineNumber, @@ -70,10 +71,6 @@ void makeCodeElementMap(Set lineNumbers){ throw new RuntimeException(e); } } - } - - public Map getCodeElementMap() { return codeElementMap; } - public int getLegitSize() {return legitSize;} } diff --git a/src/main/java/org/codetracker/blame/benchmark/BlameDifferDriver.java b/src/main/java/org/codetracker/blame/benchmark/BlameDifferDriver.java index ca14513ff20..d73701dadad 100644 --- a/src/main/java/org/codetracker/blame/benchmark/BlameDifferDriver.java +++ b/src/main/java/org/codetracker/blame/benchmark/BlameDifferDriver.java @@ -17,8 +17,12 @@ public class BlameDifferDriver { private static final EnumSet blamerFactories = EnumSet.of( BlamerFactory.CliGitBlameIgnoringWhiteSpace, + BlamerFactory.CliGitBlameDefault, BlamerFactory.FileTrackerBlame ); + + private static final BlameDiffer blameDiffer = new BlameDifferOneWithMany(blamerFactories, BlamerFactory.FileTrackerBlame); + private static final String[][] dummies = { {"https://github.com/checkstyle/checkstyle/commit/119fd4fb33bef9f5c66fc950396669af842c21a3", "src/main/java/com/puppycrawl/tools/checkstyle/Checker.java"}, {"https://github.com/javaparser/javaparser/commit/97555053af3025556efe1a168fd7943dac28a2a6", "javaparser-core/src/main/java/com/github/javaparser/printer/lexicalpreservation/Difference.java"}, @@ -59,10 +63,10 @@ public static void process(String url, String filePath, StatsCollector statsColl String project = getProject(url); String ownerSlashProject = owner + "/" + project; Repository repository = gitService.cloneIfNotExists(REPOS_PATH + "/" + ownerSlashProject, URLHelper.getRepo(url)); - BlameDiffer blameDiffer = new BlameDiffer(blamerFactories); - Map> result = blameDiffer.diff(repository, commitId, filePath); - statsCollector.process(result, blameDiffer.getLegitSize(), blameDiffer.getCodeElementMap()); - new CsvWriter(owner, project, commitId, filePath, blameDiffer.getCodeElementMap()).writeToCSV(result); + BlameDifferResult blameDifferResult = blameDiffer.diff(repository, commitId, filePath); + Map> result = blameDifferResult.getTable(); + statsCollector.process(result, blameDifferResult.getLegitSize(), blameDifferResult.getCodeElementMap()); + new CsvWriter(owner, project, commitId, filePath, blameDifferResult.getCodeElementMap()).writeToCSV(result); statsCollector.writeInfo(); } } diff --git a/src/main/java/org/codetracker/blame/benchmark/BlameDifferOneWithMany.java b/src/main/java/org/codetracker/blame/benchmark/BlameDifferOneWithMany.java new file mode 100644 index 00000000000..b624dccbf9e --- /dev/null +++ b/src/main/java/org/codetracker/blame/benchmark/BlameDifferOneWithMany.java @@ -0,0 +1,34 @@ +package org.codetracker.blame.benchmark; + +import org.codetracker.blame.model.LineBlameResult; +import org.eclipse.jgit.lib.Repository; + +import java.util.EnumMap; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +/* Created by pourya on 2024-09-03*/ +public class BlameDifferOneWithMany extends BlameDiffer { + + protected final BlamerFactory subject; + public BlameDifferOneWithMany(EnumSet blamerFactories, BlamerFactory subject) { + super(blamerFactories); + this.subject = subject; + } + + @Override + protected Map> process(Repository repository, String commitId, String filePath, Map> table) { + table.entrySet().removeIf(entry -> { + EnumMap factories = entry.getValue(); + String subject_value = factories.get(subject); + return factories.values().stream().filter(value -> value.equals(subject_value)).count() > 1; + }); + return table; + } + + @Override + protected boolean verify(EnumMap> results) { + return true; //TODO: + } +} diff --git a/src/main/java/org/codetracker/blame/benchmark/BlameDifferResult.java b/src/main/java/org/codetracker/blame/benchmark/BlameDifferResult.java new file mode 100644 index 00000000000..0431cd19ca0 --- /dev/null +++ b/src/main/java/org/codetracker/blame/benchmark/BlameDifferResult.java @@ -0,0 +1,30 @@ +package org.codetracker.blame.benchmark; + +import org.codetracker.api.CodeElement; + +import java.util.EnumMap; +import java.util.Map; + +public class BlameDifferResult{ + private final Map> table; + private final Map codeElementMap; + private final int legitSize; + + public BlameDifferResult(Map> table, Map codeElementMap, int legitSize) { + this.table = table; + this.codeElementMap = codeElementMap; + this.legitSize = legitSize; + } + + public Map> getTable() { + return table; + } + + public Map getCodeElementMap() { + return codeElementMap; + } + + public int getLegitSize() { + return legitSize; + } +} diff --git a/src/main/java/org/codetracker/blame/benchmark/BlamerFactory.java b/src/main/java/org/codetracker/blame/benchmark/BlamerFactory.java index d2bb1f358a3..aa078d4caed 100644 --- a/src/main/java/org/codetracker/blame/benchmark/BlamerFactory.java +++ b/src/main/java/org/codetracker/blame/benchmark/BlamerFactory.java @@ -10,7 +10,7 @@ public enum BlamerFactory { JGitBlameWithFollow(new JGitBlame()), CliGitBlameIgnoringWhiteSpace(new CliGitBlame(true)), - CliGitBlameDefault(new CliGitBlame()), + CliGitBlameDefault(new CliGitBlame(false)), CodeTrackerBlame(new CodeTrackerBlame()), FileTrackerBlame(new FileTrackerBlame()); private final IBlame blamer; diff --git a/src/main/java/org/codetracker/blame/impl/CliGitBlame.java b/src/main/java/org/codetracker/blame/impl/CliGitBlame.java index ac8247d1aca..88a30811b17 100644 --- a/src/main/java/org/codetracker/blame/impl/CliGitBlame.java +++ b/src/main/java/org/codetracker/blame/impl/CliGitBlame.java @@ -16,10 +16,6 @@ /* Created by pourya on 2024-08-22*/ public class CliGitBlame implements IBlame { private final boolean ignore_whitespace; - - public CliGitBlame() { - this(false); - } public CliGitBlame(boolean ignore_whitespace) { this.ignore_whitespace = ignore_whitespace; } @@ -29,12 +25,11 @@ public List blameFile(Repository repository, String commitId, S Process process = null; try { - // Construct the git blame command with the commit and file path String[] command; if (ignore_whitespace) { - command = new String[]{"git", "blame", "-w", commitId, "--", filePath}; + command = new String[]{"git", "blame", "-n", "-w", "--follow", commitId, "--", filePath}; } else { - command = new String[]{"git", "blame", commitId, "--", filePath}; + command = new String[]{"git", "blame", "-n", "--follow", commitId, "--", filePath}; } ProcessBuilder processBuilder = new ProcessBuilder(command); processBuilder.directory(repository.getDirectory()); @@ -46,58 +41,9 @@ public List blameFile(Repository repository, String commitId, S String line; int lineNumber = 1; while ((line = reader.readLine()) != null) { - if (line.charAt(0) == '^') { - line = line.substring(1); - } - // Extract commitId, committer, commitDate, filePath, and beforeFilePath from the blame line - String[] parts = line.split("\\s+", 3); - String blameCommitId = parts[0].trim(); // Extract the commit ID - String prevFilePath = parts[1].trim(); - - // Find the index of the first space to separate the number - int firstSpaceIndex = parts[2].indexOf(" "); - int resultLineNumber = -1; - try { - resultLineNumber = Integer.parseInt(parts[2].substring(0, firstSpaceIndex)); - } - catch (NumberFormatException e) { - - } - - parts[2] = parts[2].substring(firstSpaceIndex + 1); - - String[] meta = parts[2].split("\\s{2,}"); // Split on at least 2 spaces - String commiter = ""; - long commitTime = 0; - - // Regex to capture the timestamp - Pattern pattern = Pattern.compile("(.*?)(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} [+-]\\d{4})"); - Matcher matcher = pattern.matcher(parts[2]); - - if (matcher.find()) { - // Extract the committer and timestamp - commiter = matcher.group(1).trim(); // Everything before the timestamp - commiter = commiter.substring(1); // Remove the parentheses - String timestamp = matcher.group(2).trim(); // Timestamp - - - // Define the formatter for the timestamp - DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss Z"); - - // Parse the timestamp string - OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestamp, formatter); - - // Convert to Instant - Instant instant = offsetDateTime.toInstant(); - commitTime = instant.toEpochMilli() / 1000; - } - LineBlameResult result = new LineBlameResult(blameCommitId, filePath, prevFilePath, commiter, commitTime, resultLineNumber, lineNumber); - blameResults.add(result); - - + blameResults.add(getLineBlameResult(line, filePath, lineNumber)); lineNumber++; } - int exitCode = process.waitFor(); if (exitCode != 0) { throw new Exception("Error executing git blame command"); @@ -111,6 +57,56 @@ public List blameFile(Repository repository, String commitId, S return blameResults; } + private static LineBlameResult getLineBlameResult(String line, String filePath, int lineNumber) { + if (line.charAt(0) == '^') { + line = line.substring(1); + } + // Extract commitId, committer, commitDate, filePath, and beforeFilePath from the blame line + String[] parts = line.split("\\s+", 3); + String blameCommitId = parts[0].trim(); // Extract the commit ID + String prevFilePath = parts[1].trim(); + + // Find the index of the first space to separate the number + int firstSpaceIndex = parts[2].indexOf(" "); + int resultLineNumber = -1; + try { + resultLineNumber = Integer.parseInt(parts[2].substring(0, firstSpaceIndex)); + parts[2] = parts[2].substring(firstSpaceIndex + 1); + } + catch (NumberFormatException e) { +// System.out.println("Error parsing line number: " + parts[2].substring(0, firstSpaceIndex)); + parts[2] = parts[2].substring(firstSpaceIndex + 1); + } + + + String[] meta = parts[2].split("\\s{2,}"); // Split on at least 2 spaces + String commiter = ""; + long commitTime = 0; + + // Regex to capture the timestamp + Pattern pattern = Pattern.compile("(.*?)(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} [+-]\\d{4})"); + Matcher matcher = pattern.matcher(parts[2]); + + if (matcher.find()) { + // Extract the committer and timestamp + commiter = matcher.group(1).trim(); // Everything before the timestamp + commiter = commiter.substring(1); // Remove the parentheses + String timestamp = matcher.group(2).trim(); // Timestamp + + + // Define the formatter for the timestamp + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss Z"); + + // Parse the timestamp string + OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestamp, formatter); + + // Convert to Instant + Instant instant = offsetDateTime.toInstant(); + commitTime = instant.toEpochMilli() / 1000; + } + return new LineBlameResult(blameCommitId, filePath, prevFilePath, commiter, commitTime, resultLineNumber, lineNumber); + } + @Override public List blameFile(Repository repository, String commitId, String filePath, int fromLine, int toLine) throws Exception { // Implement logic to blame a specific range of lines in a file