Skip to content

Commit

Permalink
Blame: Experiment with both options (considering and ignoring whitespaces)
Browse files Browse the repository at this point in the history
  • Loading branch information
pouryafard75 authored and tsantalis committed Sep 5, 2024
1 parent a117bd4 commit 1185982
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 91 deletions.
55 changes: 26 additions & 29 deletions src/main/java/org/codetracker/blame/benchmark/BlameDiffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,14 @@
import org.eclipse.jgit.lib.Repository;

import java.util.*;
import java.util.function.Predicate;
import java.util.function.BiPredicate;

/* Created by pourya on 2024-07-14*/
public class BlameDiffer {

private final EnumSet<BlamerFactory> blamerFactories;
private EnumMap<BlamerFactory, List<LineBlameResult>> blameResults;
private List<String> content;
private Predicate<Integer> emptyLinesCondition;
private Repository repository;
private String commitId;
private String filePath;
private final Map<Integer, CodeElement> codeElementMap = new LinkedHashMap<>();
private int legitSize;
protected final EnumSet<BlamerFactory> blamerFactories;
protected LineNumberToCommitIDRecordManager benchmarkRecordManager;
protected BiPredicate<Integer, List<String>> emptyLinesCondition = (lineNumber, content) -> content.get(lineNumber-1).trim().isEmpty();

public BlameDiffer(EnumSet<BlamerFactory> blamerFactories){
this.blamerFactories = blamerFactories;
Expand All @@ -38,30 +32,37 @@ private EnumMap<BlamerFactory, List<LineBlameResult>> runBlamers(Repository repo
return results;
}

private static void verify(EnumMap<BlamerFactory, List<LineBlameResult>> blameResults) {
if (blameResults.size() != 2)
protected boolean verify(EnumMap<BlamerFactory, List<LineBlameResult>> results) {
if (results.size() != 2)
throw new RuntimeException("BlameDiffer only works with two blamers");
return true;
}

public Map<Integer, EnumMap<BlamerFactory, String>> diff(Repository repository, String commitId, String filePath) throws Exception {
this.repository = repository;
this.commitId = commitId;
this.filePath = filePath;
this.blameResults = runBlamers(repository, commitId, filePath);
this.content = Utils.getFileContentByCommit(repository, commitId, filePath);
this.emptyLinesCondition = lineNumber -> content.get(lineNumber-1).trim().isEmpty();
verify(blameResults);
LineNumberToCommitIDRecordManager benchmarkRecordManager = new LineNumberToCommitIDRecordManager();
public final BlameDifferResult diff(Repository repository, String commitId, String filePath) throws Exception {
EnumMap<BlamerFactory, List<LineBlameResult>> blameResults = prepareResults(repository, commitId, filePath);
List<String> content = Utils.getFileContentByCommit(repository, commitId, filePath);
benchmarkRecordManager = new LineNumberToCommitIDRecordManager();
benchmarkRecordManager.diff(blameResults);
Map<Integer, EnumMap<BlamerFactory, String>> table = benchmarkRecordManager.getRegistry();
table.entrySet().removeIf(entry -> emptyLinesCondition.test(entry.getKey()));
legitSize = table.size();
table.entrySet().removeIf(entry -> emptyLinesCondition.test(entry.getKey(), content));
int legitSize = table.size();
table = process(repository, commitId, filePath, table);
return new BlameDifferResult(table, makeCodeElementMap(table.keySet(), repository, commitId, filePath), legitSize);
}

protected Map<Integer, EnumMap<BlamerFactory, String>> process(Repository repository, String commitId, String filePath, Map<Integer, EnumMap<BlamerFactory, String>> table) {
table.entrySet().removeIf(entry -> entry.getValue().values().stream().distinct().count() == 1);
makeCodeElementMap(table.keySet());
return table;
}

void makeCodeElementMap(Set<Integer> lineNumbers){
/**
 * Runs every configured blamer on the given file revision and validates the
 * collected results before handing them back.
 *
 * @param repository repository to blame in
 * @param commitId   revision at which the file is blamed
 * @param filePath   path of the file to blame
 * @return the per-blamer line results, after {@code verify} has accepted them
 * @throws Exception if running the blamers fails
 */
private EnumMap<BlamerFactory, List<LineBlameResult>> prepareResults(Repository repository, String commitId, String filePath) throws Exception {
    final EnumMap<BlamerFactory, List<LineBlameResult>> resultsPerBlamer =
            runBlamers(repository, commitId, filePath);
    // verify(...) is overridable; its return value is intentionally not inspected here.
    verify(resultsPerBlamer);
    return resultsPerBlamer;
}

Map<Integer, CodeElement> makeCodeElementMap(Set<Integer> lineNumbers, Repository repository, String commitId, String filePath){
Map<Integer, CodeElement> codeElementMap = new LinkedHashMap<>();
for (Integer lineNumber : lineNumbers) {
try {
codeElementMap.put(lineNumber,
Expand All @@ -70,10 +71,6 @@ void makeCodeElementMap(Set<Integer> lineNumbers){
throw new RuntimeException(e);
}
}
}

public Map<Integer, CodeElement> getCodeElementMap() {
return codeElementMap;
}
public int getLegitSize() {return legitSize;}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@ public class BlameDifferDriver {
private static final EnumSet<BlamerFactory> blamerFactories =
EnumSet.of(
BlamerFactory.CliGitBlameIgnoringWhiteSpace,
BlamerFactory.CliGitBlameDefault,
BlamerFactory.FileTrackerBlame
);

private static final BlameDiffer blameDiffer = new BlameDifferOneWithMany(blamerFactories, BlamerFactory.FileTrackerBlame);

private static final String[][] dummies = {
{"https://github.com/checkstyle/checkstyle/commit/119fd4fb33bef9f5c66fc950396669af842c21a3", "src/main/java/com/puppycrawl/tools/checkstyle/Checker.java"},
{"https://github.com/javaparser/javaparser/commit/97555053af3025556efe1a168fd7943dac28a2a6", "javaparser-core/src/main/java/com/github/javaparser/printer/lexicalpreservation/Difference.java"},
Expand Down Expand Up @@ -59,10 +63,10 @@ public static void process(String url, String filePath, StatsCollector statsColl
String project = getProject(url);
String ownerSlashProject = owner + "/" + project;
Repository repository = gitService.cloneIfNotExists(REPOS_PATH + "/" + ownerSlashProject, URLHelper.getRepo(url));
BlameDiffer blameDiffer = new BlameDiffer(blamerFactories);
Map<Integer, EnumMap<BlamerFactory, String>> result = blameDiffer.diff(repository, commitId, filePath);
statsCollector.process(result, blameDiffer.getLegitSize(), blameDiffer.getCodeElementMap());
new CsvWriter(owner, project, commitId, filePath, blameDiffer.getCodeElementMap()).writeToCSV(result);
BlameDifferResult blameDifferResult = blameDiffer.diff(repository, commitId, filePath);
Map<Integer, EnumMap<BlamerFactory, String>> result = blameDifferResult.getTable();
statsCollector.process(result, blameDifferResult.getLegitSize(), blameDifferResult.getCodeElementMap());
new CsvWriter(owner, project, commitId, filePath, blameDifferResult.getCodeElementMap()).writeToCSV(result);
statsCollector.writeInfo();
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.codetracker.blame.benchmark;

import org.codetracker.blame.model.LineBlameResult;
import org.eclipse.jgit.lib.Repository;

import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;

/* Created by pourya on 2024-09-03*/
/**
 * A {@link BlameDiffer} that compares one designated blamer (the "subject")
 * against all the other configured blamers, instead of comparing exactly two.
 */
public class BlameDifferOneWithMany extends BlameDiffer {

    /** The blamer whose per-line value is compared with every other blamer's value. */
    protected final BlamerFactory subject;

    /**
     * @param blamerFactories all blamers to run
     * @param subject         the blamer the others are compared against
     */
    public BlameDifferOneWithMany(EnumSet<BlamerFactory> blamerFactories, BlamerFactory subject) {
        super(blamerFactories);
        this.subject = subject;
    }

    /**
     * Removes, in place, every row in which the subject's value is also reported
     * by at least one other blamer, and returns the same (mutated) table.
     */
    @Override
    protected Map<Integer, EnumMap<BlamerFactory, String>> process(Repository repository, String commitId, String filePath, Map<Integer, EnumMap<BlamerFactory, String>> table) {
        table.entrySet().removeIf(row -> subjectMatchesAnotherBlamer(row.getValue()));
        return table;
    }

    /**
     * Counts how many values in the row equal the subject's value; more than one
     * match means some blamer other than the subject reported the same value.
     */
    private boolean subjectMatchesAnotherBlamer(EnumMap<BlamerFactory, String> valuesByBlamer) {
        final String subjectValue = valuesByBlamer.get(subject);
        long matches = 0;
        // Every value is visited (no early exit) so evaluation matches a full filter/count pass.
        for (String value : valuesByBlamer.values()) {
            if (value.equals(subjectValue)) {
                matches++;
            }
        }
        return matches > 1;
    }

    /**
     * Accepts any number of blamer results; no validation is performed yet.
     */
    @Override
    protected boolean verify(EnumMap<BlamerFactory, List<LineBlameResult>> results) {
        return true; //TODO:
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.codetracker.blame.benchmark;

import org.codetracker.api.CodeElement;

import java.util.EnumMap;
import java.util.Map;

/**
 * Immutable holder for the outcome of a blame comparison: the remaining
 * per-line table, the code elements for those lines, and the "legit" size.
 * The maps are stored by reference, not copied.
 */
public class BlameDifferResult {

    /** Line number mapped to the value each blamer reported for that line. */
    private final Map<Integer, EnumMap<BlamerFactory, String>> table;

    /** Line number mapped to the code element located on that line. */
    private final Map<Integer, CodeElement> codeElementMap;

    /** Size value supplied by the producer of this result. */
    private final int legitSize;

    /**
     * @param table          per-line blame values keyed by line number
     * @param codeElementMap code elements keyed by line number
     * @param legitSize      size value to report alongside the table
     */
    public BlameDifferResult(Map<Integer, EnumMap<BlamerFactory, String>> table, Map<Integer, CodeElement> codeElementMap, int legitSize) {
        this.table = table;
        this.codeElementMap = codeElementMap;
        this.legitSize = legitSize;
    }

    /** @return the per-line blame table passed to the constructor */
    public Map<Integer, EnumMap<BlamerFactory, String>> getTable() {
        return this.table;
    }

    /** @return the code-element map passed to the constructor */
    public Map<Integer, CodeElement> getCodeElementMap() {
        return this.codeElementMap;
    }

    /** @return the size value passed to the constructor */
    public int getLegitSize() {
        return this.legitSize;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public enum BlamerFactory {

JGitBlameWithFollow(new JGitBlame()),
CliGitBlameIgnoringWhiteSpace(new CliGitBlame(true)),
CliGitBlameDefault(new CliGitBlame()),
CliGitBlameDefault(new CliGitBlame(false)),
CodeTrackerBlame(new CodeTrackerBlame()),
FileTrackerBlame(new FileTrackerBlame());
private final IBlame blamer;
Expand Down
110 changes: 53 additions & 57 deletions src/main/java/org/codetracker/blame/impl/CliGitBlame.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
/* Created by pourya on 2024-08-22*/
public class CliGitBlame implements IBlame {
private final boolean ignore_whitespace;

public CliGitBlame() {
this(false);
}
/**
 * Creates a blamer backed by the {@code git blame} command line.
 *
 * @param ignore_whitespace when {@code true}, the {@code -w} option is added
 *                          to the {@code git blame} command built by this blamer
 */
public CliGitBlame(boolean ignore_whitespace) {
this.ignore_whitespace = ignore_whitespace;
}
Expand All @@ -29,12 +25,11 @@ public List<LineBlameResult> blameFile(Repository repository, String commitId, S
Process process = null;

try {
// Construct the git blame command with the commit and file path
String[] command;
if (ignore_whitespace) {
command = new String[]{"git", "blame", "-w", commitId, "--", filePath};
command = new String[]{"git", "blame", "-n", "-w", "--follow", commitId, "--", filePath};
} else {
command = new String[]{"git", "blame", commitId, "--", filePath};
command = new String[]{"git", "blame", "-n", "--follow", commitId, "--", filePath};
}
ProcessBuilder processBuilder = new ProcessBuilder(command);
processBuilder.directory(repository.getDirectory());
Expand All @@ -46,58 +41,9 @@ public List<LineBlameResult> blameFile(Repository repository, String commitId, S
String line;
int lineNumber = 1;
while ((line = reader.readLine()) != null) {
if (line.charAt(0) == '^') {
line = line.substring(1);
}
// Extract commitId, committer, commitDate, filePath, and beforeFilePath from the blame line
String[] parts = line.split("\\s+", 3);
String blameCommitId = parts[0].trim(); // Extract the commit ID
String prevFilePath = parts[1].trim();

// Find the index of the first space to separate the number
int firstSpaceIndex = parts[2].indexOf(" ");
int resultLineNumber = -1;
try {
resultLineNumber = Integer.parseInt(parts[2].substring(0, firstSpaceIndex));
}
catch (NumberFormatException e) {

}

parts[2] = parts[2].substring(firstSpaceIndex + 1);

String[] meta = parts[2].split("\\s{2,}"); // Split on at least 2 spaces
String commiter = "";
long commitTime = 0;

// Regex to capture the timestamp
Pattern pattern = Pattern.compile("(.*?)(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} [+-]\\d{4})");
Matcher matcher = pattern.matcher(parts[2]);

if (matcher.find()) {
// Extract the committer and timestamp
commiter = matcher.group(1).trim(); // Everything before the timestamp
commiter = commiter.substring(1); // Remove the parentheses
String timestamp = matcher.group(2).trim(); // Timestamp


// Define the formatter for the timestamp
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss Z");

// Parse the timestamp string
OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestamp, formatter);

// Convert to Instant
Instant instant = offsetDateTime.toInstant();
commitTime = instant.toEpochMilli() / 1000;
}
LineBlameResult result = new LineBlameResult(blameCommitId, filePath, prevFilePath, commiter, commitTime, resultLineNumber, lineNumber);
blameResults.add(result);


blameResults.add(getLineBlameResult(line, filePath, lineNumber));
lineNumber++;
}

int exitCode = process.waitFor();
if (exitCode != 0) {
throw new Exception("Error executing git blame command");
Expand All @@ -111,6 +57,56 @@ public List<LineBlameResult> blameFile(Repository repository, String commitId, S
return blameResults;
}

/** Captures (1) the text preceding the timestamp and (2) the timestamp itself. */
private static final Pattern BLAME_TIMESTAMP_PATTERN =
        Pattern.compile("(.*?)(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} [+-]\\d{4})");

/** Parses timestamps of the form {@code 2024-09-05 10:15:30 -0400}. */
private static final DateTimeFormatter BLAME_TIMESTAMP_FORMATTER =
        DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss Z");

/**
 * Parses one line of {@code git blame -n} output into a {@link LineBlameResult}.
 *
 * <p>Two column layouts are handled:
 * <ul>
 *   <li>{@code <commit> <path> <origLine> (<author> <timestamp> <line>) <code>}</li>
 *   <li>{@code <commit> <origLine> (<author> <timestamp> <line>) <code>} - no path
 *       column; in that case {@code filePath} is reported as the previous path.
 *       NOTE(review): git is documented to omit the path when no line came from a
 *       differently named file - confirm against the git version in use.</li>
 * </ul>
 *
 * @param line       raw blame output line, optionally prefixed with {@code ^}
 * @param filePath   path of the file being blamed
 * @param lineNumber 1-based position of {@code line} in the blame output
 * @return the parsed result; the original line number is {@code -1} when it
 *         cannot be parsed, and committer/commit time stay {@code ""}/{@code 0}
 *         when no timestamp is found
 * @throws IllegalArgumentException if the line has fewer columns than expected
 */
private static LineBlameResult getLineBlameResult(String line, String filePath, int lineNumber) {
    // startsWith (rather than charAt(0)) also tolerates an empty line.
    if (line.startsWith("^")) {
        line = line.substring(1);
    }

    String[] parts = line.split("\\s+", 3);
    if (parts.length < 3) {
        throw new IllegalArgumentException(
                "Unexpected git blame output for " + filePath + " at line " + lineNumber + ": '" + line + "'");
    }

    String blameCommitId = parts[0].trim();
    String prevFilePath = parts[1].trim();
    int resultLineNumber = -1;
    String metadata;

    if (parts[2].startsWith("(")) {
        // No path column: the second column is the original line number and the
        // third column already starts with the "(author timestamp ..." section.
        metadata = parts[2];
        try {
            resultLineNumber = Integer.parseInt(prevFilePath);
            prevFilePath = filePath;
        } catch (NumberFormatException ignored) {
            // Second column is not a number: keep it as the path and leave -1.
        }
    } else {
        int firstSpaceIndex = parts[2].indexOf(' ');
        if (firstSpaceIndex < 0) {
            throw new IllegalArgumentException(
                    "Unexpected git blame output for " + filePath + " at line " + lineNumber + ": '" + line + "'");
        }
        try {
            resultLineNumber = Integer.parseInt(parts[2].substring(0, firstSpaceIndex));
        } catch (NumberFormatException ignored) {
            // Best effort: the original line number stays -1 and parsing continues.
        }
        metadata = parts[2].substring(firstSpaceIndex + 1);
    }

    String committer = "";
    long commitTime = 0;
    Matcher matcher = BLAME_TIMESTAMP_PATTERN.matcher(metadata);
    if (matcher.find()) {
        committer = matcher.group(1).trim();
        // Drop the opening parenthesis of the metadata section when present.
        if (committer.startsWith("(")) {
            committer = committer.substring(1);
        }
        commitTime = OffsetDateTime.parse(matcher.group(2), BLAME_TIMESTAMP_FORMATTER).toEpochSecond();
    }
    return new LineBlameResult(blameCommitId, filePath, prevFilePath, committer, commitTime, resultLineNumber, lineNumber);
}

@Override
public List<LineBlameResult> blameFile(Repository repository, String commitId, String filePath, int fromLine, int toLine) throws Exception {
// Implement logic to blame a specific range of lines in a file
Expand Down

0 comments on commit 1185982

Please sign in to comment.