Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Blame benchmark #188

Merged
merged 4 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 26 additions & 29 deletions src/main/java/org/codetracker/blame/benchmark/BlameDiffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,14 @@
import org.eclipse.jgit.lib.Repository;

import java.util.*;
import java.util.function.Predicate;
import java.util.function.BiPredicate;

/* Created by pourya on 2024-07-14*/
public class BlameDiffer {

private final EnumSet<BlamerFactory> blamerFactories;
private EnumMap<BlamerFactory, List<LineBlameResult>> blameResults;
private List<String> content;
private Predicate<Integer> emptyLinesCondition;
private Repository repository;
private String commitId;
private String filePath;
private final Map<Integer, CodeElement> codeElementMap = new LinkedHashMap<>();
private int legitSize;
protected final EnumSet<BlamerFactory> blamerFactories;
protected LineNumberToCommitIDRecordManager benchmarkRecordManager;
protected BiPredicate<Integer, List<String>> emptyLinesCondition = (lineNumber, content) -> content.get(lineNumber-1).trim().isEmpty();

public BlameDiffer(EnumSet<BlamerFactory> blamerFactories){
this.blamerFactories = blamerFactories;
Expand All @@ -38,30 +32,37 @@ private EnumMap<BlamerFactory, List<LineBlameResult>> runBlamers(Repository repo
return results;
}

private static void verify(EnumMap<BlamerFactory, List<LineBlameResult>> blameResults) {
if (blameResults.size() != 2)
protected boolean verify(EnumMap<BlamerFactory, List<LineBlameResult>> results) {
if (results.size() != 2)
throw new RuntimeException("BlameDiffer only works with two blamers");
return true;
}

public Map<Integer, EnumMap<BlamerFactory, String>> diff(Repository repository, String commitId, String filePath) throws Exception {
this.repository = repository;
this.commitId = commitId;
this.filePath = filePath;
this.blameResults = runBlamers(repository, commitId, filePath);
this.content = Utils.getFileContentByCommit(repository, commitId, filePath);
this.emptyLinesCondition = lineNumber -> content.get(lineNumber-1).trim().isEmpty();
verify(blameResults);
LineNumberToCommitIDRecordManager benchmarkRecordManager = new LineNumberToCommitIDRecordManager();
public final BlameDifferResult diff(Repository repository, String commitId, String filePath) throws Exception {
EnumMap<BlamerFactory, List<LineBlameResult>> blameResults = prepareResults(repository, commitId, filePath);
List<String> content = Utils.getFileContentByCommit(repository, commitId, filePath);
benchmarkRecordManager = new LineNumberToCommitIDRecordManager();
benchmarkRecordManager.diff(blameResults);
Map<Integer, EnumMap<BlamerFactory, String>> table = benchmarkRecordManager.getRegistry();
table.entrySet().removeIf(entry -> emptyLinesCondition.test(entry.getKey()));
legitSize = table.size();
table.entrySet().removeIf(entry -> emptyLinesCondition.test(entry.getKey(), content));
int legitSize = table.size();
table = process(repository, commitId, filePath, table);
return new BlameDifferResult(table, makeCodeElementMap(table.keySet(), repository, commitId, filePath), legitSize);
}

protected Map<Integer, EnumMap<BlamerFactory, String>> process(Repository repository, String commitId, String filePath, Map<Integer, EnumMap<BlamerFactory, String>> table) {
table.entrySet().removeIf(entry -> entry.getValue().values().stream().distinct().count() == 1);
makeCodeElementMap(table.keySet());
return table;
}

void makeCodeElementMap(Set<Integer> lineNumbers){
/**
 * Obtains the per-blamer results for one file via {@code runBlamers} and hands them to
 * {@code verify} before returning them.
 *
 * @param repository repository passed through to {@code runBlamers}
 * @param commitId   commit passed through to {@code runBlamers}
 * @param filePath   file path passed through to {@code runBlamers}
 * @return the map produced by {@code runBlamers}, unchanged
 * @throws Exception whatever {@code runBlamers} or {@code verify} throws
 */
private EnumMap<BlamerFactory, List<LineBlameResult>> prepareResults(Repository repository, String commitId, String filePath) throws Exception {
    EnumMap<BlamerFactory, List<LineBlameResult>> perBlamerResults = runBlamers(repository, commitId, filePath);
    // The boolean returned by verify is not inspected; a failed check surfaces as an exception.
    verify(perBlamerResults);
    return perBlamerResults;
}

Map<Integer, CodeElement> makeCodeElementMap(Set<Integer> lineNumbers, Repository repository, String commitId, String filePath){
Map<Integer, CodeElement> codeElementMap = new LinkedHashMap<>();
for (Integer lineNumber : lineNumbers) {
try {
codeElementMap.put(lineNumber,
Expand All @@ -70,10 +71,6 @@ void makeCodeElementMap(Set<Integer> lineNumbers){
throw new RuntimeException(e);
}
}
}

public Map<Integer, CodeElement> getCodeElementMap() {
return codeElementMap;
}
public int getLegitSize() {return legitSize;}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@ public class BlameDifferDriver {
private static final String REPOS_PATH = System.getProperty("user.dir") + "/tmp";
private static final EnumSet<BlamerFactory> blamerFactories =
EnumSet.of(
BlamerFactory.CliGitBlame,
BlamerFactory.CliGitBlameIgnoringWhiteSpace,
BlamerFactory.CliGitBlameDefault,
BlamerFactory.FileTrackerBlame
);

private static final BlameDiffer blameDiffer = new BlameDifferOneWithMany(blamerFactories, BlamerFactory.FileTrackerBlame);

private static final String[][] dummies = {
{"https://github.com/checkstyle/checkstyle/commit/119fd4fb33bef9f5c66fc950396669af842c21a3", "src/main/java/com/puppycrawl/tools/checkstyle/Checker.java"},
{"https://github.com/javaparser/javaparser/commit/97555053af3025556efe1a168fd7943dac28a2a6", "javaparser-core/src/main/java/com/github/javaparser/printer/lexicalpreservation/Difference.java"},
Expand Down Expand Up @@ -59,10 +63,10 @@ public static void process(String url, String filePath, StatsCollector statsColl
String project = getProject(url);
String ownerSlashProject = owner + "/" + project;
Repository repository = gitService.cloneIfNotExists(REPOS_PATH + "/" + ownerSlashProject, URLHelper.getRepo(url));
BlameDiffer blameDiffer = new BlameDiffer(blamerFactories);
Map<Integer, EnumMap<BlamerFactory, String>> result = blameDiffer.diff(repository, commitId, filePath);
statsCollector.process(result, blameDiffer.getLegitSize(), blameDiffer.getCodeElementMap());
new CsvWriter(owner, project, commitId, filePath, blameDiffer.getCodeElementMap()).writeToCSV(result);
BlameDifferResult blameDifferResult = blameDiffer.diff(repository, commitId, filePath);
Map<Integer, EnumMap<BlamerFactory, String>> result = blameDifferResult.getTable();
statsCollector.process(result, blameDifferResult.getLegitSize(), blameDifferResult.getCodeElementMap());
new CsvWriter(owner, project, commitId, filePath, blameDifferResult.getCodeElementMap()).writeToCSV(result);
statsCollector.writeInfo();
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.codetracker.blame.benchmark;

import org.codetracker.blame.model.LineBlameResult;
import org.eclipse.jgit.lib.Repository;

import java.util.EnumMap;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;

/* Created by pourya on 2024-09-03*/
/**
 * A {@link BlameDiffer} that compares one designated blamer (the "subject") against all of
 * the other configured blamers.
 */
public class BlameDifferOneWithMany extends BlameDiffer {

    /** The blamer whose per-line value every other blamer's value is compared with. */
    protected final BlamerFactory subject;

    /**
     * @param blamerFactories the blamers to run; forwarded to the {@link BlameDiffer} constructor
     * @param subject         the blamer used as the reference point in {@link #process}
     */
    public BlameDifferOneWithMany(EnumSet<BlamerFactory> blamerFactories, BlamerFactory subject) {
        super(blamerFactories);
        this.subject = subject;
    }

    /**
     * Drops, in place, every row in which the subject's value is shared by at least one
     * other blamer, and returns the same (mutated) table.
     */
    @Override
    protected Map<Integer, EnumMap<BlamerFactory, String>> process(Repository repository, String commitId, String filePath, Map<Integer, EnumMap<BlamerFactory, String>> table) {
        table.entrySet().removeIf(row -> subjectAgreesWithAnotherBlamer(row.getValue()));
        return table;
    }

    /**
     * Tells whether more than one value in {@code row} equals the value recorded for the subject.
     * The subject's own entry accounts for one match, so a count above one means that some
     * other blamer reported the same value.
     */
    private boolean subjectAgreesWithAnotherBlamer(EnumMap<BlamerFactory, String> row) {
        String subjectValue = row.get(subject);
        long matches = 0;
        for (String value : row.values()) {
            if (value.equals(subjectValue)) {
                matches++;
            }
        }
        return matches > 1;
    }

    /**
     * Accepts any set of results without checking anything, unlike the parent implementation
     * which rejects result maps whose size is not two.
     */
    @Override
    protected boolean verify(EnumMap<BlamerFactory, List<LineBlameResult>> results) {
        // TODO: no validation is implemented yet.
        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package org.codetracker.blame.benchmark;

import org.codetracker.api.CodeElement;

import java.util.EnumMap;
import java.util.Map;

/**
 * Read-only holder for the three values produced by one {@code BlameDiffer.diff} call:
 * the per-line table, the per-line code elements and the "legit" size.
 * The maps are stored and returned by reference; no copies are made.
 */
public class BlameDifferResult {

    /** Line number mapped to the value each blamer reported for that line. */
    private final Map<Integer, EnumMap<BlamerFactory, String>> table;

    /** Line number mapped to the code element associated with that line. */
    private final Map<Integer, CodeElement> codeElementMap;

    /** Size value supplied by the creator of this result. */
    private final int legitSize;

    /**
     * @param table          per-line blamer values
     * @param codeElementMap per-line code elements
     * @param legitSize      size value to expose through {@link #getLegitSize()}
     */
    public BlameDifferResult(
            Map<Integer, EnumMap<BlamerFactory, String>> table,
            Map<Integer, CodeElement> codeElementMap,
            int legitSize) {
        this.legitSize = legitSize;
        this.codeElementMap = codeElementMap;
        this.table = table;
    }

    /** @return the size value given at construction */
    public int getLegitSize() {
        return this.legitSize;
    }

    /** @return the code element map given at construction (same instance) */
    public Map<Integer, CodeElement> getCodeElementMap() {
        return this.codeElementMap;
    }

    /** @return the table given at construction (same instance) */
    public Map<Integer, EnumMap<BlamerFactory, String>> getTable() {
        return this.table;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
public enum BlamerFactory {

JGitBlameWithFollow(new JGitBlame()),
CliGitBlame(new CliGitBlame()),
CliGitBlameIgnoringWhiteSpace(new CliGitBlame(true)),
CliGitBlameDefault(new CliGitBlame(false)),
CodeTrackerBlame(new CodeTrackerBlame()),
FileTrackerBlame(new FileTrackerBlame());
private final IBlame blamer;
Expand Down
114 changes: 61 additions & 53 deletions src/main/java/org/codetracker/blame/impl/CliGitBlame.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,22 @@

/* Created by pourya on 2024-08-22*/
public class CliGitBlame implements IBlame {

private final boolean ignore_whitespace;
public CliGitBlame(boolean ignore_whitespace) {
this.ignore_whitespace = ignore_whitespace;
}
@Override
public List<LineBlameResult> blameFile(Repository repository, String commitId, String filePath) throws Exception {
List<LineBlameResult> blameResults = new ArrayList<>();
Process process = null;

try {
// Construct the git blame command with the commit and file path
String[] command = {"git", "blame", "-n", "-w", "--follow", commitId, "--", filePath};
String[] command;
if (ignore_whitespace) {
command = new String[]{"git", "blame", "-n", "-w", "--follow", commitId, "--", filePath};
} else {
command = new String[]{"git", "blame", "-n", "--follow", commitId, "--", filePath};
}
ProcessBuilder processBuilder = new ProcessBuilder(command);
processBuilder.directory(repository.getDirectory());

Expand All @@ -34,58 +41,9 @@ public List<LineBlameResult> blameFile(Repository repository, String commitId, S
String line;
int lineNumber = 1;
while ((line = reader.readLine()) != null) {
if (line.charAt(0) == '^') {
line = line.substring(1);
}
// Extract commitId, committer, commitDate, filePath, and beforeFilePath from the blame line
String[] parts = line.split("\\s+", 3);
String blameCommitId = parts[0].trim(); // Extract the commit ID
String prevFilePath = parts[1].trim();

// Find the index of the first space to separate the number
int firstSpaceIndex = parts[2].indexOf(" ");
int resultLineNumber = -1;
try {
resultLineNumber = Integer.parseInt(parts[2].substring(0, firstSpaceIndex));
}
catch (NumberFormatException e) {

}

parts[2] = parts[2].substring(firstSpaceIndex + 1);

String[] meta = parts[2].split("\\s{2,}"); // Split on at least 2 spaces
String commiter = "";
long commitTime = 0;

// Regex to capture the timestamp
Pattern pattern = Pattern.compile("(.*?)(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} [+-]\\d{4})");
Matcher matcher = pattern.matcher(parts[2]);

if (matcher.find()) {
// Extract the committer and timestamp
commiter = matcher.group(1).trim(); // Everything before the timestamp
commiter = commiter.substring(1); // Remove the parentheses
String timestamp = matcher.group(2).trim(); // Timestamp


// Define the formatter for the timestamp
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss Z");

// Parse the timestamp string
OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestamp, formatter);

// Convert to Instant
Instant instant = offsetDateTime.toInstant();
commitTime = instant.toEpochMilli() / 1000;
}
LineBlameResult result = new LineBlameResult(blameCommitId, filePath, prevFilePath, commiter, commitTime, resultLineNumber, lineNumber);
blameResults.add(result);


blameResults.add(getLineBlameResult(line, filePath, lineNumber));
lineNumber++;
}

int exitCode = process.waitFor();
if (exitCode != 0) {
throw new Exception("Error executing git blame command");
Expand All @@ -99,6 +57,56 @@ public List<LineBlameResult> blameFile(Repository repository, String commitId, S
return blameResults;
}

/**
 * Captures a timestamp of the form {@code yyyy-MM-dd HH:mm:ss +ZZZZ}; group 1 is the text
 * preceding it, group 2 the timestamp itself. Compiled once instead of once per blamed line.
 */
private static final Pattern BLAME_TIMESTAMP_PATTERN =
        Pattern.compile("(.*?)(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} [+-]\\d{4})");

/** Formatter for the timestamp captured by {@link #BLAME_TIMESTAMP_PATTERN}; built once and reused. */
private static final DateTimeFormatter BLAME_TIMESTAMP_FORMATTER =
        DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss Z");

/**
 * Parses one line of {@code git blame -n} output into a {@link LineBlameResult}.
 *
 * <p>The line is split on whitespace into: commit id, previous file path, and the remainder.
 * The first token of the remainder is read as the original line number; the committer and
 * commit time are taken from the text around the first timestamp found in what follows.
 *
 * @param line       a single, non-empty line of blame output
 * @param filePath   path of the blamed file, passed through to the result
 * @param lineNumber 1-based position of {@code line} in the blame output, passed through to the result
 * @return the parsed result; the original line number is {@code -1} when it is not numeric, and
 *         committer / commit time stay {@code ""} / {@code 0} when no timestamp is found
 */
private static LineBlameResult getLineBlameResult(String line, String filePath, int lineNumber) {
    // A leading '^' is stripped so it does not end up in the commit id
    // (presumably git's boundary-commit marker - confirm against git-blame docs).
    if (line.charAt(0) == '^') {
        line = line.substring(1);
    }

    String[] parts = line.split("\\s+", 3);
    String blameCommitId = parts[0].trim();
    String prevFilePath = parts[1].trim();

    // The token up to the first space of the remainder is expected to be the line number.
    String remainder = parts[2];
    int firstSpaceIndex = remainder.indexOf(" ");
    int resultLineNumber = -1;
    try {
        resultLineNumber = Integer.parseInt(remainder.substring(0, firstSpaceIndex));
    } catch (NumberFormatException ignored) {
        // Deliberately tolerated: a non-numeric token leaves resultLineNumber at -1.
        // NOTE(review): in this case the leading token is still discarded below, which may cut
        // into the committer text - confirm this is the intended fallback.
    }
    // The leading token is dropped whether or not it parsed as a number.
    remainder = remainder.substring(firstSpaceIndex + 1);

    String commiter = "";
    long commitTime = 0;

    Matcher matcher = BLAME_TIMESTAMP_PATTERN.matcher(remainder);
    if (matcher.find()) {
        // Everything before the timestamp, minus its first character (the opening parenthesis).
        commiter = matcher.group(1).trim().substring(1);

        String timestamp = matcher.group(2).trim();
        OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestamp, BLAME_TIMESTAMP_FORMATTER);
        // Commit time is stored in whole seconds since the epoch.
        commitTime = offsetDateTime.toEpochSecond();
    }
    return new LineBlameResult(blameCommitId, filePath, prevFilePath, commiter, commitTime, resultLineNumber, lineNumber);
}

@Override
public List<LineBlameResult> blameFile(Repository repository, String commitId, String filePath, int fromLine, int toLine) throws Exception {
// Implement logic to blame a specific range of lines in a file
Expand Down
27 changes: 27 additions & 0 deletions src/test/java/org/codetracker/blame/CliBlameTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.codetracker.blame;

import org.codetracker.blame.impl.CliGitBlame;
import org.junit.jupiter.api.Test;
import org.refactoringminer.api.GitService;
import org.refactoringminer.util.GitServiceImpl;

import static org.codetracker.blame.CodeTrackerBlameTest.assertEqualWithFile;
import static org.codetracker.blame.util.Utils.*;

/* Created by pourya on 2024-08-22*/
/**
 * Checks the output of {@link CliGitBlame} in both of its modes against stored expectation files.
 */
public class CliBlameTest {
    private static final GitService gitService = new GitServiceImpl();
    private final String REPOS_PATH = System.getProperty("user.dir") + "/tmp";

    /**
     * Produces blame output for one file at one commit, once with {@code CliGitBlame(true)} and
     * once with {@code CliGitBlame(false)}, and compares each with its own expected-output file.
     */
    @Test
    public void testCliGitBlame() throws Exception {
        final String commitUrl = "https://github.com/hibernate/hibernate-orm/commit/9e063ffa2";
        final String blamedFile = "hibernate-core/src/main/java/org/hibernate/cfg/AnnotationBinder.java";

        // Both blame outputs are produced before any assertion runs.
        final String ignoringWhitespaceOutput =
                getBlameOutput(commitUrl, blamedFile, new CliGitBlame(true), REPOS_PATH, gitService);
        final String defaultOutput =
                getBlameOutput(commitUrl, blamedFile, new CliGitBlame(false), REPOS_PATH, gitService);

        final String expectedIgnoringWhitespacePath =
                System.getProperty("user.dir") + "/src/test/resources/blame/gitcli/9e063ffa2_cgit_iw.txt";
        final String expectedDefaultPath =
                System.getProperty("user.dir") + "/src/test/resources/blame/gitcli/9e063ffa2_cgit_def.txt";

        assertEqualWithFile(expectedIgnoringWhitespacePath, ignoringWhitespaceOutput);
        assertEqualWithFile(expectedDefaultPath, defaultOutput);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ public void testBlameLineRangeWithLocalRepo() throws Exception {
assertEqualWithFile(expectedFilePath, actual);
}

private void assertEqualWithFile(String expectedResultPath, String actual) throws IOException {
public static void assertEqualWithFile(String expectedResultPath, String actual) throws IOException {
String expected = IOUtils.toString(
new FileInputStream(expectedResultPath),
StandardCharsets.UTF_8
Expand Down
Loading
Loading