Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Programming exercises: Remove unnecessary binary files from template and solution repository comparison #10091

Open
wants to merge 12 commits into
base: develop
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public PyrisProgrammingExerciseDTO toPyrisProgrammingExerciseDTO(ProgrammingExer
catch (GitAPIException e) {
log.error("Could not fetch existing test repository", e);
}
var testsRepositoryContents = testRepo.map(repositoryService::getFilesContentFromWorkingCopy).orElse(Map.of());
var testsRepositoryContents = testRepo.map((Repository repository) -> repositoryService.getFilesContentFromWorkingCopy(repository, false)).orElse(Map.of());

return new PyrisProgrammingExerciseDTO(exercise.getId(), exercise.getTitle(), exercise.getProgrammingLanguage(), templateRepositoryContents, solutionRepositoryContents,
testsRepositoryContents, exercise.getProblemStatement(), toInstant(exercise.getReleaseDate()), toInstant(exercise.getDueDate()));
Expand Down Expand Up @@ -157,7 +157,8 @@ private Map<String, String> getRepositoryContents(ProgrammingExerciseParticipati
}).orElse(Map.of());
}
else {
return Optional.ofNullable(gitService.getOrCheckoutRepository(repositoryUri, true)).map(repositoryService::getFilesContentFromWorkingCopy).orElse(Map.of());
return Optional.ofNullable(gitService.getOrCheckoutRepository(repositoryUri, true)).map(repo -> repositoryService.getFilesContentFromWorkingCopy(repo, false))
.orElse(Map.of());
}
}
catch (GitAPIException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ public class GitService extends AbstractGitService {

private static final Logger log = LoggerFactory.getLogger(GitService.class);

private static final List<String> BINARY_EXTENSIONS = List.of(".exe", ".jar", ".dll", ".so", ".class", ".bin", ".msi");

private final ProfileService profileService;

@Value("${artemis.version-control.local-vcs-repo-path:#{null}}")
Expand Down Expand Up @@ -1022,31 +1024,39 @@ public boolean accept(java.io.File directory, String fileName) {
* Note: This method does not handle changes to the repository content between invocations. If files change
* after the initial caching, the cache does not automatically refresh, which may lead to stale data.
*
* @param repo The repository to scan for files and directories.
* @param repo The repository to scan for files and directories.
* @param omitBinaries do not include binaries to reduce payload size
* @return A {@link Map} where each key is a {@link File} object representing a file or directory, and each value is
* the corresponding {@link FileType} (FILE or FOLDER). The map excludes symbolic links.
*/
public Map<File, FileType> listFilesAndFolders(Repository repo) {
public Map<File, FileType> listFilesAndFolders(Repository repo, boolean omitBinaries) {
FileAndDirectoryFilter filter = new FileAndDirectoryFilter();

Iterator<java.io.File> itr = FileUtils.iterateFilesAndDirs(repo.getLocalPath().toFile(), filter, filter);
Map<File, FileType> files = new HashMap<>();

while (itr.hasNext()) {
File nextFile = new File(itr.next(), repo);
Path nextPath = nextFile.toPath();

// filter out symlinks
if (Files.isSymbolicLink(nextPath)) {
log.warn("Found a symlink {} in the git repository {}. Do not allow access!", nextPath, repo);
continue;
}

if (omitBinaries && nextFile.isFile() && BINARY_EXTENSIONS.stream().anyMatch(nextFile.getName()::endsWith)) {
log.debug("Omitting binary file: {}", nextFile);
continue;
}

files.put(nextFile, nextFile.isFile() ? FileType.FILE : FileType.FOLDER);
}
return files;
}

public Map<File, FileType> listFilesAndFolders(Repository repo) {
return listFilesAndFolders(repo, false);
}

/**
* List all files in the repository. In an empty git repo, this method returns 0.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ public Map<String, String> getFilesContentAtCommit(ProgrammingExercise programmi
repository = gitService.checkoutRepositoryAtCommit(participation.getVcsRepositoryUri(), commitId, true);
}
// Get the files content from the working copy of the repository
Map<String, String> filesWithContent = getFilesContentFromWorkingCopy(repository);
Map<String, String> filesWithContent = getFilesContentFromWorkingCopy(repository, false);
// Switch back to the default branch head
gitService.switchBackToDefaultBranchHead(repository);
return filesWithContent;
Expand All @@ -152,12 +152,13 @@ public Map<String, String> getFilesContentAtCommit(ProgrammingExercise programmi
* This method filters out all non-file type entries and reads the content of each file.
* Note: If an I/O error occurs reading any file, this exception is caught internally and logged.
*
* @param repository The repository from which files are to be fetched.
* @param repository The repository from which files are to be fetched.
* @param omitBinaries omit binary files for brevity and reducing size
* @return A {@link Map} where each key is a file path (as a {@link String}) and each value is the content of the file (also as a {@link String}).
* The map includes only those files that could successfully have their contents read; files that cause an IOException are logged but not included.
*/
public Map<String, String> getFilesContentFromWorkingCopy(Repository repository) {
var files = gitService.listFilesAndFolders(repository).entrySet().stream().filter(entry -> entry.getValue() == FileType.FILE).map(Map.Entry::getKey).toList();
public Map<String, String> getFilesContentFromWorkingCopy(Repository repository, boolean omitBinaries) {
var files = gitService.listFilesAndFolders(repository, omitBinaries).entrySet().stream().filter(entry -> entry.getValue() == FileType.FILE).map(Map.Entry::getKey).toList();
Map<String, String> fileListWithContent = new HashMap<>();

files.forEach(file -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ private Map<String, Integer> getLineCountByFilePath(ProgrammingSubmission submis
var solutionRepo = gitService.getOrCheckoutRepository(solutionParticipation.getVcsRepositoryUri(), true);
gitService.resetToOriginHead(solutionRepo);
gitService.pullIgnoreConflicts(solutionRepo);
var solutionFiles = repositoryService.getFilesContentFromWorkingCopy(solutionRepo);
var solutionFiles = repositoryService.getFilesContentFromWorkingCopy(solutionRepo, false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we could overload the method with a list of argument that only contains the Repository? So we can use repositoryService.getFilesFromWorkingCopy(solutionRepo)?

var result = new HashMap<String, Integer>();
solutionFiles.forEach((filePath, value) -> {
// do not count lines for non-java/kotlin files
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ private Map<String, String> readSolutionRepo(ProgrammingExercise programmingExer
gitService.resetToOriginHead(solutionRepo);
gitService.pullIgnoreConflicts(solutionRepo);

return repositoryService.getFilesContentFromWorkingCopy(solutionRepo);
return repositoryService.getFilesContentFromWorkingCopy(solutionRepo, false);
}
catch (GitAPIException e) {
throw new BehavioralSolutionEntryGenerationException("Error while reading solution repository", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -818,21 +818,23 @@ public ResponseEntity<Void> deleteTaskWithSolutionEntries(@PathVariable Long exe
* Note: This endpoint redirects the request to the ProgrammingExerciseParticipationService. This is required if
* the solution participation id is not known for the client.
*
* @param exerciseId the exercise for which the solution repository files should be retrieved
* @param exerciseId the exercise for which the solution repository files should be retrieved
* @param omitBinaries do not send binaries to reduce payload size
* @return a redirect to the endpoint returning the files with content
*/
@GetMapping("programming-exercises/{exerciseId}/solution-files-content")
@EnforceAtLeastTutor
@FeatureToggle(Feature.ProgrammingExercises)
public ModelAndView redirectGetSolutionRepositoryFiles(@PathVariable Long exerciseId) {
public ModelAndView redirectGetSolutionRepositoryFiles(@PathVariable Long exerciseId,
@RequestParam(value = "omitBinaries", required = false, defaultValue = "false") boolean omitBinaries) {
log.debug("REST request to get latest Solution Repository Files for ProgrammingExercise with id : {}", exerciseId);
ProgrammingExercise exercise = programmingExerciseRepository.findByIdElseThrow(exerciseId);
authCheckService.checkHasAtLeastRoleForExerciseElseThrow(Role.TEACHING_ASSISTANT, exercise, null);

var participation = solutionProgrammingExerciseParticipationRepository.findByProgrammingExerciseIdElseThrow(exerciseId);

// TODO: We want to get rid of ModelAndView and use ResponseEntity instead. Define an appropriate service method and then call it here and in the referenced endpoint.
return new ModelAndView("forward:/api/repository/" + participation.getId() + "/files-content");
return new ModelAndView("forward:/api/repository/" + participation.getId() + "/files-content" + "?omitBinaries=" + omitBinaries);
}

/**
Expand All @@ -842,21 +844,23 @@ public ModelAndView redirectGetSolutionRepositoryFiles(@PathVariable Long exerci
* Note: This endpoint redirects the request to the ProgrammingExerciseParticipationService. This is required if
* the template participation id is not known for the client.
*
* @param exerciseId the exercise for which the template repository files should be retrieved
* @param exerciseId the exercise for which the template repository files should be retrieved
* @param omitBinaries do not send binaries to reduce payload size
* @return a redirect to the endpoint returning the files with content
*/
@GetMapping("programming-exercises/{exerciseId}/template-files-content")
@EnforceAtLeastTutor
@FeatureToggle(Feature.ProgrammingExercises)
public ModelAndView redirectGetTemplateRepositoryFiles(@PathVariable Long exerciseId) {
public ModelAndView redirectGetTemplateRepositoryFiles(@PathVariable Long exerciseId,
@RequestParam(value = "omitBinaries", required = false, defaultValue = "false") boolean omitBinaries) {
log.debug("REST request to get latest Template Repository Files for ProgrammingExercise with id : {}", exerciseId);
ProgrammingExercise exercise = programmingExerciseRepository.findByIdElseThrow(exerciseId);
authCheckService.checkHasAtLeastRoleForExerciseElseThrow(Role.TEACHING_ASSISTANT, exercise, null);

var participation = templateProgrammingExerciseParticipationRepository.findByProgrammingExerciseIdElseThrow(exerciseId);

// TODO: We want to get rid of ModelAndView and use ResponseEntity instead. Define an appropriate service method and then call it here and in the referenced endpoint.
return new ModelAndView("forward:/api/repository/" + participation.getId() + "/files-content");
return new ModelAndView("forward:/api/repository/" + participation.getId() + "/files-content" + (omitBinaries ? "?omitBinaries=" + omitBinaries : ""));
AjayvirS marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,16 @@ public ResponseEntity<byte[]> getFileForPlagiarismView(@PathVariable Long partic
* Gets the files of the repository with content
*
* @param participationId participation of the student/template/solution
* @param omitBinaries do not send binaries to reduce payload size
* @return the ResponseEntity with status 200 (OK) and a map of files with their content
*/
@GetMapping(value = "repository/{participationId}/files-content", produces = MediaType.APPLICATION_JSON_VALUE)
@EnforceAtLeastTutor
public ResponseEntity<Map<String, String>> getFilesWithContent(@PathVariable Long participationId) {
public ResponseEntity<Map<String, String>> getFilesWithContent(@PathVariable Long participationId,
@RequestParam(value = "omitBinaries", required = false, defaultValue = "false") boolean omitBinaries) {
return super.executeAndCheckForExceptions(() -> {
Repository repository = getRepository(participationId, RepositoryActionType.READ, true);
var filesWithContent = super.repositoryService.getFilesContentFromWorkingCopy(repository);
var filesWithContent = super.repositoryService.getFilesContentFromWorkingCopy(repository, omitBinaries);
return new ResponseEntity<>(filesWithContent, HttpStatus.OK);
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,7 @@ export class ProgrammingExerciseService {
* Gets all files from the last solution participation repository
*/
getSolutionRepositoryTestFilesWithContent(exerciseId: number): Observable<Map<string, string> | undefined> {
return this.http.get(`${this.resourceUrl}/${exerciseId}/solution-files-content`).pipe(
return this.http.get(`${this.resourceUrl}/${exerciseId}/solution-files-content?omitBinaries=true`).pipe(
map((res: HttpResponse<any>) => {
// this mapping is required because otherwise the HttpResponse object would be parsed
// to an arbitrary object (and not a map)
Expand All @@ -637,7 +637,7 @@ export class ProgrammingExerciseService {
* Gets all files from the last commit in the template participation repository
*/
getTemplateRepositoryTestFilesWithContent(exerciseId: number): Observable<Map<string, string> | undefined> {
return this.http.get(`${this.resourceUrl}/${exerciseId}/template-files-content`).pipe(
return this.http.get(`${this.resourceUrl}/${exerciseId}/template-files-content?omitBinaries=true`).pipe(
map((res: HttpResponse<any>) => {
// this mapping is required because otherwise the HttpResponse object would be parsed
// to an arbitrary object (and not a map)
Expand Down
Loading
Loading