Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Athena: Improve AI feedback request validation #10165

Merged
merged 10 commits into from
Jan 21, 2025
Merged
Next Next commit
Improve feedback request validation
  • Loading branch information
maximiliansoelch committed Jan 17, 2025
commit ead1d8f30745886f904e89860239ea6d6e4e868d
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;

import com.fasterxml.jackson.annotation.JsonInclude;

import de.tum.cit.aet.artemis.assessment.domain.AssessmentType;
import de.tum.cit.aet.artemis.assessment.domain.Result;
import de.tum.cit.aet.artemis.athena.dto.ExerciseBaseDTO;
import de.tum.cit.aet.artemis.athena.dto.ModelingFeedbackDTO;
import de.tum.cit.aet.artemis.athena.dto.ProgrammingFeedbackDTO;
Expand All @@ -23,6 +26,7 @@
import de.tum.cit.aet.artemis.core.domain.LLMRequest;
import de.tum.cit.aet.artemis.core.domain.LLMServiceType;
import de.tum.cit.aet.artemis.core.domain.User;
import de.tum.cit.aet.artemis.core.exception.BadRequestAlertException;
import de.tum.cit.aet.artemis.core.exception.ConflictException;
import de.tum.cit.aet.artemis.core.exception.NetworkingException;
import de.tum.cit.aet.artemis.core.service.LLMTokenUsageService;
Expand Down Expand Up @@ -58,6 +62,9 @@ public class AthenaFeedbackSuggestionsService {

private final LLMTokenUsageService llmTokenUsageService;

@Value("${artemis.athena.allowed-feedback-requests:10}")
private int allowedFeedbackRequests;

/**
* Create a new AthenaFeedbackSuggestionsService to receive feedback suggestions from the Athena service.
*
Expand Down Expand Up @@ -185,4 +192,37 @@ private void storeTokenUsage(Exercise exercise, Submission submission, ResponseM
llmTokenUsageService.saveLLMTokenUsage(llmRequests, LLMServiceType.ATHENA,
(llmTokenUsageBuilder -> llmTokenUsageBuilder.withCourse(courseId).withExercise(exercise.getId()).withUser(userId)));
}

/**
 * Enforces the per-participation limit on AI feedback requests.
 * <p>
 * Only results with assessment type {@link AssessmentType#AUTOMATIC_ATHENA} that are marked as successful count towards the limit; unsuccessful Athena results are ignored.
 * The limit is injected from the {@code artemis.athena.allowed-feedback-requests} property (default 10).
 *
 * @param participation the student participation whose results are inspected
 * @throws BadRequestAlertException if the number of successful Athena results has reached the configured limit
 */
public void checkRateLimitOrThrow(StudentParticipation participation) {
    // Boolean.TRUE.equals(...) compares by value and tolerates null; '==' on a boxed Boolean compares references.
    long countOfSuccessfulRequests = participation.getResults().stream()
            .filter(result -> result.getAssessmentType() == AssessmentType.AUTOMATIC_ATHENA)
            .filter(result -> Boolean.TRUE.equals(result.isSuccessful()))
            .count();

    if (countOfSuccessfulRequests >= this.allowedFeedbackRequests) {
        throw new BadRequestAlertException("Maximum number of AI feedback requests reached.", "participation", "maxAthenaResultsReached", true);
    }
}

/**
 * Rejects a feedback request when the latest result of the given submission was already produced by Athena.
 * A submission without any result, or whose latest result has a different assessment type, passes the check.
 *
 * @param submission the submission whose latest result is inspected
 * @throws BadRequestAlertException if the latest result has assessment type AUTOMATIC_ATHENA
 */
public void checkLatestSubmissionHasNoAthenaResultOrThrow(Submission submission) {
    Result mostRecentResult = submission.getLatestResult();
    boolean hasAthenaResult = mostRecentResult != null && mostRecentResult.getAssessmentType() == AssessmentType.AUTOMATIC_ATHENA;

    if (!hasAthenaResult) {
        return;
    }

    log.debug("Submission ID: {} already has an Athena result. Skipping feedback generation.", submission.getId());
    throw new BadRequestAlertException("Submission already has an Athena result", "submission", "submissionAlreadyHasAthenaResult", true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,9 @@ private ResponseEntity<StudentParticipation> handleExerciseFeedbackRequest(Exerc

// Check submission requirements
if (exercise instanceof TextExercise || exercise instanceof ModelingExercise) {
if (submissionRepository.findAllByParticipationId(participation.getId()).isEmpty()) {
throw new BadRequestAlertException("You need to submit at least once", "participation", "preconditions not met");
boolean hasSubmittedOnce = submissionRepository.findAllByParticipationId(participation.getId()).stream().anyMatch(Submission::isSubmitted);
if (!hasSubmittedOnce) {
throw new BadRequestAlertException("You need to submit at least once", "participation", "noSubmissionExists", true);
}
}
else if (exercise instanceof ProgrammingExercise) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ public ModelingExerciseFeedbackService(Optional<AthenaFeedbackSuggestionsService
*/
public StudentParticipation handleNonGradedFeedbackRequest(StudentParticipation participation, ModelingExercise modelingExercise) {
if (this.athenaFeedbackSuggestionsService.isPresent()) {
this.checkRateLimitOrThrow(participation);
this.checkLatestSubmissionHasAthenaResultOrThrow(participation);
this.athenaFeedbackSuggestionsService.get().checkRateLimitOrThrow(participation);
CompletableFuture.runAsync(() -> this.generateAutomaticNonGradedFeedback(participation, modelingExercise));
}
return participation;
Expand All @@ -90,19 +89,25 @@ public void generateAutomaticNonGradedFeedback(StudentParticipation participatio
.findLatestSubmission();

if (submissionOptional.isEmpty()) {
throw new BadRequestAlertException("No legal submissions found", "submission", "noSubmission");
throw new BadRequestAlertException("No legal submissions found", "submission", "noSubmissionExists");
}

Submission submission = submissionOptional.get();
ModelingSubmission modelingSubmission = (ModelingSubmission) submissionOptional.get();

Result automaticResult = createInitialResult(participation, submission);
this.athenaFeedbackSuggestionsService.orElseThrow().checkLatestSubmissionHasNoAthenaResultOrThrow(modelingSubmission);

if (modelingSubmission.isEmpty()) {
throw new BadRequestAlertException("Submission can not be empty for an AI feedback request", "submission", "noAthenaFeedbackOnEmptySubmission");
}

Result automaticResult = createInitialResult(participation, modelingSubmission);

try {
this.resultWebsocketService.broadcastNewResult(participation, automaticResult);

log.debug("Submission id: {}", submission.getId());
log.debug("Submission id: {}", modelingSubmission.getId());

List<Feedback> feedbacks = getAthenaFeedback(modelingExercise, (ModelingSubmission) submission);
List<Feedback> feedbacks = getAthenaFeedback(modelingExercise, (ModelingSubmission) modelingSubmission);

double totalFeedbackScore = calculateTotalFeedbackScore(feedbacks, modelingExercise);

Expand All @@ -112,7 +117,7 @@ public void generateAutomaticNonGradedFeedback(StudentParticipation participatio

automaticResult = this.resultRepository.save(automaticResult);
resultService.storeFeedbackInResult(automaticResult, feedbacks, true);
submissionService.saveNewResult(submission, automaticResult);
submissionService.saveNewResult(modelingSubmission, automaticResult);
this.resultWebsocketService.broadcastNewResult(participation, automaticResult);
}
catch (Exception e) {
Expand Down Expand Up @@ -190,45 +195,4 @@ private double calculateTotalFeedbackScore(List<Feedback> feedbacks, ModelingExe

return (totalCredits / maxPoints) * 100;
}

/**
 * Guards against too many AI feedback requests for a single participation.
 * Every result with assessment type AUTOMATIC_ATHENA is counted, and the request is rejected once 10 such results exist.
 *
 * @param participation the student participation whose results are counted
 * @throws BadRequestAlertException if the participation already has 10 or more Athena results
 */
private void checkRateLimitOrThrow(StudentParticipation participation) {
    long athenaResultCount = participation.getResults().stream().filter(result -> result.getAssessmentType() == AssessmentType.AUTOMATIC_ATHENA).count();

    if (athenaResultCount < 10) {
        return;
    }

    throw new BadRequestAlertException("Maximum number of AI feedback requests reached.", "participation", "maxAthenaResultsReached", true);
}

/**
 * Loads the latest submission of the participation and rejects the request when that submission's latest
 * result was already produced by Athena.
 *
 * @param participation the student participation to validate
 * @throws BadRequestAlertException if no latest submission is found or if its latest result has assessment type AUTOMATIC_ATHENA
 */
private void checkLatestSubmissionHasAthenaResultOrThrow(StudentParticipation participation) {
    Optional<Submission> latestSubmission = participationService.findExerciseParticipationWithLatestSubmissionAndResultElseThrow(participation.getId())
            .findLatestSubmission();

    Submission submission = latestSubmission.orElseThrow(() -> new BadRequestAlertException("No legal submissions found", "submission", "noSubmission"));

    Result mostRecentResult = submission.getLatestResult();
    boolean hasAthenaResult = mostRecentResult != null && mostRecentResult.getAssessmentType() == AssessmentType.AUTOMATIC_ATHENA;

    if (!hasAthenaResult) {
        return;
    }

    log.debug("Submission ID: {} already has an Athena result. Skipping feedback generation.", submission.getId());
    throw new BadRequestAlertException("Submission already has an Athena result", "submission", "submissionAlreadyHasAthenaResult", true);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import de.tum.cit.aet.artemis.assessment.domain.AssessmentType;
import de.tum.cit.aet.artemis.assessment.domain.Feedback;
import de.tum.cit.aet.artemis.assessment.domain.FeedbackType;
import de.tum.cit.aet.artemis.assessment.domain.Result;
import de.tum.cit.aet.artemis.assessment.repository.ResultRepository;
import de.tum.cit.aet.artemis.assessment.service.ResultService;
import de.tum.cit.aet.artemis.athena.service.AthenaFeedbackSuggestionsService;
Expand Down Expand Up @@ -91,7 +90,7 @@ public ProgrammingExerciseCodeReviewFeedbackService(GroupNotificationService gro
public ProgrammingExerciseStudentParticipation handleNonGradedFeedbackRequest(Long exerciseId, ProgrammingExerciseStudentParticipation participation,
ProgrammingExercise programmingExercise) {
if (this.athenaFeedbackSuggestionsService.isPresent()) {
this.checkRateLimitOrThrow(participation);
this.athenaFeedbackSuggestionsService.get().checkRateLimitOrThrow(participation);
CompletableFuture.runAsync(() -> this.generateAutomaticNonGradedFeedback(participation, programmingExercise));
return participation;
}
Expand All @@ -110,7 +109,7 @@ public ProgrammingExerciseStudentParticipation handleNonGradedFeedbackRequest(Lo
* @param programmingExercise the programming exercise object.
*/
public void generateAutomaticNonGradedFeedback(ProgrammingExerciseStudentParticipation participation, ProgrammingExercise programmingExercise) {
log.debug("Using athena to generate feedback request: {}", programmingExercise.getId());
log.debug("Using athena to generate (programming exercise) feedback request: {}", programmingExercise.getId());

// athena takes over the control here
var submissionOptional = programmingExerciseParticipationService.findProgrammingExerciseParticipationWithLatestSubmissionAndResult(participation.getId())
Expand Down Expand Up @@ -222,15 +221,4 @@ private void unlockRepository(ProgrammingExerciseStudentParticipation participat
this.programmingExerciseStudentParticipationRepository.save(participation);
}
}

/**
 * Enforces the limit on AI feedback requests for a programming exercise participation.
 * Only results with assessment type AUTOMATIC_ATHENA that are marked as successful count towards the limit.
 *
 * @param participation the programming exercise participation whose results are inspected
 * @throws BadRequestAlertException if the number of successful Athena results has reached {@code allowedFeedbackAttempts}
 */
private void checkRateLimitOrThrow(ProgrammingExerciseStudentParticipation participation) {
    // Boolean.TRUE.equals(...) compares by value and tolerates null; '==' on a boxed Boolean compares references.
    long countOfSuccessfulRequests = participation.getResults().stream()
            .filter(result -> result.getAssessmentType() == AssessmentType.AUTOMATIC_ATHENA)
            .filter(result -> Boolean.TRUE.equals(result.isSuccessful()))
            .count();

    if (countOfSuccessfulRequests >= this.allowedFeedbackAttempts) {
        throw new BadRequestAlertException("Maximum number of AI feedback requests reached.", "participation", "maxAthenaResultsReached", true);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,6 @@ public TextExerciseFeedbackService(Optional<AthenaFeedbackSuggestionsService> at
this.textBlockService = textBlockService;
}

/**
 * Rejects the feedback request once the participation holds 10 or more results with assessment type AUTOMATIC_ATHENA.
 *
 * @param participation the student participation whose results are counted
 * @throws BadRequestAlertException if the participation already has 10 or more Athena results
 */
private void checkRateLimitOrThrow(StudentParticipation participation) {
    long athenaResultCount = participation.getResults().stream().filter(result -> result.getAssessmentType() == AssessmentType.AUTOMATIC_ATHENA).count();

    if (athenaResultCount < 10) {
        return;
    }

    throw new BadRequestAlertException("Maximum number of AI feedback requests reached.", "participation", "maxAthenaResultsReached", true);
}

/**
* Handles the request for generating feedback for a text exercise.
* Unlike programming exercises a tutor is not notified if Athena is not available.
Expand All @@ -84,7 +73,7 @@ private void checkRateLimitOrThrow(StudentParticipation participation) {
*/
public StudentParticipation handleNonGradedFeedbackRequest(StudentParticipation participation, TextExercise textExercise) {
if (this.athenaFeedbackSuggestionsService.isPresent()) {
this.checkRateLimitOrThrow(participation);
this.athenaFeedbackSuggestionsService.get().checkRateLimitOrThrow(participation);
CompletableFuture.runAsync(() -> this.generateAutomaticNonGradedFeedback(participation, textExercise));
}
return participation;
Expand All @@ -104,10 +93,16 @@ public void generateAutomaticNonGradedFeedback(StudentParticipation participatio
var submissionOptional = participationService.findExerciseParticipationWithLatestSubmissionAndResultElseThrow(participation.getId()).findLatestSubmission();

if (submissionOptional.isEmpty()) {
throw new BadRequestAlertException("No legal submissions found", "submission", "noSubmission");
throw new BadRequestAlertException("No legal submissions found", "submission", "noSubmissionExists");
}
TextSubmission textSubmission = (TextSubmission) submissionOptional.get();

this.athenaFeedbackSuggestionsService.orElseThrow().checkLatestSubmissionHasNoAthenaResultOrThrow(textSubmission);

if (textSubmission.isEmpty()) {
throw new BadRequestAlertException("Submission can not be empty for an AI feedback request", "submission", "noAthenaFeedbackOnEmptySubmission");
}

Result automaticResult = new Result();
automaticResult.setAssessmentType(AssessmentType.AUTOMATIC_ATHENA);
automaticResult.setRated(true);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
@if (!isExamExercise && requestFeedbackEnabled) {
@if (athenaEnabled) {
@if (exercise().type === ExerciseType.TEXT) {
@if (exercise().type === ExerciseType.TEXT || exercise().type === ExerciseType.MODELING) {
<button
class="btn btn-primary"
(click)="requestFeedback()"
Expand Down
3 changes: 2 additions & 1 deletion src/main/webapp/i18n/de/exercise.json
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@
"maxAthenaResultsReached": "Du hast die maximale Anzahl an KI-Feedbackanfragen erreicht.",
"athenaFeedbackSuccessful": "KI-Feedback erfolgreich generiert. Klicke auf das Ergebnis, um Details zu sehen.",
"athenaFeedbackFailed": "Etwas ist schiefgelaufen... KI-Feedback konnte im Moment nicht generiert werden",
"submissionAlreadyHasAthenaResult": "Für diese Abgabe liegt bereits ein KI-Ergebnis vor. Bitte reiche eine neue Abgabe ein, bevor du erneut einreichst.",
"submissionAlreadyHasAthenaResult": "Für diese Abgabe liegt bereits ein KI-Ergebnis vor. Bitte reiche eine neue Abgabe ein, bevor du erneut KI-Feedback anfragst.",
"noAthenaFeedbackOnEmptySubmission": "Du kannst kein KI-Feedback für eine leere Abgabe anfordern.",
"startError": "<strong>Uh oh! Etwas ist schiefgelaufen... Bitte versuch es in wenigen Minuten noch einmal die Aufgabe zu starten.</strong>",
"name": "Name",
"studentId": "Login",
Expand Down
1 change: 1 addition & 0 deletions src/main/webapp/i18n/en/exercise.json
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@
"athenaFeedbackSuccessful": "AI feedback successfully generated. Click on the result to view details.",
"athenaFeedbackFailed": "Something went wrong... AI feedback could not be generated at the moment",
"submissionAlreadyHasAthenaResult": "This submission already has an existing AI result. Please submit again before requesting further feedback.",
"noAthenaFeedbackOnEmptySubmission": "You cannot request AI feedback on an empty submission.",
"startError": "<strong>Uh oh! Something went wrong... Please try again to start the exercise in a few minutes.</strong>",
"name": "Name",
"studentId": "Login",
Expand Down
Loading