Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

log detail errors from remote #58

Merged
merged 3 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ class NomadService implements Closeable{
JobRegisterResponse jobRegisterResponse = jobsApi.registerJob(jobRegisterRequest, config.jobOpts().region, config.jobOpts().namespace, null, null)
jobRegisterResponse.evalID
} catch (Throwable e) {
log.error("[NOMAD] Failed to submit ${job.name} -- Cause: ${e.message ?: e}", e)
throw new ProcessSubmitException("[NOMAD] Failed to submit ${job.name} -- Cause: ${e.message ?: e}", e)
}

Expand Down Expand Up @@ -267,27 +268,42 @@ class NomadService implements Closeable{
}

String getJobState(String jobId){
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub last = allocations?.sort{
it.modifyIndex
}?.last()
String currentState = last?.taskStates?.values()?.last()?.state
log.debug "Task $jobId , state=$currentState"
currentState ?: "Unknown"
try {
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub last = allocations?.sort {
it.modifyIndex
}?.last()
String currentState = last?.taskStates?.values()?.last()?.state
log.debug "Task $jobId , state=$currentState"
currentState ?: "Unknown"
}catch(Exception e){
log.error("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e)
"dead"
}
}



boolean checkIfRunning(String jobId){
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfRunning jobID=$job.ID; status=$job.status"
job.status == "running"
try {
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfRunning jobID=$job.ID; status=$job.status"
job.status == "running"
}catch (Exception e){
log.error("[NOMAD] Failed to get jobState ${jobId} -- Cause: ${e.message ?: e}", e)
false
}
}

boolean checkIfDead(String jobId){
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfDead jobID=$job.ID; status=$job.status"
job.status == "dead"
try{
Job job = jobsApi.getJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null)
log.debug "[NOMAD] checkIfDead jobID=$job.ID; status=$job.status"
job.status == "dead"
}catch (Exception e){
log.error("[NOMAD] Failed to get job ${jobId} -- Cause: ${e.message ?: e}", e)
true
}
}

void kill(String jobId) {
Expand All @@ -300,12 +316,21 @@ class NomadService implements Closeable{

protected void purgeJob(String jobId, boolean purge){
log.debug "[NOMAD] purgeJob with jobId=${jobId}"
jobsApi.deleteJob(jobId,config.jobOpts().region, config.jobOpts().namespace,null,null,purge, true)
try {
jobsApi.deleteJob(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, purge, true)
}catch(Exception e){
log.error("[NOMAD] Failed to delete job ${jobId} -- Cause: ${e.message ?: e}", e)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jagedn , do you think that within an exception block, we should use only log.debug or this should be decided as per the context?

I think error makes sense for session terminating steps and not for the retries - what do you think?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good question

yes, you're right, probably we can use only debug level in retries and if the user want to have more details can change the level

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks Jorge, I've refactored accordingly 👍

}
}

String getClientOfJob(String jobId) {
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub jobAllocation = allocations.first()
return jobAllocation.nodeName
try{
List<AllocationListStub> allocations = jobsApi.getJobAllocations(jobId, config.jobOpts().region, config.jobOpts().namespace, null, null, null, null, null, null, null, null)
AllocationListStub jobAllocation = allocations.first()
return jobAllocation.nodeName
}catch (Exception e){
log.error("[NOMAD] Failed to get job allocations ${jobId} -- Cause: ${e.message ?: e}", e)
throw new ProcessSubmitException("[NOMAD] Failed to get alloactions ${jobId} -- Cause: ${e.message ?: e}", e)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,6 @@ class MockExecutor extends NomadExecutor {
@Override
void signal() { }

protected TaskMonitor createTaskMonitor2() {
new MockMonitor()
}

}

class MockMonitor implements TaskMonitor {
Expand Down
47 changes: 47 additions & 0 deletions plugins/nf-nomad/src/test/nextflow/nomad/NomadDSLSpec.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.pf4j.PluginDescriptorFinder
import spock.lang.Shared
import spock.lang.Timeout
import test.Dsl2Spec
import test.OutputCapture

import java.nio.file.Files
import java.nio.file.Path
Expand Down Expand Up @@ -157,4 +158,50 @@ class NomadDSLSpec extends Dsl2Spec{
// submitted
// summary
}

@org.junit.Rule
OutputCapture capture = new OutputCapture()

def 'should catch a remote exception' () {
given:
mockWebServer.dispatcher = new Dispatcher() {
@Override
MockResponse dispatch(@NotNull RecordedRequest recordedRequest) throws InterruptedException {
new MockResponse().setResponseCode(500).setBody("Dummy exception")
}
}

when:
def SCRIPT = '''
process sayHello{
container 'ubuntu:22.0.4'
input:
val x
output:
stdout
script:
"""
echo '$x world!\'
"""
}
workflow {
channel.of('hi!') | sayHello | view
}
'''
and:
def result = new MockScriptRunner([
process:[executor:'nomad'],
nomad:
[
client:
[
address : "http://${mockWebServer.hostName}:${mockWebServer.port}"
]
]
]).setScript(SCRIPT).execute()

then:
thrown(AbortRunException) //it fails because no real task is executed
capture.toString().indexOf("io.nomadproject.client.ApiException: Server Error") != -1
}
}
Loading