-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WOR-161] Stop trying to clone workspace files if it hasn't succeeded within a day #2644
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
<?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||
<databaseChangeLog logicalFilePath="dummy" xmlns="http://www.liquibase.org/xml/ns/dbchangelog" | ||
xmlns:ext="http://www.liquibase.org/xml/ns/dbchangelog-ext" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog-ext http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-ext.xsd http://www.liquibase.org/xml/ns/dbchangelog http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-3.4.xsd"> | ||
<!--
  Adds three columns to CLONE_WORKSPACE_FILE_TRANSFER:
    CREATED  : DATETIME, NOT NULL, default computed as CURRENT_TIMESTAMP
    FINISHED : DATETIME, nullable
    OUTCOME  : VARCHAR(254), nullable
-->
<changeSet logicalFilePath="dummy" author="mtalbott" id="add_timestamps_outcome_CLONE_WORKSPACE_FILE_TRANSFER"> | ||
<addColumn tableName="CLONE_WORKSPACE_FILE_TRANSFER"> | ||
<column name="CREATED" type="DATETIME" defaultValueComputed="CURRENT_TIMESTAMP"> | ||
<constraints nullable="false" /> | ||
</column> | ||
</addColumn> | ||
<addColumn tableName="CLONE_WORKSPACE_FILE_TRANSFER"> | ||
<column name="FINISHED" type="DATETIME"> | ||
<constraints nullable="true" /> | ||
</column> | ||
</addColumn> | ||
<addColumn tableName="CLONE_WORKSPACE_FILE_TRANSFER"> | ||
<column name="OUTCOME" type="VARCHAR(254)"> | ||
<constraints nullable="true" /> | ||
</column> | ||
</addColumn> | ||
</changeSet> | ||
</databaseChangeLog> |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ import org.scalatest.time.{Seconds, Span} | |
import org.scalatest.{BeforeAndAfterAll, OptionValues} | ||
import org.scalatestplus.mockito.MockitoSugar | ||
|
||
import java.sql.Timestamp | ||
import java.util.UUID | ||
import scala.concurrent.duration._ | ||
import scala.concurrent.{ExecutionContext, Future} | ||
|
@@ -213,7 +214,8 @@ class CloneWorkspaceFileTransferMonitorSpec(_system: ActorSystem) | |
|
||
val mockGcsDAO = mock[GoogleServicesDAO](RETURNS_SMART_NULLS) | ||
val failureMessage = "because I feel like it" | ||
val exception = new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).build | ||
val exception = | ||
new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).setMessage(failureMessage).build | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added Before:
After:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps make the exception text more meaningful, like "expected test exception"? |
||
when( | ||
mockGcsDAO.listObjectsWithPrefix(sourceBucketName, copyFilesWithPrefix, Option(destWorkspace.googleProjectId)) | ||
) | ||
|
@@ -302,7 +304,8 @@ class CloneWorkspaceFileTransferMonitorSpec(_system: ActorSystem) | |
|
||
val mockGcsDAO = mock[GoogleServicesDAO](RETURNS_SMART_NULLS) | ||
val failureMessage = "because I feel like it" | ||
val exception = new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).build | ||
val exception = | ||
new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).setMessage(failureMessage).build | ||
when( | ||
mockGcsDAO.listObjectsWithPrefix(sourceBucketName, copyFilesWithPrefix, Option(destWorkspace.googleProjectId)) | ||
) | ||
|
@@ -403,7 +406,8 @@ class CloneWorkspaceFileTransferMonitorSpec(_system: ActorSystem) | |
|
||
val mockGcsDAO = mock[GoogleServicesDAO](RETURNS_SMART_NULLS) | ||
val failureMessage = "because I feel like it" | ||
val exception = new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).build | ||
val exception = | ||
new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).setMessage(failureMessage).build | ||
when( | ||
mockGcsDAO.listObjectsWithPrefix(sourceBucketName, copyFilesWithPrefix, Option(destWorkspace.googleProjectId)) | ||
) | ||
|
@@ -423,7 +427,7 @@ class CloneWorkspaceFileTransferMonitorSpec(_system: ActorSystem) | |
destinationBucketName, | ||
goodObjectToCopy.getName, | ||
Option(destWorkspace.googleProjectId) | ||
) | ||
)(system.dispatchers.defaultGlobalDispatcher) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was causing an NPE since the call wasn't properly mocked. The test still succeeded because the |
||
) | ||
.thenReturn(Future.successful(Option(goodObjectToCopy))) | ||
|
||
|
@@ -546,7 +550,8 @@ class CloneWorkspaceFileTransferMonitorSpec(_system: ActorSystem) | |
|
||
val mockGcsDAO = mock[GoogleServicesDAO](RETURNS_SMART_NULLS) | ||
val failureMessage = "because I feel like it" | ||
val exception = new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).build | ||
val exception = | ||
new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).setMessage(failureMessage).build | ||
when( | ||
mockGcsDAO.listObjectsWithPrefix(sourceBucketName, | ||
copyFilesWithPrefix, | ||
|
@@ -610,4 +615,117 @@ class CloneWorkspaceFileTransferMonitorSpec(_system: ActorSystem) | |
system.stop(actor) | ||
} | ||
} | ||
|
||
// Verifies that a clone file transfer whose CREATED timestamp is two days old gets closed out
// with outcome "Failure" while every copyFile call keeps failing.
// NOTE(review): the cutoff is presumably the one-day limit named in the PR title; the monitor's
// actual threshold is not visible in this diff. Confirm.
it should "eventually stop trying to copy files" in { | ||
withEmptyTestDatabase { dataSource: SlickDataSource => | ||
val billingProject = RawlsBillingProject(defaultBillingProjectName, | ||
CreationStatuses.Ready, | ||
Option(defaultBillingAccountName), | ||
None, | ||
googleProjectNumber = Option(defaultGoogleProjectNumber) | ||
) | ||
// Fixture: a source and a destination workspace in the same billing project, each with its own bucket.
val sourceBucketName = "sourceBucket" | ||
val destinationBucketName = "destinationBucket" | ||
val copyFilesWithPrefix = "prefix" | ||
val objectToCopy = new StorageObject().setName("copy-me") | ||
val sourceWorkspace = Workspace( | ||
billingProject.projectName.value, | ||
"source", | ||
UUID.randomUUID().toString, | ||
sourceBucketName, | ||
None, | ||
DateTime.now, | ||
DateTime.now, | ||
"[email protected]", | ||
Map.empty, | ||
false, | ||
WorkspaceVersions.V2, | ||
GoogleProjectId("some-project"), | ||
Option(GoogleProjectNumber("43")), | ||
billingProject.billingAccount, | ||
None, | ||
Option(DateTime.now), | ||
WorkspaceType.RawlsWorkspace, | ||
WorkspaceState.Ready | ||
) | ||
val destWorkspace = Workspace( | ||
billingProject.projectName.value, | ||
"destination", | ||
UUID.randomUUID().toString, | ||
destinationBucketName, | ||
None, | ||
DateTime.now, | ||
DateTime.now, | ||
"[email protected]", | ||
Map.empty, | ||
false, | ||
WorkspaceVersions.V2, | ||
GoogleProjectId("different-project"), | ||
Option(GoogleProjectNumber("44")), | ||
billingProject.billingAccount, | ||
None, | ||
None, | ||
WorkspaceType.RawlsWorkspace, | ||
WorkspaceState.Ready | ||
) | ||
|
||
// Persist the fixture, then save a clone file transfer record from the source to the destination workspace.
runAndWait(rawlsBillingProjectQuery.create(billingProject)) | ||
runAndWait(workspaceQuery.createOrUpdate(sourceWorkspace)) | ||
runAndWait(workspaceQuery.createOrUpdate(destWorkspace)) | ||
runAndWait( | ||
cloneWorkspaceFileTransferQuery.save(destWorkspace.workspaceIdAsUUID, | ||
sourceWorkspace.workspaceIdAsUUID, | ||
copyFilesWithPrefix | ||
) | ||
) | ||
|
||
// GCS mock: listing the source bucket yields one object, and copying that object always fails with a 403.
val mockGcsDAO = mock[GoogleServicesDAO](RETURNS_SMART_NULLS) | ||
val failureMessage = "because I feel like it" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, not again!! |
||
// NOTE(review): the other tests touched in this diff now call `.setMessage(failureMessage)` on the
// builder; this one does not. Confirm whether the omission is intentional.
val exception = new HttpResponseException.Builder(403, failureMessage, new HttpHeaders()).build | ||
when( | ||
mockGcsDAO.listObjectsWithPrefix(sourceBucketName, copyFilesWithPrefix, Option(destWorkspace.googleProjectId)) | ||
) | ||
.thenReturn(Future.successful(List(objectToCopy))) | ||
when( | ||
mockGcsDAO.copyFile(sourceBucketName, | ||
objectToCopy.getName, | ||
destinationBucketName, | ||
objectToCopy.getName, | ||
Option(destWorkspace.googleProjectId) | ||
)(system.dispatchers.defaultGlobalDispatcher) | ||
) | ||
.thenReturn(Future.failed(exception)) | ||
|
||
// Start the monitor, then backdate the transfer's CREATED timestamp by two days.
// NOTE(review): the backdating runs after the actor is created, so the monitor may first see the
// original timestamp; presumably it retries and picks up the backdated value. Confirm.
val actor = createCloneWorkspaceFileTransferMonitor(dataSource, mockGcsDAO) | ||
import driver.api._ | ||
runAndWait( | ||
cloneWorkspaceFileTransferQuery | ||
.filter(_.destWorkspaceId === destWorkspace.workspaceIdAsUUID) | ||
.map(_.created) | ||
.update(new Timestamp(DateTime.now().minusDays(2).getMillis)) | ||
) | ||
|
||
// Within 10 seconds the destination workspace must have completedCloneWorkspaceFileTransfer set
// and no pending transfers may remain.
eventually(timeout = timeout(Span(10, Seconds))) { | ||
runAndWait(workspaceQuery.findById(destWorkspace.workspaceIdAsUUID.toString)) | ||
.getOrElse(fail(s"${destWorkspace.name} not found")) | ||
.completedCloneWorkspaceFileTransfer | ||
.isDefined shouldBe true | ||
runAndWait(cloneWorkspaceFileTransferQuery.listPendingTransfers()) shouldBe empty | ||
|
||
// Exactly one transfer row should exist for the destination workspace, with `finished` populated
// and `outcome` equal to Some("Failure").
val allWorkspaceTransfers = runAndWait( | ||
cloneWorkspaceFileTransferQuery | ||
.filter(_.destWorkspaceId === destWorkspace.workspaceIdAsUUID) | ||
.map(r => (r.finished, r.outcome)) | ||
.result | ||
) | ||
allWorkspaceTransfers should have size 1 | ||
|
||
val transferResult = allWorkspaceTransfers.head | ||
transferResult._1 shouldBe defined | ||
transferResult._2 shouldBe Some("Failure") | ||
} | ||
|
||
system.stop(actor) | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So
copyBucketFiles
doesn't complete until the transfer is complete? But somehow we get to the point where we can kill off a transfer that is taking too long…
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This addresses cases where a source workspace's files can't be copied to a destination workspace for longer than a day due to persistent errors. When that happens, the monitor currently tries to clone the files nonstop and fills up our logs unnecessarily.
It doesn't attempt to protect against long-running copy operations while transferring large buckets. While it would be nice to have those protections, the solution is likely to switch to STS, which would be more involved.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, so
copyBucketFiles
would throw an exception at some point, and then the monitor kicks off this code again?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that's correct. And if
copyBucketFiles
continues to throw exceptions for over a day for a given workspace, this will make it so the monitor stops trying to transfer the files for that workspace.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the idea is to stop retries, not to interrupt an in-progress operation?
And then if it fails, this entire function will return a failed future (it won't even get to
markTransferAsComplete
).