Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make pipeline version per Organism #3534

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
854f78e
Add stubs and TODOs
fhennig Jan 16, 2025
f02aaba
WIP (things start up fine it seems; but I think version isn't upgrade…
fhennig Jan 23, 2025
c300fb5
make more changes
fhennig Jan 23, 2025
dfaab96
Update schema documentation based on migration changes
actions-user Jan 23, 2025
68381b4
format
fhennig Jan 23, 2025
c1c4781
minor changes
fhennig Jan 23, 2025
47e3cab
Add initialization of the table for all organisms
fhennig Jan 23, 2025
2c72e9f
Update schema documentation based on migration changes
actions-user Jan 23, 2025
7d8f169
version cannot be null anymore after previous change
fhennig Jan 23, 2025
a8e2760
progress in fixing tests
fhennig Jan 23, 2025
040ed92
Update schema documentation based on migration changes
actions-user Jan 23, 2025
b11d3ad
remove todos
fhennig Jan 23, 2025
8931fc2
21
fhennig Jan 23, 2025
f91e69c
format
fhennig Jan 23, 2025
a3ee70f
Fixed some tests
fhennig Jan 28, 2025
1d0eb88
Update schema documentation based on migration changes
actions-user Jan 28, 2025
2e8b6e1
Move table init to flyway init
fhennig Jan 28, 2025
559b9f0
Fixed one test
fhennig Jan 28, 2025
71433e3
make mock bean primary -- one failing test remaining
fhennig Jan 28, 2025
18068ef
fixed last test
fhennig Jan 28, 2025
ea83337
adress review
fhennig Jan 28, 2025
eb098a9
Add some docs
fhennig Jan 28, 2025
103208d
Some logic changes (still needs testing)
fhennig Jan 28, 2025
4317511
Update schema documentation based on migration changes
actions-user Jan 28, 2025
2fde44c
docs
fhennig Jan 28, 2025
5679923
docs
fhennig Jan 28, 2025
7f51c8f
foo
fhennig Jan 28, 2025
f0ea769
Add test
fhennig Jan 28, 2025
85fc28d
Add another test
fhennig Jan 28, 2025
02f864f
Add some documentation
fhennig Jan 29, 2025
809720f
Update values.yaml to test non-identical prepro versions
corneliusroemer Jan 30, 2025
a49ef1a
Versions must be integers
corneliusroemer Jan 30, 2025
d81848a
improve documentation
fhennig Jan 31, 2025
aba6c61
clarify
fhennig Jan 31, 2025
a88aae1
Simplify table definitions
fhennig Jan 31, 2025
20eece8
Make test description more descriptive and override linter error for …
fhennig Jan 31, 2025
e8638ba
Add missing and
fhennig Jan 31, 2025
5fd4c34
fix typo
fhennig Jan 31, 2025
e6d12c1
Update schema documentation based on migration changes
actions-user Jan 31, 2025
9981b9f
left join -> join
fhennig Jan 31, 2025
22805d0
Update schema documentation based on migration changes
actions-user Jan 31, 2025
4922514
maybe fix?
fhennig Feb 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 42 additions & 40 deletions backend/docs/db/schema.sql
chaoran-chen marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ ALTER SEQUENCE public.audit_log_id_seq OWNED BY public.audit_log.id;

CREATE TABLE public.current_processing_pipeline (
version bigint NOT NULL,
started_using_at timestamp without time zone NOT NULL
started_using_at timestamp without time zone NOT NULL,
organism text NOT NULL
);


Expand All @@ -192,6 +193,28 @@ CREATE TABLE public.data_use_terms_table (

ALTER TABLE public.data_use_terms_table OWNER TO postgres;

--
-- Name: sequence_entries; Type: TABLE; Schema: public; Owner: postgres
--

CREATE TABLE public.sequence_entries (
accession text NOT NULL,
version bigint NOT NULL,
organism text NOT NULL,
submission_id text NOT NULL,
submitter text NOT NULL,
approver text,
group_id integer NOT NULL,
submitted_at timestamp without time zone NOT NULL,
released_at timestamp without time zone,
is_revocation boolean DEFAULT false NOT NULL,
original_data jsonb,
version_comment text
);


ALTER TABLE public.sequence_entries OWNER TO postgres;

--
-- Name: sequence_entries_preprocessed_data; Type: TABLE; Schema: public; Owner: postgres
--
Expand Down Expand Up @@ -223,18 +246,18 @@ CREATE VIEW public.external_metadata_view AS
WHEN (all_external_metadata.external_metadata IS NULL) THEN jsonb_build_object('metadata', (cpd.processed_data -> 'metadata'::text))
ELSE jsonb_build_object('metadata', ((cpd.processed_data -> 'metadata'::text) || all_external_metadata.external_metadata))
END AS joint_metadata
FROM (( SELECT sequence_entries_preprocessed_data.accession,
sequence_entries_preprocessed_data.version,
sequence_entries_preprocessed_data.pipeline_version,
sequence_entries_preprocessed_data.processed_data,
sequence_entries_preprocessed_data.errors,
sequence_entries_preprocessed_data.warnings,
sequence_entries_preprocessed_data.processing_status,
sequence_entries_preprocessed_data.started_processing_at,
sequence_entries_preprocessed_data.finished_processing_at
FROM public.sequence_entries_preprocessed_data
WHERE (sequence_entries_preprocessed_data.pipeline_version = ( SELECT current_processing_pipeline.version
FROM public.current_processing_pipeline))) cpd
FROM (( SELECT sepd.accession,
sepd.version,
sepd.pipeline_version,
sepd.processed_data,
sepd.errors,
sepd.warnings,
sepd.processing_status,
sepd.started_processing_at,
sepd.finished_processing_at
FROM ((public.sequence_entries_preprocessed_data sepd
JOIN public.sequence_entries se ON (((sepd.accession = se.accession) AND (sepd.version = se.version))))
JOIN public.current_processing_pipeline cpp ON (((se.organism = cpp.organism) AND (sepd.pipeline_version = cpp.version))))) cpd
LEFT JOIN public.all_external_metadata ON (((all_external_metadata.accession = cpd.accession) AND (all_external_metadata.version = cpd.version))));


Expand Down Expand Up @@ -421,28 +444,6 @@ CREATE TABLE public.seqsets (

ALTER TABLE public.seqsets OWNER TO postgres;

--
-- Name: sequence_entries; Type: TABLE; Schema: public; Owner: postgres
--

CREATE TABLE public.sequence_entries (
accession text NOT NULL,
version bigint NOT NULL,
organism text NOT NULL,
submission_id text NOT NULL,
submitter text NOT NULL,
approver text,
group_id integer NOT NULL,
submitted_at timestamp without time zone NOT NULL,
released_at timestamp without time zone,
is_revocation boolean DEFAULT false NOT NULL,
original_data jsonb,
version_comment text
);


ALTER TABLE public.sequence_entries OWNER TO postgres;

--
-- Name: sequence_entries_view; Type: VIEW; Schema: public; Owner: postgres
--
Expand All @@ -466,7 +467,8 @@ CREATE VIEW public.sequence_entries_view AS
(sepd.processed_data || em.joint_metadata) AS joint_metadata,
CASE
WHEN se.is_revocation THEN ( SELECT current_processing_pipeline.version
FROM public.current_processing_pipeline)
FROM public.current_processing_pipeline
fhennig marked this conversation as resolved.
Show resolved Hide resolved
WHERE (current_processing_pipeline.organism = se.organism))
ELSE sepd.pipeline_version
END AS pipeline_version,
sepd.errors,
Expand All @@ -484,9 +486,9 @@ CREATE VIEW public.sequence_entries_view AS
WHEN ((sepd.warnings IS NOT NULL) AND (jsonb_array_length(sepd.warnings) > 0)) THEN 'HAS_WARNINGS'::text
ELSE 'NO_ISSUES'::text
END AS processing_result
FROM ((public.sequence_entries se
LEFT JOIN public.sequence_entries_preprocessed_data sepd ON (((se.accession = sepd.accession) AND (se.version = sepd.version) AND (sepd.pipeline_version = ( SELECT current_processing_pipeline.version
FROM public.current_processing_pipeline)))))
FROM (((public.sequence_entries se
LEFT JOIN public.sequence_entries_preprocessed_data sepd ON (((se.accession = sepd.accession) AND (se.version = sepd.version))))
LEFT JOIN public.current_processing_pipeline ccp ON (((se.organism = ccp.organism) AND (sepd.pipeline_version = ccp.version))))
LEFT JOIN public.external_metadata_view em ON (((se.accession = em.accession) AND (se.version = em.version))));


Expand Down Expand Up @@ -601,7 +603,7 @@ ALTER TABLE ONLY public.audit_log
--

ALTER TABLE ONLY public.current_processing_pipeline
ADD CONSTRAINT current_processing_pipeline_pkey PRIMARY KEY (version);
ADD CONSTRAINT current_processing_pipeline_pkey PRIMARY KEY (organism);


--
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@ import org.flywaydb.core.Flyway
import org.jetbrains.exposed.spring.autoconfigure.ExposedAutoConfiguration
import org.jetbrains.exposed.sql.DatabaseConfig
import org.jetbrains.exposed.sql.Slf4jSqlDebugLogger
import org.jetbrains.exposed.sql.transactions.transaction
import org.loculus.backend.controller.LoculusCustomHeaders
import org.loculus.backend.log.REQUEST_ID_HEADER_DESCRIPTION
import org.loculus.backend.service.submission.CurrentProcessingPipelineTable
import org.loculus.backend.utils.DateProvider
import org.springdoc.core.customizers.OperationCustomizer
import org.springframework.beans.factory.annotation.Value
import org.springframework.boot.autoconfigure.ImportAutoConfiguration
Expand Down Expand Up @@ -66,13 +69,25 @@ class BackendSpringConfig {

/**
 * Builds the [Flyway] bean for every profile except "test", runs the database migrations
 * immediately, and then seeds [CurrentProcessingPipelineTable] for the configured organisms.
 *
 * @param dataSource the data source Flyway migrates
 * @param backendConfig supplies the configured organisms; only `organisms.keys` is read here
 * @param dateProvider supplies the timestamp passed to the table initialization
 * @return the Flyway instance, after `migrate()` has already been called on it
 */
@Bean
@Profile("!test")
fun getFlyway(dataSource: DataSource): Flyway {
fun getFlyway(dataSource: DataSource, backendConfig: BackendConfig, dateProvider: DateProvider): Flyway {
val configuration = Flyway.configure()
.baselineOnMigrate(true)
.dataSource(dataSource)
.validateMigrationNaming(true)
val flyway = Flyway(configuration)
// Migrations run first so that the table exists before it is initialized below.
flyway.migrate()

// Since migration V1.10 we need to initialize the CurrentProcessingPipelineTable
// in code, because the configured organisms are not known in the SQL table definitions.
logger.info("Initializing CurrentProcessingPipelineTable")
transaction {
val insertedRows = CurrentProcessingPipelineTable.setV1ForOrganismsIfNotExist(
backendConfig.organisms.keys,
dateProvider.getCurrentDateTime(),
)
// NOTE(review): `insertedRows` is whatever setV1ForOrganismsIfNotExist returns, i.e. the result of
// Exposed's batchInsert. If that is a list of rows rather than a count, this message prints the
// rows themselves - confirm whether `insertedRows.size` was intended.
logger.info("$insertedRows inserted.")
}

return flyway
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ open class SubmissionController(
@RequestParam pipelineVersion: Long,
@RequestHeader(value = HttpHeaders.IF_NONE_MATCH, required = false) ifNoneMatch: String?,
): ResponseEntity<StreamingResponseBody> {
val currentProcessingPipelineVersion = submissionDatabaseService.getCurrentProcessingPipelineVersion()
val currentProcessingPipelineVersion = submissionDatabaseService.getCurrentProcessingPipelineVersion(organism)
if (pipelineVersion < currentProcessingPipelineVersion) {
throw UnprocessableEntityException(
"The processing pipeline version $pipelineVersion is not accepted " +
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.loculus.backend.service.debug

import org.jetbrains.exposed.sql.deleteAll
import org.jetbrains.exposed.sql.insert
import org.loculus.backend.config.BackendConfig
import org.loculus.backend.service.datauseterms.DataUseTermsTable
import org.loculus.backend.service.submission.CurrentProcessingPipelineTable
import org.loculus.backend.service.submission.MetadataUploadAuxTable
Expand All @@ -13,7 +13,7 @@ import org.springframework.stereotype.Component
import org.springframework.transaction.annotation.Transactional

@Component
class DeleteSequenceDataService(private val dateProvider: DateProvider) {
class DeleteSequenceDataService(private val dateProvider: DateProvider, private val config: BackendConfig) {
@Transactional
fun deleteAllSequenceData() {
SequenceEntriesTable.deleteAll()
Expand All @@ -22,9 +22,9 @@ class DeleteSequenceDataService(private val dateProvider: DateProvider) {
SequenceUploadAuxTable.deleteAll()
DataUseTermsTable.deleteAll()
CurrentProcessingPipelineTable.deleteAll()
CurrentProcessingPipelineTable.insert {
it[versionColumn] = 1
it[startedUsingAtColumn] = dateProvider.getCurrentDateTime()
}
CurrentProcessingPipelineTable.setV1ForOrganismsIfNotExist(
config.organisms.keys,
dateProvider.getCurrentDateTime(),
)
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,53 @@
package org.loculus.backend.service.submission

import kotlinx.datetime.LocalDateTime
import org.jetbrains.exposed.sql.Table
import org.jetbrains.exposed.sql.andWhere
import org.jetbrains.exposed.sql.batchInsert
import org.jetbrains.exposed.sql.kotlin.datetime.datetime
import org.jetbrains.exposed.sql.selectAll
import org.jetbrains.exposed.sql.update

const val CURRENT_PROCESSING_PIPELINE_TABLE_NAME = "current_processing_pipeline"

/**
 * Exposed table definition for `current_processing_pipeline`.
 *
 * Each row records, for one organism, the pipeline version currently in use and the time
 * at which that version started being used.
 */
object CurrentProcessingPipelineTable : Table(CURRENT_PROCESSING_PIPELINE_TABLE_NAME) {
    val organismColumn = varchar("organism", 255)
    val versionColumn = long("version")
    val startedUsingAtColumn = datetime("started_using_at")

    /**
     * Ensures that every given organism has a current pipeline version.
     *
     * Batch-inserts one row per organism with version 1 and [now] as the start time.
     * The insert is issued with `ignore = true`, so organisms that already have a
     * version defined are meant to be left as they are.
     *
     * @param organisms the organisms that need an entry
     * @param now the timestamp stored as "started using at" for newly inserted rows
     * @return the result of the underlying Exposed `batchInsert` call
     */
    fun setV1ForOrganismsIfNotExist(organisms: Collection<String>, now: LocalDateTime) =
        batchInsert(organisms, ignore = true) { organismName ->
            this[organismColumn] = organismName
            this[versionColumn] = 1
            this[startedUsingAtColumn] = now
        }

    /**
     * Tells whether the stored current pipeline version of [organism] is older than
     * [maybeNewerVersion], a version that was found and is potentially newer than the current one.
     * If so, the pipeline needs to 'update', i.e. reprocess older entries.
     *
     * @return `true` if a row for [organism] exists whose version is strictly less than
     *   [maybeNewerVersion]; `false` otherwise (including when the organism has no row)
     */
    fun pipelineNeedsUpdate(maybeNewerVersion: Long, organism: String): Boolean {
        val rowsWithOlderVersion = selectAll()
            .where { versionColumn less maybeNewerVersion }
            .andWhere { organismColumn eq organism }
        return !rowsWithOlderVersion.empty()
    }

    /**
     * Sets the pipeline version of [organism] to [newVersion] and records [startedUsingAt]
     * as the time the new version came into use.
     *
     * No comparison with the previously stored version is made here.
     *
     * @return the value returned by Exposed's `update` for this statement
     */
    fun updatePipelineVersion(organism: String, newVersion: Long, startedUsingAt: LocalDateTime): Int =
        update({ organismColumn eq organism }) { statement ->
            statement[versionColumn] = newVersion
            statement[startedUsingAtColumn] = startedUsingAt
        }
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ object SequenceEntriesTable : Table(SEQUENCE_ENTRIES_TABLE_NAME) {
)
}

/**
 * Selects the distinct values of the organism column of this table.
 *
 * The result is exposed as a sequence, so rows are mapped to their organism value
 * only while the sequence is being iterated.
 * NOTE(review): presumably the sequence has to be consumed while the surrounding
 * database transaction is still open - verify against callers.
 */
fun distinctOrganisms() = SequenceEntriesTable
    .select(organismColumn)
    .withDistinct()
    .asSequence()
    .map { row -> row[organismColumn] }

/**
 * Builds an `inList` condition on the (accession, version) column pair of this table,
 * matched against the pairs obtained from [accessionVersions] via `toPairs()`.
 */
fun accessionVersionIsIn(accessionVersions: List<AccessionVersionInterface>) =
    (accessionColumn to versionColumn).inList(accessionVersions.toPairs())

Expand Down
Loading
Loading