Merge pull request #9 from data-intuitive/develop
Merge recent developments
tverbeiren authored Feb 28, 2022
2 parents 6778eab + f640465 commit b18c632
Showing 51 changed files with 785 additions and 2,171 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/sbt.yaml
@@ -0,0 +1,35 @@
name: Scala CI and Publish

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:
    runs-on: ${{ matrix.os }}
    if: "!contains(github.event.head_commit.message, 'ci skip')"
    strategy:
      matrix:
        os: [ ubuntu-latest ]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Scala
        uses: olafurpg/setup-scala@v10
      - name: Run tests on Ubuntu
        if: startsWith(matrix.os, 'ubuntu')
        run: sbt 'testOnly -- -l com.dataintuitive.tags.IOtag'

  publish:
    name: Publish package
    needs: [build]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Scala
        uses: olafurpg/setup-scala@v10
      - name: Upload
        run: sbt aetherDeploy
        env:
          GITHUB_TOKEN: ${{ secrets.PACKAGE_SECRET }}
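The `-l` option passed to ScalaTest via `testOnly` excludes tests carrying the `com.dataintuitive.tags.IOtag` tag, so IO-bound tests are skipped on CI. As a minimal sketch (the actual declaration in this repository may differ), such a tag is declared in ScalaTest like this:

package com.dataintuitive.tags

import org.scalatest.Tag

// Tests tagged with IOtag are excluded by: testOnly -- -l com.dataintuitive.tags.IOtag
object IOtag extends Tag("com.dataintuitive.tags.IOtag")

A suite then opts a test in with `test("loads the database from disk", IOtag) { ... }`.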
12 changes: 8 additions & 4 deletions README.md
@@ -1,19 +1,23 @@
# Introduction

This project provides the API for [LuciusWeb](https://github.com/data-intuitive/LuciusWeb) to talk to. The API is a [Spark Jobserver](https://github.com/spark-jobserver/spark-jobserver) project. It needs to be compiled and the resulting `jar` has to be uploaded to the Spark-Jobserver.
This project provides the API for [LuciusWeb](https://github.com/data-intuitive/LuciusWeb). The API is a [Spark Jobserver](https://github.com/spark-jobserver/spark-jobserver) project. It needs to be compiled and the resulting `jar` has to be uploaded to the Spark-Jobserver.

There's still a lot of work to be done on this (version numbers don't reflect everything).

__Please note that version 2.1.0 and onwards is for Spark 2.2.1.__
# Dependencies

| LuciusAPI | LuciusCore | Spark Jobserver | Spark |
|-----------|------------|-----------------|-------|
| 5.0.0 | 4.0.10 | 0.11.1 | 2.4.7 |
| 5.0.1 | 4.0.11 | 0.11.1 | 2.4.7 |

# API
# API Documentation

The documentation for the (__old__) version of the API is available in [postman](https://www.getpostman.com/) and [can be found here](https://www.getpostman.com/collections/cf537f6cae9b82c35034).

# Data

Public data is not available yet.
Public data is not available (yet).

# Local Deployment

35 changes: 20 additions & 15 deletions build.sbt
@@ -1,31 +1,39 @@
name := "LuciusAPI"

version := "3.3.6"
import aether.AetherKeys._

scalaVersion := "2.11.12"

resolvers += "Job Server Bintray" at "https://dl.bintray.com/spark-jobserver/maven"
version in ThisBuild := "5.0.1"

resolvers += "Local Ivy" at "file://Users/toni/.ivy2/local"
scalaVersion := "2.11.12"

resolvers += Resolver.githubPackages("data-intuitive")
resolvers += "Artifactory" at "https://sparkjobserver.jfrog.io/artifactory/jobserver/"

libraryDependencies ++= Seq(
"com.data-intuitive" %% "luciuscore" % "3.3.4",
"spark.jobserver" %% "job-server-api" % "0.8.0" % "provided",
"spark.jobserver" %% "job-server-extras" % "0.8.0" % "provided",
"com.data-intuitive" %% "luciuscore" % "4.0.11",
"spark.jobserver" %% "job-server-api" % "0.11.1" % "provided",
"spark.jobserver" %% "job-server-extras" % "0.11.1" % "provided",
"org.scalactic" %% "scalactic" % "3.0.7" % "test" ,
"org.scalatest" %% "scalatest" % "3.0.7" % "test" ,
"org.apache.spark" %% "spark-core" % "2.3.1" % "provided",
"org.apache.spark" %% "spark-sql" % "2.3.1" % "provided"
/* "org.scalaz" %% "scalaz-core" % "7.2.0" */
"org.apache.spark" %% "spark-core" % "2.4.7" % "provided",
"org.apache.spark" %% "spark-sql" % "2.4.7" % "provided"
)

test in assembly := {}

organization := "com.data-intuitive"
licenses += ("Apache-2.0", url("https://www.apache.org/licenses/LICENSE-2.0.html"))

// publish to github packages
publishTo := Some("GitHub data-intuitive Apache Maven Packages" at "https://maven.pkg.github.com/data-intuitive/luciusapi")
publishMavenStyle := true
credentials += Credentials(
"GitHub Package Registry",
"maven.pkg.github.com",
"tverbeiren",
System.getenv("GITHUB_TOKEN")
)

// Publish assembly jar as well
artifact in (Compile, assembly) := {
val art = (artifact in (Compile, assembly)).value
@@ -34,7 +42,4 @@ artifact in (Compile, assembly) := {

addArtifact(artifact in (Compile, assembly), assembly)

githubOwner := "data-intuitive"
githubRepository := "luciusapi"
githubTokenSource := TokenSource.GitConfig("github.token")
publishMavenStyle := true
aetherPackageMain := assembly.value
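With these settings, the `sbt aetherDeploy` step from the workflow above publishes the regular artifacts plus the assembly jar (registered through `addArtifact` and `aetherPackageMain`) to GitHub Packages, authenticating with the `GITHUB_TOKEN` environment variable.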
7 changes: 7 additions & 0 deletions config/example.conf
@@ -9,10 +9,17 @@
geneFeatures {
probesetID = probesetid,
dataType = dataType,
dataType2 = dataType2,
ENTREZID = entrezid,
ENSEMBL = ensemblid,
SYMBOL = symbol,
GENENAME = name,
GENEFAMILY = geneFamily
}
geneDataType {
"1-1" = "L1000"
"0-1" = "BING"
"0-0" = "AIG"
"1-0" = "INVALID"
}
}
2 changes: 1 addition & 1 deletion project/build.properties
@@ -1 +1 @@
sbt.version=1.2.8
sbt.version=1.3.10
4 changes: 2 additions & 2 deletions project/plugins.sbt
@@ -1,7 +1,7 @@
logLevel := Level.Warn

addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.4.2")
/* addSbtPlugin("org.foundweekends" % "sbt-bintray" % "0.5.4") */
addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.11")
addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.4.0")
addSbtPlugin("org.ensime" % "sbt-ensime" % "2.5.1")
addSbtPlugin("no.arktekk.sbt" % "aether-deploy" % "0.26.0")
addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.2")
4 changes: 4 additions & 0 deletions project/project/metals.sbt
@@ -0,0 +1,4 @@
// DO NOT EDIT! This file is auto-generated.
// This file enables sbt-bloop to create bloop config files.

addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.4.8")
4 changes: 4 additions & 0 deletions project/project/project/metals.sbt
@@ -0,0 +1,4 @@
// DO NOT EDIT! This file is auto-generated.
// This file enables sbt-bloop to create bloop config files.

addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.4.8")
106 changes: 64 additions & 42 deletions src/main/scala/com/dataintuitive/luciusapi/Common.scala
@@ -1,8 +1,10 @@
package com.dataintuitive.luciusapi

// LuciusCore
import com.dataintuitive.luciuscore.Model.DbRow
import com.dataintuitive.luciuscore.genes._
import com.dataintuitive.luciuscore._
import model.v4._
import genes._
import api._

// Jobserver
import spark.jobserver.api.{JobEnvironment, SingleProblem, ValidationProblem}
@@ -23,8 +25,6 @@ import org.apache.spark.broadcast.Broadcast
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import com.dataintuitive.luciusapi.Model.FlatDbRow

import com.dataintuitive.jobserver.NamedDataSet
import com.dataintuitive.jobserver.DataSetPersister

@@ -40,8 +40,6 @@ object Common extends Serializable {
new BroadcastPersister[U]
implicit def DataSetPersister[T]: NamedObjectPersister[NamedDataSet[T]] = new DataSetPersister[T]

case class CachedData(db: Dataset[DbRow], flatDb: Dataset[FlatDbRow], genesDB: GenesDB)

object ParamHandlers {

def paramSignature(config: Config): List[String] Or One[ValidationProblem] = {
@@ -115,7 +113,12 @@
}

def validHeadTail(config: Config): Boolean Or One[ValidationProblem] = {
if (optParamHead(config) > 0 || optParamTail(config) > 0) Good(true)
// We want either head or tail but not both: an 'exclusive or' is needed instead of 'or', so use '!=' instead of '||'.
// (false, false) => false
// (true, false) => true
// (false, true) => true
// (true, true) => false
if (optParamHead(config) > 0 != optParamTail(config) > 0) Good(true)
else Bad(One(SingleProblem("Either head or tail count needs to be provided")))
}
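As a quick illustration (not part of the diff) of why Boolean `!=` behaves as exclusive or:

// '!=' on Booleans is true exactly when one operand is true and the other is false.
val headOnly = (10 > 0) != (0 > 0) // true:  head given, tail absent -> valid
val both     = (10 > 0) != (5 > 0) // false: both given -> invalid
val neither  = (0 > 0)  != (0 > 0) // false: neither given -> invalid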

@@ -156,8 +159,8 @@
}.toOption
.getOrElse(Seq())

def validVersion(config: Config): Boolean Or One[ValidationProblem] = {
if (VERSIONS contains optParamVersion(config)) Good(true)
def validVersion(config: Config): String Or One[ValidationProblem] = {
if (VERSIONS contains optParamVersion(config)) Good(optParamVersion(config))
else Bad(One(SingleProblem("Not a valid version identifier")))
}

@@ -169,13 +172,21 @@
Try(config.getString("limit").toInt).getOrElse(default)
}

def optParamLike(config: Config, default: List[String] = Nil): List[String] = {
Try(config.getString("like").split(" ").toList).getOrElse(default)
}

def optParamTrtType(config: Config, default: List[String] = Nil): List[String] = {
Try(config.getString("trtType").split(" ").toList).getOrElse(default)
}

def optParamFeatures(config: Config, default: List[String] = List(".*")): List[String] = {
Try(config.getString("features").toString.split(" ").toList).getOrElse(default)
}
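These optional parameters share one pattern: read a single space-separated string, split it, and fall back to a default when the key is absent. An illustrative sketch using the Typesafe Config API (the parameter values are made up):

import com.typesafe.config.ConfigFactory

val cfg = ConfigFactory.parseString("""trtType = "trt_cp trt_sh" """)
optParamTrtType(cfg)                    // List("trt_cp", "trt_sh")
optParamLike(ConfigFactory.empty())     // Nil, the default
optParamFeatures(ConfigFactory.empty()) // List(".*"), i.e. match everything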

def getDB(runtime: JobEnvironment): Dataset[DbRow] Or One[ValidationProblem] = {
def getDB(runtime: JobEnvironment): Dataset[Perturbation] Or One[ValidationProblem] = {
Try {
val NamedDataSet(db, _, _) = runtime.namedObjects.get[NamedDataSet[DbRow]]("db").get
val NamedDataSet(db, _, _) = runtime.namedObjects.get[NamedDataSet[Perturbation]]("db").get
db
}.map(db => Good(db))
.getOrElse(Bad(One(SingleProblem("Cached DB not available"))))
@@ -197,12 +208,44 @@
.getOrElse(Bad(One(SingleProblem("Broadcast genes not available"))))
}

def getFilters(runtime: JobEnvironment): Filters.FiltersDB Or One[ValidationProblem] = {
Try {
val NamedBroadcast(filters) = runtime.namedObjects.get[NamedBroadcast[Filters.FiltersDB]]("filters").get
filters.value
}.map(filters => Good(filters))
.getOrElse(Bad(One(SingleProblem("Broadcast filters not available"))))
}

def paramDb(config: Config): String Or One[ValidationProblem] = {
Try(config.getString("db.uri"))
.map(db => Good(db))
.getOrElse(Bad(One(SingleProblem("DB config parameter not provided"))))
}

def paramDbs(config: Config): List[String] Or One[ValidationProblem] = {
Try(config.getStringList("db.uris").asScala.toList)
.map(dbs => Good(dbs))
.getOrElse(Bad(One(SingleProblem("DB config parameter not provided"))))
}

/**
* Checks config for either db.uri or db.uris.
* This supports both the older format, db.uri as a single string,
* and the newer format, db.uris as a list of strings.
* Supporting the old format prevents existing config files from breaking.
*/
def paramDbOrDbs(config: Config): List[String] Or One[ValidationProblem] = {
val singleDb = paramDb(config)
val multipleDbs = paramDbs(config)

(singleDb.isGood, multipleDbs.isGood) match {
case (false, false) => Bad(One(SingleProblem("DB config parameter not provided")))
case (true, true) => Bad(One(SingleProblem("Only one declaration of db.uri or db.uris is allowed")))
case (true, false) => singleDb.map(List(_))
case (false, true) => multipleDbs
}
}
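An illustrative sketch of this backwards-compatible lookup (the paths are made up):

import com.typesafe.config.ConfigFactory

val oldStyle = ConfigFactory.parseString("""db.uri = "/data/db.parquet" """)
val newStyle = ConfigFactory.parseString("""db.uris = ["/data/a.parquet", "/data/b.parquet"]""")

paramDbOrDbs(oldStyle)              // Good(List("/data/db.parquet"))
paramDbOrDbs(newStyle)              // Good(List("/data/a.parquet", "/data/b.parquet"))
paramDbOrDbs(ConfigFactory.empty()) // Bad(...): DB config parameter not provided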

def paramGenes(config: Config): String Or One[ValidationProblem] = {
Try(config.getString("geneAnnotations"))
.map(ga => Good(ga))
@@ -252,37 +295,16 @@
.getOrElse(defaultDict)
}

}

object Variables {

// Calculated
val ZHANG = Set("zhang", "similarity", "Zhang", "Similarity")

// Sample
val ID = Set("id", "pwid")
val BATCH = Set("batch", "Batch")
val PLATEID = Set("plateid", "PlateId")
val WELL = Set("well", "Well")
val PROTOCOLNAME = Set("protocolname", "cellline", "CellLine", "ProtocolName", "protocol", "Protocol")
val CONCENTRATION = Set("concentration", "Concentration")
val YEAR = Set("year", "Year")
val TIME = Set("time", "Time")

// Compound
val COMPOUND_ID = Set("jnjs", "Jnjs", "cid", "pid", "compound_id")
val JNJB = Set("jnjb", "Jnjb")
val COMPOUND_SMILES = Set("Smiles", "smiles", "SMILES", "compound_smiles")
val COMPOUND_INCHIKEY = Set("inchikey", "Inchikey", "compound_inchikey")
val COMPOUND_NAME = Set("compoundname", "CompoundName", "Compoundname", "name", "Name", "compound_name")
val COMPOUND_TYPE = Set("Type", "type", "compound_type")
val COMPOUND_TARGETS = Set("targets", "knownTargets", "Targets", "compound_targets")

// Filters
val FILTERS = Set("filters", "Filters", "filter", "filters")

// Derived
val SIGNIFICANTGENES = Set("significantGenes")
/**
* geneDataType contains a mapping between the dataType as it is read and how it should be
* returned by the code.
* This is especially useful when two dataTypes are read in together to be concatenated.
*/
def paramGeneDataTypes(config: Config): Map[String, String] = {
Try(config.getObject("geneDataType")).toOption
.map(_.unwrapped.asScala.toMap.map{case (k,v) => (k.toString, v.toString)})
.getOrElse(Map.empty)
}
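An illustrative sketch, mirroring the `geneDataType` block added to `config/example.conf` above:

import com.typesafe.config.ConfigFactory

val cfg = ConfigFactory.parseString(
  """geneDataType {
    |  "1-1" = "L1000"
    |  "0-1" = "BING"
    |}""".stripMargin)

paramGeneDataTypes(cfg)                   // Map("1-1" -> "L1000", "0-1" -> "BING")
paramGeneDataTypes(ConfigFactory.empty()) // Map.empty, the default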

}

7 changes: 0 additions & 7 deletions src/main/scala/com/dataintuitive/luciusapi/Model.scala

This file was deleted.

