diff --git a/_data/CONTRIBUTORS.yaml b/_data/CONTRIBUTORS.yaml index 8535c10a7..056b9bb9a 100755 --- a/_data/CONTRIBUTORS.yaml +++ b/_data/CONTRIBUTORS.yaml @@ -575,7 +575,12 @@ Styliani-Christina Fragkouli: git: sfragkoul orcid: 0000-0003-4067-7123 email: sfragkoul@certh.gr - affiliation: Institute of Applied Biosciences(INAB|CERTH) / University of Athens / ELIXIR-GR + affiliation: Institute of Applied Biosciences(INAB|CERTH) / University of Athens / ELIXIR-GR +Sveinung Gundersen: + git: sveinugu + orcid: 0000-0001-9888-7954 + email: sveinugu@ifi.uio.no + affiliation: ELIXIR Norway Diana Pilvar: git: diana-pilvar email: diana.pilvar@ut.ee @@ -596,4 +601,4 @@ Pavankumar Videm: git: pavanvidem email: videmp@informatik.uni-freiburg.de orcid: 0000-0002-5192-126X - affiliation: University of Freiburg / European Galaxy team \ No newline at end of file + affiliation: University of Freiburg / European Galaxy team diff --git a/_data/affiliations.yaml b/_data/affiliations.yaml index 502c519cf..7507da2f6 100644 --- a/_data/affiliations.yaml +++ b/_data/affiliations.yaml @@ -166,3 +166,9 @@ expose: yes type: infrastructure url: https://www.bbmri.nl/ +- name: EMBL-EBI + image_url: /images/institutions/Ebi_official_logo.png + pid: https://ror.org/02catss52 + expose: yes + type: project + url: https://www.ebi.ac.uk/ diff --git a/_data/news.yml b/_data/news.yml index c08d1ca86..da39f1a4d 100644 --- a/_data/news.yml +++ b/_data/news.yml @@ -154,3 +154,7 @@ date: 2023-12-19 linked_pr: 1429 description: The content of the "tool assembly" page for CSC (Finnish IT Center for Science) was updated. [Discover the page here](csc_assembly). +- name: "New page: FAIRtracks tool assembly" + date: 2023-12-20 + linked_pr: 1419 + description: A new "tool assembly" page for FAIRtracks was added. [Discover the page here](fairtracks_assembly). 
diff --git a/_data/sidebars/data_management.yml b/_data/sidebars/data_management.yml index 4fd352012..277c7d90d 100755 --- a/_data/sidebars/data_management.yml +++ b/_data/sidebars/data_management.yml @@ -112,6 +112,8 @@ subitems: url: /covid19_data_portal - title: CSC url: /csc_assembly + - title: FAIRtracks + url: /fairtracks_assembly - title: Galaxy url: /galaxy_assembly - title: IFB diff --git a/_data/tool_and_resource_list.yml b/_data/tool_and_resource_list.yml index c32aa17ab..30e22a52e 100755 --- a/_data/tool_and_resource_list.yml +++ b/_data/tool_and_resource_list.yml @@ -2297,6 +2297,38 @@ registry: biotools: dataplan url: https://plan.nfdi4plants.org +- id: omnipy + name: Omnipy + url: https://github.com/fairtracks/omnipy + description: + Omnipy is a high-level Python library for type-driven data wrangling and scalable workflow orchestration. + registry: + biotools: omnipy + tess: omnipy +- id: trackfind + name: TrackFind + url: https://trackfind.elixir.no/ + description: + TrackFind is a search and curation engine for metadata of genomic tracks. It supports crawling of the TrackHub Registry and other portals. + registry: + biotools: trackfind +- id: pydantic + name: Pydantic + url: https://docs.pydantic.dev/latest/ + description: + Pydantic is the most widely used data validation library for Python. +- id: prefect + name: Prefect + url: https://www.prefect.io/ + description: + Prefect is a workflow orchestration tool empowering developers to build, observe, and react to data pipelines. +- id: track-hub-registry + name: Track Hub Registry + url: https://www.trackhubregistry.org/ + description: + A global centralised collection of publicly accessible track hubs + registry: + fairsharing: a1de61 - description: Fast, sensitive and accurate integration of single-cell data. 
id: harmony name: Harmony diff --git a/images/fairtracks_tool-assembly.png b/images/fairtracks_tool-assembly.png new file mode 100644 index 000000000..4dfa5113d Binary files /dev/null and b/images/fairtracks_tool-assembly.png differ diff --git a/images/institutions/Ebi_official_logo.png b/images/institutions/Ebi_official_logo.png new file mode 100644 index 000000000..8811f905c Binary files /dev/null and b/images/institutions/Ebi_official_logo.png differ diff --git a/pages/national_resources/no_resources.md b/pages/national_resources/no_resources.md index b35d68cc1..e4f5d1f57 100644 --- a/pages/national_resources/no_resources.md +++ b/pages/national_resources/no_resources.md @@ -6,7 +6,7 @@ contributors: [Nazeefa Fatima,Federico Bianchini,Korbinian Bösl,Erin Calhoun] coordinators: [Korbinian Bösl, Nazeefa Fatima] related_pages: - tool_assembly: [tsd, nels, marine_assembly] + tool_assembly: [tsd, nels, marine_assembly, fairtracks] training: - name: Training in TeSS @@ -84,7 +84,7 @@ national_resources: how_to_access: A formal application is required to gain access to the storage services. related_pages: your_tasks: [transfer, storage] - tool_assembly: [nels] + tool_assembly: [nels, fairtracks] url: https://documentation.sigma2.no/files_storage/nird.html - name: Sigma2 HPC systems description: The current Norwegian academic HPC infrastructure consists of three systems for different purposes. The Norwegian academic high-performance computing and storage infrastructure is maintained by [Sigma2 NRIS](https://sigma2.no/nris), which is a joint collaboration between UiO, UiB, NTNU, UiT, and [UNINETT Sigma2 (SIKT)](https://www.sigma2.no/). 
diff --git a/pages/tool_assembly/fairtracks_assembly.md b/pages/tool_assembly/fairtracks_assembly.md new file mode 100755 index 000000000..6f3e222be --- /dev/null +++ b/pages/tool_assembly/fairtracks_assembly.md @@ -0,0 +1,115 @@ +--- +title: FAIRtracks +contributors: [Federico Bianchini, Sveinung Gundersen] +description: The FAIRtracks ecosystem provides technical solutions for the FAIRification of genome browser track files +page_id: fairtracks +affiliations: ["NO", "ES", "EMBL-EBI"] +related_pages: + your_tasks: [data_publication, transfer, metadata] + your_domain: [plants, rare_disease, single_cell_sequencing, human_data] +training: + - name: Training in TeSS + registry: TeSS + url: https://tess.elixir-europe.org/search?q=fairtracks +--- + +## What is the FAIRtracks tool assembly? + +The [FAIRtracks ecosystem](https://fairtracks.net/) is a set of services associated with a minimal +[metadata model](https://fairtracks.net/standards/#standards-01-fairtracks) for +[genomic annotations/tracks](https://fairtracks.net/tracks/#tracks-01-genomic-tracks), +implemented as a [set of JSON Schemas](https://github.com/fairtracks/fairtracks_standard/tree/master/json/schema). +The FAIRtracks model contains metadata fields particularly useful for data discovery, +harmonised through strict adherence to a selection of ontologies available through the {%tool "ontology-lookup-service" %}. +The usability of the model can be expanded through referencing the original records via Compact Uniform Resource Identifiers (CURIEs) +resolvable by {% tool "identifiers-org" %}. + +In the context of the Data Life Cycle and its stages, the FAIRtracks ecosystem covers [Collecting](collecting), [Processing](processing), +[Analysing](analysing), [Sharing](sharing), and [Reusing](reusing). It has to be noted, however, that the FAIRtracks ecosystem is structured +around a secondary data life cycle, as illustrated in Figure 1. 
As part of this secondary life cycle, the annotation/track data gets further distributed +and its discovery is enhanced through derived metadata. The FAIRtracks ecosystem aims at harmonising this process. +Primary data needs to be handled independently following domain best practices +(see e.g. the pages on [Single cell sequencing](single_cell_sequencing), [Plant sciences](plant_sciences), or [Rare disease data](rare_disease_data)). + +The FAIRtracks ecosystem is developed and provided as part of the national Service Delivery Plans by +[ELIXIR Norway](https://elixir.no/) and [ELIXIR Spain](https://elixir-europe.org/about-us/who-we-are/nodes/spain), +and is supported by the [Track Hub Registry group](https://trackhubregistry.org/) at [EMBL-EBI](https://www.ebi.ac.uk/). +FAIRtracks is endorsed by [ELIXIR Europe](https://elixir-europe.org/) as a +[Recommended Interoperability Resource](https://elixir-europe.org/platforms/interoperability/rirs). + +{% include image.html file="fairtracks_tool-assembly.png" caption="Figure 1. Illustration of the Data life cycle +for the FAIRtracks tool assembly. As genomic tracks/annotations represent condensed summaries of the raw data, +this ecosystem covers a secondary cycle designed around the FAIRtracks metadata model. +The grey box shows the areas of relevance for the FAIRtracks ecosystem with its integrations, +and only a subset of the icons represents FAIRtracks services per se. Omnipy (dark grey box) is a general Python library +for scalable and reproducible data wrangling which can be used across several data models and research disciplines." +alt="FAIRtracks RDMkit" %} + +## Who can use the FAIRtracks tool assembly? + +There is no central authentication solution for the FAIRtracks services requiring login. +The entire FAIRtracks ecosystem is available to everyone. +Most of the services are accessible through Application Programming Interfaces (APIs). More details are provided in the description below. 
+Users of the FAIRtracks ecosystem belong to different categories, which could be summarised as: + +- Researchers and data analysts +- Data providers and biocurators +- Developers working on tooling for + - Research + - Implementation of the FAIR data principles + +Each of these categories benefits specifically from a subset of the global ecosystem. +The core services can be accessed both upstream (for data providers and biocurators) and downstream (for tool developers and analytical end users). + +## For what can you use the FAIRtracks tool assembly? + +The FAIRtracks tool assembly can be used for a large number of applications; we summarise the main ones below following the steps of the data life-cycle +and focusing on particular tools. + +While the assembly does not include a tool for [Data Management Planning](dmp), +the FAIRtracks metadata standard is registered in {%tool "fairsharing" %} +and, thus, formally connected to several other standards and databases. +The FAIRtracks standard can, thus, be selected on your Data Management Plan in all the instances of {% tool "data-stewardship-wizard" %} through +the integration with {%tool "fairsharing" %}. + +{%tool "omnipy" %} is a high-level Python library for type-driven data wrangling and scalable data flow orchestration; +it is a self-standing subset of the FAIRtracks ecosystem covering several steps in the data life-cycle. +It can be used to extract metadata from specific portals and for [Processing](processing) of metadata entries to harmonise them into a unique model. +{%tool "omnipy" %} data flows are defined as transformations from specific input data models to specific output data models. +Input and output data are validated at each iteration through parsing based on {%tool "pydantic" %}. +Offloading of data flows to external compute resources is provided through the integration of {%tool "omnipy" %} with an orchestration engine based on {%tool "prefect" %}. 
+ +There is ongoing work into adding {%tool "prefect" %} as one of the services available in the +[National Infrastructure for Research Data (NIRD) service platform](https://www.sigma2.no/nird-service-platform). +This would enable running {%tool "omnipy" %} on data and metadata stored in the [NIRD data storage](https://www.sigma2.no/data-storage). +Refer also to the [Norwegian national page](no_resources) for more details. Note that, while the usage of NIRD storage and services +is certainly convenient for Norwegian users, this is not a central or mandatory part of the tool assembly, which was conceived as an international +service and aims to maintain this status. + +Data [Sharing](sharing) and preservation is one of the key components of the FAIRtracks ecosystem. +Since genomic annotations/tracks typically consist of secondary data files referring to primary data sources, +they are often deposited together with the primary data. The aim of the minimal metadata model is to +offer a greater level of granularity, providing each track with an identifier and enabling the possibility of analysis across datasets +in an automatised fashion. A dedicated registry would typically be required to accomplish this. Given that such a registry does not yet exist, +the current recommendation is to deposit FAIRtracks-compliant metadata files to {%tool "zenodo" %}, +as this platform supports both Digital Object Identifier (DOI) versioning and DOI reservation before publication. +The identifiers on the metadata FAIRtracks object are then cross-linked with the actual data which is hosted +e.g. in a [Track Hub](https://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html) and registered in +the {%tool "track-hub-registry" %}. + +Data and metadata organised in this fashion can be discovered for [Reusing](reusing) through {%tool "trackfind" %}, +a search and curation engine for genomic tracks. +{%tool "trackfind" %} will import FAIRtracks-compliant metadata from e.g. {%tool "zenodo" %}. 
+This metadata can be accessed through hierarchical browsing or by search queries both through a web-based user interface and as a RESTful API. +TrackFind supports advanced SQL-based queries that can be easily built into the user interface. + +Additional tools that comprise the core of the FAIRtracks ecosystem are the +[metadata validation](https://fairtracks.net/services/?category=Core%20services&tags%5B0%5D=Metadata%20validation) and the +[metadata augmentation](https://fairtracks.net/services/?category=Core%20services&tags%5B0%5D=Metadata%20augmentation) services. +The former is a REST API that extends the standard JSON Schema validation technology to +e.g. validate ontology terms or check CURIEs against the registered entries. +The [FAIRtracks augmentation service](https://fairtracks.net/services/?category=Core%20services&tags%5B0%5D=Metadata%20augmentation) +is implemented as a REST API that expands on the information contained in a minimal FAIRtracks JSON by adding +a set of fields with human-readable values including ontology labels, versions, and summaries. +This service bridges the gap between data providers, which are required to submit only minimal information, and data consumers +who require richer information for data discovery and retrieval. 
diff --git a/pages/your_domain/human_data.md b/pages/your_domain/human_data.md index 9628430e6..cb018f9d0 100644 --- a/pages/your_domain/human_data.md +++ b/pages/your_domain/human_data.md @@ -5,7 +5,7 @@ contributors: [Niclas Jareborg, Nirupama Benis, Ana Portugal Melo, Pinar Alper, page_id: human_data related_pages: your_tasks: [sensitive, gdpr_compliance] - tool_assembly: [tsd, covid-19, transmed] + tool_assembly: [tsd, covid-19, transmed, fairtracks] training: - name: Training in TeSS registry: TeSS diff --git a/pages/your_domain/plant_sciences.md b/pages/your_domain/plant_sciences.md index c3e5a7b70..bfb0ee9aa 100644 --- a/pages/your_domain/plant_sciences.md +++ b/pages/your_domain/plant_sciences.md @@ -6,7 +6,7 @@ related_pages: page_id: plants related_pages: your_tasks: [metadata] - tool_assembly: [plant_geno_assembly, plant_pheno_assembly] + tool_assembly: [plant_geno_assembly, plant_pheno_assembly, fairtracks] training: - name: Training in TeSS registry: TeSS diff --git a/pages/your_domain/rare_disease_data.md b/pages/your_domain/rare_disease_data.md index 64aca3af9..f8c968c88 100644 --- a/pages/your_domain/rare_disease_data.md +++ b/pages/your_domain/rare_disease_data.md @@ -5,6 +5,7 @@ contributors: [Philip van Damme, Nirupama Benis, César Bernabé, Shuxin Zhang, page_id: rare_disease related_pages: your_domain: [human_data] + tool_assembly: [fairtracks] your_tasks: [dmp, data_publication, machine_actionability] --- diff --git a/pages/your_domain/single_cell_sequencing.md b/pages/your_domain/single_cell_sequencing.md index a4ad9ed39..1cc969198 100644 --- a/pages/your_domain/single_cell_sequencing.md +++ b/pages/your_domain/single_cell_sequencing.md @@ -4,7 +4,7 @@ description: "Managing data generated from single-cell sequencing experiments." 
contributors: [Johan Rollin, Pavankumar Videm, Mehmet Tekman] related_pages: your_tasks: [dmp, data_organisation, data_publication, metadata, storage] - tool_assembly: [galaxy] + tool_assembly: [galaxy, fairtracks] training: - name: Single-cell training on the Galaxy Training Network url: "https://usegalaxy.eu/training-material/topics/single-cell/"