diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json new file mode 100644 index 0000000..b9927fe --- /dev/null +++ b/docs/GWDM/2.0.form.json @@ -0,0 +1,1056 @@ +{ + "schema_fields": [ + { + "required": true, + "title": "Gateway Identifier", + "description": "Associated identifier (number) that is the BigInt key in our SQL database for the dataset version associated with this metadata", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "str", + "location": "required.gatewayId" + }, + { + "required": true, + "title": "Gateway Persistent Identifier", + "description": "A unique persistent identifier for the metadata version. This is a 128-bit unique identifier, written as 32 hexadecimal digits separated by hyphens", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "str", + "location": "required.gatewayPid" + }, + { + "required": true, + "title": "Metadata Issued Datetime", + "description": "Datetime stamp of when this metadata version was initially issued", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "datetime", + "location": "required.issued" + }, + { + "required": true, + "title": "Last Modified Datetime", + "description": "Datetime stamp of when this metadata was last modified", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "datetime", + "location": "required.modified" + }, + { + "required": true, + "title": "revision version", + "description": "Version number used for previous version of this dataset", + "examples": [ + "6.0.0" + ], + "is_list": false, + "is_optional": false, + "types": "str", + "location": "required.revisions.version" + }, + { + "required": true, + "title": "revision url", + "description": "Some url with a reference to the record of a previous version of this dataset", + "examples": [ + "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561" + ], + "is_list": false, + 
"is_optional": false, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "required.revisions.url" + }, + { + "required": true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "is_list": false, + "is_optional": false, + "types": "str", + "location": "required.version" + }, + { + "required": true, + "title": "Title", + "description": "The main title of the dataset", + "examples": [ + "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations" + ], + "is_list": false, + "is_optional": false, + "types": { + "maxLength": 255, + "minLength": 2, + "title": "TwoHundredFiftyFiveCharacters", + "type": "string" + }, + "location": "summary.title" + }, + { + "required": true, + "title": "Short Title", + "description": "A shorter descriptive title of the dataset", + "examples": [ + "ONS 2011 Census Wales (CENW)" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "ShortTitle", + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + "location": "summary.shortTitle" + }, + { + "required": true, + "title": "DOI Name", + "description": "DOI associated to this dataset", + "examples": [ + "10.1093/ije/dyx196" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "Doi", + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + "location": "summary.doiName" + }, + { + "required": true, + "title": "Abstract", + "description": "Longer abstract detailing the dataset.", + "examples": [ + "COVID-19 Key Worker Testing Results data is required by NHS Digital to support COVID-19 requests for linkage, analysis and dissemination to other organisations who require the data in a timely manner." 
+ ], + "is_list": false, + "is_optional": false, + "types": { + "title": "LongAbstractText", + "maxLength": 5000, + "minLength": 5, + "type": "string" + }, + "location": "summary.abstract" + }, + { + "required": true, + "title": "Keywords", + "description": "Comma separated key words associated to this dataset.", + "examples": [ + "Preprints,Papers,HDR UK" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "summary.keywords" + }, + { + "required": true, + "title": "Controlled Keywords", + "description": "Keywords that have been filtered and limited", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "summary.controlledKeywords" + }, + { + "required": true, + "title": "Contact Point", + "description": "email of a person who can be the main contact point of this dataset", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "is_list": false, + "is_optional": true, + "types": "EmailStr", + "location": "summary.contactPoint" + }, + { + "required": true, + "title": "Dataset type", + "description": "What type of dataset is this?", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "DatasetType", + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + "location": "summary.datasetType" + }, + { + "required": true, + "title": "Description", + "description": "Longer description of the dataset in detail", + "examples": [ + "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations\\n\\nThis will include:\\n- Papers\\n- COVID-19 Papers\\n- COVID-19 Preprint" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "summary.description" + }, + { + 
"required": true, + "title": "Organisation Name", + "description": "Name of the organisation", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Name" + }, + "location": "summary.publisher.name" + }, + { + "required": false, + "title": "Organisation Gateway Identifier", + "description": "Identifier on the gateway", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "str", + "location": "summary.publisher.gatewayId" + }, + { + "required": false, + "title": "Research Organization Registry Identifier", + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "str", + "location": "summary.publisher.rorId" + }, + { + "required": false, + "title": "Population size", + "description": "Summary population size of the cohort", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "int", + "location": "summary.populationSize" + }, + { + "required": false, + "title": "Dataset Sub-type", + "description": "Placeholder for dataset sub-type", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "DatasetType", + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + "location": "summary.datasetSubType" + }, + { + "required": false, + "title": "Dataset pipeline status", + "description": "Indicate whether this dataset is currently available for Researchers to request access.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Available", + "Not available" + ] + }, + "location": "summary.inPipeline" + }, + { + "required": false, + "title": "Geographic Coverage", + "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "coverage.spatial" + }, + { + "required": false, + "title": "Pathway", + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "coverage.pathway" + }, + { + "required": false, + "title": "Followup", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "0 - 6 MONTHS", + "6 - 12 MONTHS", + "1 - 10 YEARS", + "> 10 YEARS", + "UNKNOWN", + "CONTINUOUS", + "OTHER", + null + ] + }, + "location": "coverage.followup" + }, + { + "required": false, + "title": "Age Range", + "description": "Please indicate the age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "examples": [ + "18-90" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "AgeRange", + "pattern": "Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])", + "type": "string" + }, + "location": "coverage.typicalAgeRange" + }, + { + "required": false, + "title": "Dataset coverage/completeness/quality", + "description": "The URL where a Researcher can learn more about the completeness of the dataset.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "coverage.datasetCompleteness" + }, + { + "required": false, + "title": "Purpose", + "description": "Please indicate the purpose(s) that the dataset was collected.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "provenance.origin.purpose" + }, + { + "required": false, + "title": "Source", + "description": "Please indicate the source of the data extraction", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "provenance.origin.source" + }, + { + "required": false, + "title": "Collection Situation Setting", + "description": "Please indicate the setting(s) where data was collected. 
Multiple settings may be provided", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "provenance.origin.collectionSituation" + }, + { + "required": false, + "title": "Image contrast", + "description": "Indicate whether usage of imaging contrast is captured within the dataset.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Yes", + "No", + "Not stated" + ] + }, + "location": "provenance.origin.imageContrast" + }, + { + "required": true, + "title": "Start Date", + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "date", + "location": "provenance.temporal.startDate" + }, + { + "required": false, + "title": "End Date", + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "date", + "location": "provenance.temporal.endDate" + }, + { + "required": true, + "title": "Time Lag", + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NO TIMELAG", + "NOT APPLICABLE", + "OTHER", + null + ] + }, + "location": "provenance.temporal.timeLag" + }, + { + "required": true, + "title": "Periodicity", + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicate when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "STATIC", + "IRREGULAR", + "CONTINUOUS", + "BIENNIAL", + "ANNUAL", + "BIANNUAL", + "QUARTERLY", + "BIMONTHLY", + "MONTHLY", + "BIWEEKLY", + "WEEKLY", + "SEMIWEEKLY", + "DAILY", + "OTHER", + null + ] + }, + "location": "provenance.temporal.accrualPeriodicity" + }, + { + "required": false, + "title": "Release Date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "date", + "location": "provenance.temporal.distributionReleaseDate" + }, + { + "required": true, + "title": "Data Use Limitation", + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.usage.dataUseLimitation" + }, + { + "required": true, + "title": "Data Use Requirements", + "description": "Please indicate if there are any additional conditions set for use; multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.usage.dataUseRequirement" + }, + { + "required": true, + "title": "Organisation Name", + "description": "Name of the organisation", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Name" + }, + "location": "accessibility.usage.resourceCreator.name" + }, + { + "required": false, + "title": "Organisation Gateway Identifier", + "description": "Identifier on the gateway", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "str", + "location": "accessibility.usage.resourceCreator.gatewayId" + }, + { + "required": false, + "title": "Research Organization Registry Identifier", + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "str", + "location": "accessibility.usage.resourceCreator.rorId" + }, + { + "required": true, + "title": "Access Rights", + "description": "Please provide details for the data access rights", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.access.accessRights" + }, + { + "required": false, + "title": "Access Service", + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. 
If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.accessService" + }, + { + "required": false, + "title": "Organisation Access Request Cost", + "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.accessRequestCost" + }, + { + "required": false, + "title": "Access Request Duration", + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NOT APPLICABLE", + "OTHER", + null + ] + }, + "location": "accessibility.access.deliveryLeadTime" + }, + { + "required": true, + "title": "Jurisdiction", + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.access.jurisdiction" + }, + { + "required": true, + "title": "Data Controller", + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data, are to be processed.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.dataController" + }, + { + "required": false, + "title": "Data Processor", + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.dataProcessor" + }, + { + "required": false, + "title": "Access/governance requirements", + "description": "Where access to data comes from: TRE/SDE, direct access, open access, or varies based on project.", + "examples": [ + "TRE/SDE" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.access.accessServiceCategory" + }, + { + "required": false, + "title": "Access mode", + "description": "Indication of the application type to enable research access.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": 
"string", + "options": [ + "Join research consortium", + "New project" + ] + }, + "location": "accessibility.access.accessMode" + }, + { + "required": true, + "title": "Controlled Vocabulary", + "description": "Code value of the ontology vocabulary encoding", + "examples": [ + "OPCS4,NHS NATIONAL CODES,ICD10,OTHER" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.formatAndStandards.vocabularyEncodingSchemes" + }, + { + "required": true, + "title": "Conforms To", + "description": "What the vocabulary conforms to.", + "examples": [ + "LOCAL,NHS DATA DICTIONARY" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.formatAndStandards.conformsTo" + }, + { + "required": true, + "title": "Language Code(s)", + "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", + "examples": [ + "en" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.formatAndStandards.languages" + }, + { + "required": true, + "title": "Dataset Format", + "description": "Format(s) the dataset can be made available in", + "examples": [ + "CSV,JSON,SQL database table" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.formatAndStandards.formats" + }, + { + "required": false, + "title": "Is Generated Using", + "description": "??", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.isGeneratedUsing" + }, + { + "required": false, + "title": 
"Associated Media", + "description": "Any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question", + "examples": [ + "https://popdatasci.swan.ac.uk/centres-of-excellence/sail/,https://www.youtube.com/watch?v=ZK9-Jw3uVkw,https://saildatabank.com/,https://saildatabank.com/about-us/" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.associatedMedia" + }, + { + "required": false, + "title": "Data Uses", + "description": "??", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.dataUses" + }, + { + "required": false, + "title": "Is Reference in", + "description": "The keystone paper associated with the dataset. 
Also include a list of known citations, if available; these should be links to existing resources where the dataset has been used or referenced.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.isReferenceIn" + }, + { + "required": false, + "title": "Tools", + "description": "URL of any analysis tools or models that have been created for this dataset and are available for further use", + "examples": [ + "https://conceptlibrary.saildatabank.com/" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.tools" + }, + { + "required": false, + "title": "Derivations", + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset", + "examples": [ + "Data will be minimised as appropriate relative to the data access application" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.datasetLinkage.isDerivedFrom" + }, + { + "required": false, + "title": "Is PartOf", + "description": "If the dataset is part of a group or family", + "examples": [ + "UKCRC Tissue Directory and Coordination Centre" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.datasetLinkage.isPartOf" + }, + { + "required": false, + "title": "Is MemberOf", + "description": "Dataset is a member of XXX(?)", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": 
"linkage.datasetLinkage.isMemberOf" + }, + { + "required": false, + "title": "Linked Datasets", + "description": "Links to other datasets.", + "examples": [ + "Yes. To any SAIL dataset & reference data.,ALL" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.datasetLinkage.linkedDatasets" + }, + { + "required": false, + "title": "Investigations", + "description": "Please provide the keystone paper associated with the dataset.", + "examples": [ + "https://digital.nhs.uk/services/data-access-request-service-dars/register-of-approved-data-releases" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.investigations" + }, + { + "required": false, + "title": "Synthetic Data Web Links", + "description": "Links to locations of information and or raw downloads of synthetic data associated with this dataset", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "linkage.syntheticDataWebLink" + }, + { + "required": false, + "title": "Publication about the dataset", + "description": "DOIs for publications which describe the dataset.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "title": "Doi", + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + "location": "linkage.publicationAboutDataset" + }, + { + "required": false, + "title": "Publication using the dataset", + "description": "DOIs for publications which use the dataset for analysis.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "title": "Doi", + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + "location": "linkage.publicationUsingDataset" + }, + { + "required": 
true, + "title": "Statistical Population", + "description": "Please select one of the following statistical populations for your observation", + "examples": [ + "PERSONS" + ], + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "PERSONS", + "EVENTS", + "FINDINGS" + ] + }, + "location": "observations.observedNode" + }, + { + "required": true, + "title": "Measured Value", + "description": "Please provide the population size associated with the population type in the dataset, e.g. 1000 people in a study, or 87 images (MRI) of Knee. Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "observations.measuredValue" + }, + { + "required": false, + "title": "Disambiguating Description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "AbstractText", + "maxLength": 500, + "minLength": 5, + "type": "string" + }, + "location": "observations.disambiguatingDescription" + }, + { + "required": true, + "title": "Observation Date", + "description": "Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. 
Users can add multiple observations.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "date", + "location": "observations.observationDate" + }, + { + "required": true, + "title": "Measured Property", + "description": "Initially this will be defaulted to \"COUNT\"", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "title": "MeasuredProperty" + }, + "location": "observations.measuredProperty" + } + ] +} \ No newline at end of file diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md new file mode 100644 index 0000000..efb2623 --- /dev/null +++ b/docs/GWDM/2.0.md @@ -0,0 +1,1118 @@ + +## required + +Required metadata needed for the GWDM + + + + + + +### gatewayId + +Associated identifier (number) that is the BigInt key in our SQL database for the dataset version associated with this metadata + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:--------| +| Gateway Identifier | False | False | True | ['str'] | + + + + +### gatewayPid + +A unique persistent identifier for the metadata version. 
This is a 128-bit unique identifier, written as 32 hexadecimal digits separated by hyphens + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:--------| +| Gateway Persistent Identifier | False | False | True | ['str'] | + + + + +### issued + +Datetime stamp of when this metadata version was initially issued + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:-------------| +| Metadata Issued Datetime | False | False | True | ['datetime'] | + + + + +### modified + +Datetime stamp of when this metadata was last modified + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:-------------| +| Last Modified Datetime | False | False | True | ['datetime'] | + + + + +### revisions + +A list of persistent identifiers and version numbers for previous versions of metadata for this dataset + + + + + + +#### version + +Version number used for previous version of this dataset + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------| +| revision version | False | False | True | ['str'] | + +Examples: + + * 6.0.0 + + +#### url + +Some url with a reference to the record of a previous version of this dataset + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| revision url | False | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | + +Examples: + + * https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561 + + +### version + +Dataset metadata version + +| title | is_list | is_optional | required | type | 
+|:----------------|:----------|:--------------|:-----------|:--------| +| Dataset Version | False | False | True | ['str'] | + +Examples: + + * 1.1.0 + + +## summary + +Summary of metadata describing key pieces of information. + + + + + + +### title + +The main title of the dataset + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------| +| Title | False | False | True | ["TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]"] | + +Examples: + + * Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations + + +### shortTitle + +A shorter descriptive title of the dataset + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------| +| Short Title | False | True | True | ["ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * ONS 2011 Census Wales (CENW) + + +### doiName + +DOI associated to this dataset + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| DOI Name | False | True | True | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * 10.1093/ije/dyx196 + + +### abstract + +Longer abstract detailing the dataset. 
+ +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------| +| Abstract | False | False | True | ["LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]"] | + +Examples: + + * COVID-19 Key Worker Testing Results data is required by NHS Digital to support COVID-19 requests for linkage, analysis and dissemination to other organisations who require the data in a timely manner. + + +### keywords + +Comma separated key words associated to this dataset. + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * Preprints,Papers,HDR UK + + +### controlledKeywords + +Keywords that have been filtered and limited + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### contactPoint + +email of a person who can be the main contact point of this dataset + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:---------------------| +| Contact Point | False | True | True | ['EmailStr', 'null'] | + +Examples: + + * SAILDatabank@swansea.ac.uk + + +### datasetType + +What type of dataset is this? 
+ +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Dataset type | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### description + +Longer description of the dataset in detail + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Description | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations\n\nThis will include:\n- Papers\n- COVID-19 Papers\n- COVID-19 Preprint + + +### publisher + +Link to details about the publisher of this dataset + + + + + + +#### name + +Name of the organisation + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:---------------------| +| Organisation Name | False | True | True | ['Name[{}]', 'null'] | + + + + +#### gatewayId + +Identifier on the gateway + +| title | is_list | is_optional | required | type | +|:--------------------------------|:----------|:--------------|:-----------|:----------------| +| Organisation Gateway Identifier | False | True | False | ['str', 'null'] | + + + + +#### rorId + +The Research Organization Registry (ROR) for the organisation, if applicable + +| title | is_list | is_optional | required | type | +|:------------------------------------------|:----------|:--------------|:-----------|:----------------| +| Research Organization Registry Identifier | False | True | False | ['str', 'null'] | + + + + 
+### populationSize + +Summary population size of the cohort + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Population size | False | True | False | ['int', 'null'] | + + + + +### datasetSubType + +Placeholder for dataset sub-type + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Dataset Sub-type | False | True | False | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### inPipeline + +Indicate whether this dataset is currently available for Researchers to request access. + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:------------------------------------------| +| Dataset pipeline status | True | True | False | ["Pipeline['Available','Not available']"] | + + + + +## coverage + +This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data. + + + + + + +### spatial + +The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race. 
+ +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Geographic Coverage | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html + + +### pathway + +Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### followup + +If known, what is the typical time span that a patient appears in the dataset (follow up period) + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | + + + + +### typicalAgeRange + +Please indicate the age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * 18-90 + + +### datasetCompleteness + +The URL where a Researcher can learn more about the completeness of the dataset. + +| title | is_list | is_optional | required | type | +|:--------------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Dataset coverage/completeness/quality | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## provenance + +Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness. + + + + + + +### origin + +None + + + + + + +#### purpose + +Please indicate the purpose(s) that the dataset was collected. 
+ +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### source + +Please indicate the source of the data extraction + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### collectionSituation + +Please indicate the setting(s) where data was collected. Multiple settings may be provided + +| title | is_list | is_optional | required | type | +|:-----------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Collection Situation Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### imageContrast + +Indicate whether usage of imaging contrast is captured within the dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:---------------------------------------------| +| Image contrast | False | True | False | ["Ternary['Yes','No','Not stated']", 'null'] | + + + + +### temporal + +None + + + + + + +#### startDate + +The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. 
+ +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | + + + + +#### endDate + +The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information. + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------| +| End Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +#### timeLag + +Please indicate the typical time-lag between an event and the data for that event appearing in the dataset + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | + + + + +#### accrualPeriodicity + +Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicate when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. 
If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Periodicity | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | + + + + +#### distributionReleaseDate + +Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +## accessibility + +Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets. 
+ + + + + + +### usage + +This section includes information about how the data can be used and how it is currently being used + + + + + + +#### dataUseLimitation + +Please provide an indication of consent permissions for datasets and/or materials; this relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataUseRequirement + +Please indicate if there are any additional conditions set for use; multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information. + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### resourceCreator + +Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided. 
+ + + + + + +##### name + +Name of the organisation + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:---------------------| +| Organisation Name | False | True | True | ['Name[{}]', 'null'] | + + + + +##### gatewayId + +Identifier on the gateway + +| title | is_list | is_optional | required | type | +|:--------------------------------|:----------|:--------------|:-----------|:----------------| +| Organisation Gateway Identifier | False | True | False | ['str', 'null'] | + + + + +##### rorId + +The Research Organization Registry (ROR) for the organisation, if applicable + +| title | is_list | is_optional | required | type | +|:------------------------------------------|:----------|:--------------|:-----------|:----------------| +| Research Organization Registry Identifier | False | True | False | ['str', 'null'] | + + + + +### access + +This section includes information about data access + + + + + + +#### accessRights + +Please provide details for the data access rights + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access Rights | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### accessService + +Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide + + +#### accessRequestCost + +Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian. + +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### deliveryLeadTime + +Please provide an indication of the typical processing times based on the types of requests typically received. 
+ +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | + + + + +#### jurisdiction + +Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataController + +Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way in which any Data Subject data, specifically personal data, are to be processed. + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataProcessor + +A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller. 
+ +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### accessServiceCategory + +Where access to data comes from: TRE/SDE, direct access, open access, varies based on project. + +| title | is_list | is_optional | required | type | +|:-------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access/governance requirements | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * TRE/SDE + + +#### accessMode + +Indication of the application type to enable research access. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------| +| Access mode | False | True | False | ["AccessMode['Join research consortium','New project']", 'null'] | + + + + +### formatAndStandards + +Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about processing the underlying data in the dataset. 
+ + + + + + +#### vocabularyEncodingSchemes + +Code value of the ontology vocabulary encoding + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * OPCS4,NHS NATIONAL CODES,ICD10,OTHER + + +#### conformsTo + +What the vocabulary conforms to. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * LOCAL,NHS DATA DICTIONARY + + +#### languages + +Language code(s) of the language(s) in which the dataset metadata and underlying data are made available. 
+ +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Language Code(s) | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * en + + +#### formats + +Format(s) the dataset can be made available in + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Dataset Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * CSV,JSON,SQL database table + + +## linkage + +Metadata for various linkages with datasets and other gateway entities + + + + + + +### isGeneratedUsing + +?? + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Generated Using | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### associatedMedia + +Any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://popdatasci.swan.ac.uk/centres-of-excellence/sail/,https://www.youtube.com/watch?v=ZK9-Jw3uVkw,https://saildatabank.com/,https://saildatabank.com/about-us/ + + +### dataUses + +?? + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Uses | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### isReferenceIn + +The keystone paper associated with the dataset. 
Also include a list of known citations, if available; these should be links to existing resources where the dataset has been used or referenced. + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Reference in | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### tools + +URL of any analysis tools or models that have been created for this dataset and are available for further use + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://conceptlibrary.saildatabank.com/ + + +### datasetLinkage + +Dataset Linkage copied over from + + + + + + +#### isDerivedFrom + +Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * Data will be minimised as appropriate relative to the data access application + + +#### isPartOf + +If the dataset is part of a group or family + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is PartOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * UKCRC Tissue Directory and Coordination Centre + + +#### isMemberOf + +Dataset is a member of XXX(?) + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is MemberOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### linkedDatasets + +Links to other datasets. + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * Yes. 
To any SAIL dataset & reference data.,ALL + + +### investigations + +Please provide the keystone paper associated with the dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Investigations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://digital.nhs.uk/services/data-access-request-service-dars/register-of-approved-data-releases + + +### syntheticDataWebLink + +Links to locations of information and or raw downloads of synthetic data associated with this dataset + +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Synthetic Data Web Links | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### publicationAboutDataset + +DOIs for publications which describe the dataset. + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Publication about the dataset | True | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### publicationUsingDataset + +DOIs for publications which use the dataset for analysis. 
+ +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Publication using the dataset | True | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +## observations + +Multiple observations about the dataset may be provided and users are expected to provide at least one observation +(1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. +Example: + <b> Statistical Population 1 + </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 + <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: “2017” + + + + + + + +### observedNode + +Please select one of the following statistical populations for your observation + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------| +| Statistical Population | False | False | True | ["StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']"] | + +Examples: + + * PERSONS + + +### measuredValue + +Please provide the population size associated with the population type of the dataset, i.e. 1000 people in a study, or 87 images (MRI) of Knee. Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. 
+ +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------| +| Measured Value | False | False | True | ['int'] | + + + + +### disambiguatingDescription + +If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type. + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Disambiguating Description | False | True | False | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### observationDate + +Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:---------------------| +| Observation Date | False | False | True | ['date', 'datetime'] | + + + + +### measuredProperty + +Initially this will be defaulted to "COUNT" + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------------------| +| Measured Property | False | False | True | ['MeasuredProperty[{}]'] | + + + + +## structuralMetadata + +Descriptions of all tables and data elements that can be included in the dataset + + + + + + +### name + +The name of a table in a dataset. 
+ +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:----------------| +| Table Name | False | True | True | ['str', 'null'] | + + + + +### description + +A description of a table in a dataset. + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------| +| Table Description | False | True | False | ['str', 'null'] | + + + + +### columns + +A list of columns contained within a table in a dataset. + + + + + + +#### name + +The name of a column in a table. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------| +| Column Name | False | False | True | ['Name[{}]'] | + + + + +#### dataType + +The data type of a column in a table. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------| +| Column Data Type | False | False | True | ['str'] | + + + + +#### description + +A description of a column in a table. + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------| +| Column Description | False | True | False | ['str', 'null'] | + + + + +#### sensitive + +A True or False value, indicating if the field is sensitive or not + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:---------| +| Sensitive | False | False | True | ['bool'] | + + + + +#### values + +values in a dataset + + + + + + +##### name + +Unique value in a column. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-------------| +| Value Name | False | False | True | ['Name[{}]'] | + + + + +##### description + +A description of a unique value in a column. 
+ +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------| +| Value Description | False | True | False | ['str', 'null'] | + + + + +##### frequency + +The frequency of occurrence of a value in a column + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Value Frequency | False | True | False | ['int', 'null'] | + + + diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json new file mode 100644 index 0000000..27eb1bd --- /dev/null +++ b/docs/GWDM/2.0.structure.json @@ -0,0 +1,1411 @@ +[ + { + "name": "required", + "required": true, + "title": "Required", + "description": "Required metadata needed for the GWDM", + "examples": null, + "type": [ + "Required" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "gatewayId", + "required": true, + "title": "Gateway Identifier", + "description": "Associated identifier (number) that is the BigInt key in our SQL database for the dataset version associated with this metadata", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "gatewayPid", + "required": true, + "title": "Gateway Persistent Identifier", + "description": "A unique persistent identifier for the metadata version. 
This is a 128-bit unique identifier, written as 32 hexadecimal digits separated by hyphens", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "issued", + "required": true, + "title": "Metadata Issued Datetime", + "description": "Datetime stamp of when this metadata version was initially issued", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "modified", + "required": true, + "title": "Last Modified Datetime", + "description": "Datetime stamp of when this metadata was last modified", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "revisions", + "required": true, + "title": "Metadata Version Revisions", + "description": "A list of persistent identifiers and version numbers for previous versions of metadata for this dataset", + "examples": null, + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "version", + "required": true, + "title": "revision version", + "description": "Version number used for previous version of this dataset", + "examples": [ + "6.0.0" + ], + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "url", + "required": true, + "title": "revision url", + "description": "Some url with a reference to the record of a previous version of this dataset", + "examples": [ + "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561" + ], + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + } + ] + }, + { + "name": "version", + "required": true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + } + ] + }, 
+ { + "name": "summary", + "required": true, + "title": "Summary", + "description": "Summary of metadata describing key pieces of information.", + "examples": null, + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "title", + "required": true, + "title": "Title", + "description": "The main title of the dataset", + "examples": [ + "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations" + ], + "type": [ + "TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "shortTitle", + "required": true, + "title": "Short Title", + "description": "A shorter descriptive title of the dataset", + "examples": [ + "ONS 2011 Census Wales (CENW)" + ], + "type": [ + "ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "doiName", + "required": true, + "title": "DOI Name", + "description": "DOI associated to this dataset", + "examples": [ + "10.1093/ije/dyx196" + ], + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "abstract", + "required": true, + "title": "Abstract", + "description": "Longer abstract detailing the dataset.", + "examples": [ + "COVID-19 Key Worker Testing Results data is required by NHS Digital to support COVID-19 requests for linkage, analysis and dissemination to other organisations who require the data in a timely manner." 
+ ], + "type": [ + "LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "keywords", + "required": true, + "title": "Keywords", + "description": "Comma separated key words associated to this dataset.", + "examples": [ + "Preprints,Papers,HDR UK" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "controlledKeywords", + "required": true, + "title": "Controlled Keywords", + "description": "Keywords that have been filtered and limited", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "contactPoint", + "required": true, + "title": "Contact Point", + "description": "email of a person who can be the main contact point of this dataset", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "type": [ + "EmailStr", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "datasetType", + "required": true, + "title": "Dataset type", + "description": "What type of dataset is this?", + "examples": null, + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "description", + "required": true, + "title": "Description", + "description": "Longer description of the dataset in detail", + "examples": [ + "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations\\n\\nThis will include:\\n- Papers\\n- COVID-19 Papers\\n- COVID-19 Preprint" + ], + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 
'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "publisher", + "required": true, + "title": "Publisher", + "description": "Link to details about the publisher of this dataset", + "examples": null, + "type": [ + "Organisation", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Organisation Name", + "description": "Name of the organisation", + "examples": null, + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gatewayId", + "required": false, + "title": "Organisation Gateway Identifier", + "description": "Identifier on the gateway", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "rorId", + "required": false, + "title": "Research Organization Registry Identifier", + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "populationSize", + "required": false, + "title": "Population size", + "description": "Summary population size of the cohort", + "examples": null, + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "datasetSubType", + "required": false, + "title": "Dataset Sub-type", + "description": "Placeholder for dataset sub-type", + "examples": null, + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "inPipeline", + "required": false, + "title": "Dataset pipeline status", + "description": "Indicate whether this dataset is currently available for Researchers to request access.", + "examples": 
null, + "type": [ + "Pipeline['Available','Not available']" + ], + "is_list": true, + "is_optional": true + } + ] + }, + { + "name": "coverage", + "required": false, + "title": "Coverage", + "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "examples": null, + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "spatial", + "required": false, + "title": "Geographic Coverage", + "description": "The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "pathway", + "required": false, + "title": "Pathway", + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "followup", + "required": false, + "title": "Followup", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "typicalAgeRange", + "required": false, + "title": "Age Range", + "description": "Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "examples": [ + "18-90" + ], + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "datasetCompleteness", + "required": false, + "title": "Dataset coverage/completeness/quality", + "description": "The URL where a Researcher can learn more about the completeness of the dataset.", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "provenance", + "required": false, + "title": "Provenance", + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + 
"examples": null, + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "origin", + "required": false, + "title": null, + "description": null, + "examples": null, + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "purpose", + "required": false, + "title": "Purpose", + "description": "Please indicate the purpose(s) that the dataset was collected.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "source", + "required": false, + "title": "Source", + "description": "Please indicate the source of the data extraction", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "collectionSituation", + "required": false, + "title": "Collection Situation Setting", + "description": "Please indicate the setting(s) where data was collected. 
Multiple settings may be provided", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "imageContrast", + "required": false, + "title": "Image contrast", + "description": "Indicate whether usage of imaging contrast is captured within the dataset.", + "examples": null, + "type": [ + "Ternary['Yes','No','Not stated']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "temporal", + "required": true, + "title": null, + "description": null, + "examples": null, + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "startDate", + "required": true, + "title": "Start Date", + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "endDate", + "required": false, + "title": "End Date", + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "timeLag", + "required": true, + "title": "Time Lag", + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "accrualPeriodicity", + "required": true, + "title": "Periodicity", + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicate when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": null, + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "distributionReleaseDate", + "required": false, + "title": "Release Date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + }, + { + "name": "accessibility", + "required": false, + "title": "Accessibility", + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", + "examples": null, + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "usage", + "required": false, + "title": "Usage", + "description": "This section includes information about how the data can be used and how it is currently being used", + "examples": null, + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "dataUseLimitation", + "required": true, + "title": "Data Use Limitation", + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which 
datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataUseRequirement", + "required": true, + "title": "Data Use Requirements", + "description": "Please indicate if there are any additional conditions set for use; multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "resourceCreator", + "required": false, + "title": "Citation Requirements", + "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. 
No employee details should be provided.", + "examples": null, + "type": [ + "Organisation", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Organisation Name", + "description": "Name of the organisation", + "examples": null, + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gatewayId", + "required": false, + "title": "Organisation Gateway Identifier", + "description": "Identifier on the gateway", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "rorId", + "required": false, + "title": "Research Organization Registry Identifier", + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + }, + { + "name": "access", + "required": true, + "title": "Access", + "description": "This section includes information about data access", + "examples": null, + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "accessRights", + "required": true, + "title": "Access Rights", + "description": "Please provide details for the data access rights", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessService", + "required": false, + "title": "Access Service", + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers; additional consultancy and services; any indication of costs associated. 
If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessRequestCost", + "required": false, + "title": "Organisation Access Request Cost", + "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "deliveryLeadTime", + "required": false, + "title": "Access Request Duration", + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "examples": null, + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "jurisdiction", + "required": true, + "title": "Jurisdiction", + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataController", + "required": true, + "title": "Data Controller", + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way in which any Data Subject data, specifically personal data, are to be processed.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataProcessor", + "required": false, + "title": "Data Processor", + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessServiceCategory", + "required": false, + "title": "Access/governance requirements", + "description": "Where access to the data comes from: TRE/SDE, direct access, open access, varies based on project.", + "examples": [ + "TRE/SDE" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessMode", + "required": false, + "title": "Access mode", + "description": "Indication of the application type to enable research access.", + 
"examples": null, + "type": [ + "AccessMode['Join research consortium','New project']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "formatAndStandards", + "required": false, + "title": "Format and Standards", + "description": "Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset.", + "examples": null, + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "vocabularyEncodingSchemes", + "required": true, + "title": "Controlled Vocabulary", + "description": "Code value of the ontology vocabulary encoding", + "examples": [ + "OPCS4,NHS NATIONAL CODES,ICD10,OTHER" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "conformsTo", + "required": true, + "title": "Conforms To", + "description": "What the vocabulary conforms to.", + "examples": [ + "LOCAL,NHS DATA DICTIONARY" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "languages", + "required": true, + "title": "Language Code(s)", + "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", + "examples": [ + "en" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "formats", + "required": true, + "title": "Dataset Format", + "description": "Format(s) the dataset can be made available in", + "examples": [ + "CSV,JSON,SQL database table" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': 
[{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + } + ] + }, + { + "name": "linkage", + "required": false, + "title": "Linkage", + "description": "Metadata for various linkages with datasets and other gateway entities", + "examples": null, + "type": [ + "Linkage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "isGeneratedUsing", + "required": false, + "title": "Is Generated Using", + "description": "??", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "associatedMedia", + "required": false, + "title": "Associated Media", + "description": "Any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question", + "examples": [ + "https://popdatasci.swan.ac.uk/centres-of-excellence/sail/,https://www.youtube.com/watch?v=ZK9-Jw3uVkw,https://saildatabank.com/,https://saildatabank.com/about-us/" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataUses", + "required": false, + "title": "Data Uses", + "description": "??", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isReferenceIn", + "required": false, + "title": "Is Reference in", + "description": "The keystone paper associated with the dataset. 
Also include a list of known citations, if available; these should be links to existing resources where the dataset has been used or referenced.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "tools", + "required": false, + "title": "Tools", + "description": "URL of any analysis tools or models that have been created for this dataset and are available for further use", + "examples": [ + "https://conceptlibrary.saildatabank.com/" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "datasetLinkage", + "required": false, + "title": "Dataset Linkage", + "description": "Dataset Linkage copied over from", + "examples": null, + "type": [ + "DatasetLinkage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "isDerivedFrom", + "required": false, + "title": "Derivations", + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes.
Single or multiple dimensions can be provided as a derived extract alongside the dataset", + "examples": [ + "Data will be minimised as appropriate relative to the data access application" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isPartOf", + "required": false, + "title": "Is PartOf", + "description": "If the dataset is part of a group or family", + "examples": [ + "UKCRC Tissue Directory and Coordination Centre" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isMemberOf", + "required": false, + "title": "Is MemberOf", + "description": "Dataset is a member of XXX(?)", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "linkedDatasets", + "required": false, + "title": "Linked Datasets", + "description": "Links to other datasets.", + "examples": [ + "Yes. 
To any SAIL dataset & reference data.,ALL" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "investigations", + "required": false, + "title": "Investigations", + "description": "Please provide the keystone paper associated with the dataset.", + "examples": [ + "https://digital.nhs.uk/services/data-access-request-service-dars/register-of-approved-data-releases" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "syntheticDataWebLink", + "required": false, + "title": "Synthetic Data Web Links", + "description": "Links to locations of information and or raw downloads of synthetic data associated with this dataset", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "publicationAboutDataset", + "required": false, + "title": "Publication about the dataset", + "description": "DOIs for publications which describe the dataset.", + "examples": null, + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + }, + { + "name": "publicationUsingDataset", + "required": false, + "title": "Publication using the dataset", + "description": "DOIs for publications which use the dataset for analysis.", + "examples": null, + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "observations", + "required": false, + 
"title": "Observations", + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation \n(1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. \nExample: \n <b> Statistical Population 1 \n </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 \n <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d\"\n", + "examples": null, + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "observedNode", + "required": true, + "title": "Statistical Population", + "description": "Please select one of the following statistical populations for you observation", + "examples": [ + "PERSONS" + ], + "type": [ + "StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredValue", + "required": true, + "title": "Measured Value", + "description": "Please provide the population size associated with the population type the dataset i.e. 
1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "disambiguatingDescription", + "required": false, + "title": "Disambiguating Description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "observationDate", + "required": true, + "title": "Observation Date", + "description": "Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. 
Users can add multiple observations.", + "examples": null, + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredProperty", + "required": true, + "title": "Measured Property", + "description": "Initially this will be defaulted to \"COUNT\"", + "examples": null, + "type": [ + "MeasuredProperty[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + } + ] + }, + { + "name": "structuralMetadata", + "required": false, + "title": "Structural Metadata", + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "examples": null, + "type": [ + "DataTable" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Table Name", + "description": "The name of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "description", + "required": false, + "title": "Table Description", + "description": "A description of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "columns", + "required": true, + "title": "Data Columns", + "description": "A list of columns contained within a table in a dataset.", + "examples": null, + "type": [ + "DataColumn" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Column Name", + "description": "The name of a column in a table.", + "examples": null, + "type": [ + "Name[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "dataType", + "required": true, + "title": "Column Data Type", + "description": "The data type of a column in a table.", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "description", + "required": false, + "title": "Column
Description", + "description": "A description of a column in a table.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "sensitive", + "required": true, + "title": "Sensitive", + "description": "A True or False value, indicating if the field is sensitive or not", + "examples": null, + "type": [ + "bool" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "values", + "required": false, + "title": "Values", + "description": "Values in a dataset", + "examples": null, + "type": [ + "DataValue" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Value Name", + "description": "Unique value in a column.", + "examples": null, + "type": [ + "Name[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "description", + "required": false, + "title": "Value Description", + "description": "A description of a unique value in a column.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "frequency", + "required": false, + "title": "Value Frequency", + "description": "The frequency of occurrence of a value in a column", + "examples": null, + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + } + ] + } +] \ No newline at end of file diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json new file mode 100644 index 0000000..9313c22 --- /dev/null +++ b/docs/HDRUK/3.0.0.form.json @@ -0,0 +1,1234 @@ +{ + "schema_fields": [ + { + "required": true, + "title": "Dataset identifier", + "description": "System dataset identifier", + "examples": [ + "226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ], + "is_list": false, + "is_optional": true, + "types": { + "maxLength": 36, + "minLength": 36, + "pattern":
"^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$", + "title": "Uuidv4", + "type": "string" + }, + "location": "identifier" + }, + { + "required": true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "is_list": false, + "is_optional": false, + "types": { + "pattern": "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$", + "title": "Semver", + "type": "string" + }, + "location": "version" + }, + { + "required": true, + "title": "revision version", + "description": "Version number used for previous version of this dataset", + "examples": [ + "6.0.0" + ], + "is_list": false, + "is_optional": false, + "types": { + "pattern": "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$", + "title": "Semver", + "type": "string" + }, + "location": "revisions.version" + }, + { + "required": true, + "title": "revision url", + "description": "Some url with a reference to the record of a previous version of this dataset", + "examples": [ + "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "revisions.url" + }, + { + "required": true, + "title": "Metadata Issued Datetime',", + "description": "Datetime stamp of when this metadata version was initially issued", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "datetime", + "location": "issued" + }, + { + "required": true, + "title": "Last Modified Datetime", + "description": "Datetime stamp of when this metadata was last modified", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "datetime", + "location": "modified" + }, + { + "required": true, + "title": "Title", + "description": "Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. 
If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + "North West London COVID-19 Patient Level Situation Report" + ], + "is_list": false, + "is_optional": false, + "types": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "summary.title" + }, + { + "required": true, + "title": "Dataset abstract", + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." 
+ ], + "is_list": false, + "is_optional": false, + "types": { + "title": "AbstractText", + "maxLength": 500, + "minLength": 5, + "type": "string" + }, + "location": "summary.abstract" + }, + { + "required": false, + "title": "identifier", + "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "summary.dataProvider.identifier" + }, + { + "required": true, + "title": "Name of Data Provider", + "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "location": "summary.dataProvider.name" + }, + { + "required": false, + "title": "Organisation Logo", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. 
The following formats will be accepted: .jpg, .png or .svg.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "summary.dataProvider.logo" + }, + { + "required": false, + "title": "Organisation Description", + "description": "Please provide a URL that describes the organisation.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Description", + "maxLength": 10000, + "minLength": 2, + "type": "string" + }, + "location": "summary.dataProvider.description" + }, + { + "required": true, + "title": "contact point", + "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "title": "EmailAddress", + "format": "email", + "type": "string" + }, + "location": "summary.dataProvider.contactPoint" + }, + { + "required": false, + "title": "Organisation Membership", + "description": "Please indicate if the organisation is an Alliance Member or a Hub.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "HUB", + "ALLIANCE", + "OTHER", + "NCS" + ] + }, + "location": "summary.dataProvider.memberOf" + }, + { + "required": true, + "title": "Dataset population size", + "description": "Input the number of people captured within the dataset", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "summary.populationSize" + }, + { + "required": true, + "title": "Keywords", + "description": "Please provide a list of relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description.
We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "examples": [ + "Preprints,Papers,HDR UK" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "summary.keywords" + }, + { + "required": false, + "title": "DOI for dataset", + "description": "DOI associated to this dataset", + "examples": [ + "10.1093/ije/dyx196" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "Doi", + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + "location": "summary.doiName" + }, + { + "required": true, + "title": "Contact point", + "description": "Please provide a valid email address that can be used to coordinate data access requests.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "is_list": false, + "is_optional": false, + "types": { + "title": "EmailAddress", + "format": "email", + "type": "string" + }, + "location": "summary.contactPoint" + }, + { + "required": false, + "title": "Alternate dataset identifiers", + "description": "Alternate dataset identifiers or local identifiers", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "summary.alternateIdentifiers" + }, + { + "required": false, + "title": "Description", + "description": "A free-text description of the dataset. 
Gateway Feature: Keywords and text may be extracted out of the description and index for search", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "title": "Description", + "maxLength": 10000, + "minLength": 2, + "type": "string" + }, + "location": "documentation.description" + }, + { + "required": false, + "title": "Associated media", + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "documentation.associatedMedia" + }, + { + "required": false, + "title": "Dataset pipeline status", + "description": "Indicate whether this dataset is currently available for Researchers to request access.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Available", + "Not available" + ] + }, + "location": "documentation.inPipeline" + }, + { + "required": false, + "title": "Geographic coverage", + "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "is_list": false, + "is_optional": false, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "coverage.spatial" + }, + { + "required": false, + "title": "Minimum age range", + "description": "Please indicate the minimum age in years of participants in the dataset as a whole number (integer).", + "examples": [ + 18 + ], + "is_list": false, + "is_optional": true, + "types": "int", + "location": "coverage.typicalAgeRangeMin" + }, + { + "required": false, + "title": "Maximum age range", + "description": "Please indicate the maximum age in years of participants in the dataset as a whole number (integer).", + "examples": [ + 90 + ], + "is_list": false, + "is_optional": true, + "types": "int", + "location": "coverage.typicalAgeRangeMax" + }, + { + "required": false, + "title": "Dataset coverage/completeness/quality", + "description": "The URL where a Researcher can learn more about the completeness of the dataset.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "coverage.datasetCompleteness" + }, + { + "required": false, + "title": "Biological sample availability", + "description": "The type of biospecimen saved from a biological entity.", + "examples": null, + "is_list": true, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "None/not available", + "Bone marrow", + "Cancer cell lines", + "CDNA/MRNA", + "Core biopsy", + "DNA", + "Entire body organ", + "Faeces", + "Immortalized cell lines", + "Isolated pathogen", + "MicroRNA", + "Peripheral blood cells", + "Plasma", + "PM Tissue", + "Primary cells", + 
"RNA", + "Saliva", + "Serum", + "Swabs", + "Tissue", + "Urine", + "Whole blood", + "Availability to be confirmed", + "Other" + ] + }, + "location": "coverage.materialType" + }, + { + "required": false, + "title": "Follow-up", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "0 - 6 MONTHS", + "6 - 12 MONTHS", + "1 - 10 YEARS", + "> 10 YEARS", + "UNKNOWN", + "CONTINUOUS", + "OTHER", + null + ] + }, + "location": "coverage.followup" + }, + { + "required": false, + "title": "Patient pathway description", + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "Description", + "maxLength": 10000, + "minLength": 2, + "type": "string" + }, + "location": "coverage.pathway" + }, + { + "required": false, + "title": "Gender", + "description": "Male, Female, Other", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Male", + "Female", + "Other" + ] + }, + "location": "coverage.gender" + }, + { + "required": false, + "title": "Purpose of dataset collection", + "description": "Please indicate the purpose(s) that the dataset was collected.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Research cohort", + "Study", + "Disease registry", + "Trial", + "Care", + "Audit", + "Administrative", + "Financial", + "Statuatory", + "Other", + null + ] + }, + "location": 
"provenance.origin.purpose" + }, + { + "required": true, + "title": "Dataset type", + "description": "The topic areas to which the dataset content relates.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "Health and disease", + "Treatments/Interventions", + "Measurements/Tests", + "Imaging types", + "Imaging area of the body", + "Omics", + "Socioeconomic", + "Lifestyle", + "Registry", + "Environment and energy", + "Information and communication", + "Politics" + ] + }, + "location": "provenance.origin.datasetType" + }, + { + "required": true, + "title": "Dataset sub-type", + "description": "The sub-type of the dataset content.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Mental health", + "Cardiovascular", + "Cancer", + "Rare diseases", + "Metabolic and Endocrine", + "Neurological", + "Reproductive", + "Maternity and neonatology", + "Respiratory", + "Immunity", + "Musculoskeletal", + "Vision", + "Renal and urogenital", + "Oral and Gastrointestinal", + "Cognitive Function", + "Hearing", + "Others", + "Vaccines", + "Preventive", + "Therapeutic", + "Laboratory", + "Other diagnostics", + "CT", + "MRI", + "PET", + "X-ray", + "Ultrasound", + "Pathology", + "Head", + "Chest", + "Arm", + "Abdomen", + "Leg", + "Proteomics", + "Transcriptomics", + "Epigenomics", + "Metabolomics", + "Multiomics", + "Metagenomics", + "Genomics", + "Education", + "Crime and Justice", + "Ethnicity", + "Housing ", + "Labour", + "Ageing ", + "Economics", + "Marital status", + "Social support", + "Deprivation", + "Religion", + "Occupation", + "Finances", + "Family circumstance", + "Smoking", + "Physical Activity", + "Dietary habits", + "Alcohol", + "Disease Registry (research)", + "National Disease Registries and Audits", + "Births and Deaths", + "Not applicable" + ] + }, + "location": "provenance.origin.datasetSubType" + }, + { + "required": false, + "title": 
"Source of data extraction", + "description": "Please indicate the source of the data extraction", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "EPR", + "Electronic survey", + "LIMS", + "Paper-based", + "Free text NLP", + "Machine generated", + "Other" + ] + }, + "location": "provenance.origin.source" + }, + { + "required": false, + "title": "Collection source setting", + "description": "Please indicate the setting(s) where data was collected. Multiple settings may be provided", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Cohort, study, trial", + "Clinic", + "Primary care - Referrals", + "Primary care - Clinic", + "Primary care - Out of hours", + "Secondary care - Accident and Emergency", + "Secondary care - Outpatients", + "Secondary care - In-patients", + "Secondary care - Ambulance", + "Secondary care - ICU", + "Prescribing - Community pharmacy", + "Prescribing - Hospital", + "Patient report outcome", + "Wearables", + "Local authority", + "National government", + "Community", + "Services", + "Home", + "Private", + "Social care - Health care at home", + "Social care - Other social data", + "Census", + "Other", + null + ] + }, + "location": "provenance.origin.collectionSource" + }, + { + "required": false, + "title": "Image contrast", + "description": "Indicate whether usage of imaging contrast is captured within the dataset.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Yes", + "No", + "Not stated" + ] + }, + "location": "provenance.origin.imageContrast" + }, + { + "required": true, + "title": "Publishing frequency", + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. 
When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "Static", + "Irregular", + "Continuous", + "Biennial", + "Annual", + "Biannual", + "Quarterly", + "Bimonthly", + "Monthly", + "Biweekly", + "Weekly", + "Twice a week", + "Daily", + "Other", + null + ] + }, + "location": "provenance.temporal.publishingFrequency" + }, + { + "required": false, + "title": "Distribution release date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "date", + "location": "provenance.temporal.distributionReleaseDate" + }, + { + "required": true, + "title": "Start date", + "description": "The start of the time period that the dataset provides coverage for. 
If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "date", + "location": "provenance.temporal.startDate" + }, + { + "required": false, + "title": "End date", + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": "date", + "location": "provenance.temporal.endDate" + }, + { + "required": true, + "title": "Time lag", + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "Less than 1 week", + "1-2 weeks", + "2-4 weeks", + "1-2 months", + "2-6 months", + "6 months plus", + "Variable", + "Not applicable", + "Other" + ] + }, + "location": "provenance.temporal.timeLag" + }, + { + "required": false, + "title": "Data use limitation", + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. 
NOTE: we have extended the DUO to include a value for NO LINKAGE", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "General research use", + "Genetic studies only", + "No general methods research", + "No restriction", + "Research-specific restrictions", + "Research use only", + "No linkage" + ] + }, + "location": "accessibility.usage.dataUseLimitation" + }, + { + "required": false, + "title": "Data use requirements", + "description": "Please indicate if there are any additional conditions set for use. Multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Collaboration required", + "Ethics approval required", + "Geographical restrictions", + "Institution-specific restrictions", + "Not for profit use", + "Project-specific restrictions", + "Publication moratorium", + "Publication required", + "Return to database or resource", + "Time limit on use", + "User-specific restriction" + ] + }, + "location": "accessibility.usage.dataUseRequirements" + }, + { + "required": false, + "title": "Citation requirements", + "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. 
No employee details should be provided.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "ShortDescription", + "maxLength": 1000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.usage.resourceCreator" + }, + { + "required": true, + "title": "Access rights", + "description": "Please provide details for the data access rights", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.accessRights" + }, + { + "required": false, + "title": "Access method category", + "description": "The method a Researcher will use to access the dataset, if approved.", + "examples": [ + "TRE/SDE" + ], + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "TRE/SDE", + "Direct access", + "Open access", + "Varies based on project" + ] + }, + "location": "accessibility.access.accessServiceCategory" + }, + { + "required": false, + "title": "Access mode", + "description": "Indication of the application type to enable research access.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Join research consortium", + "New project" + ] + }, + "location": "accessibility.access.accessMode" + }, + { + "required": false, + "title": "Access service description", + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers; additional consultancy and services; any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers. Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.accessService" + }, + { + "required": false, + "title": "Access request cost", + "description": "Please provide link(s) to a webpage or description detailing the service or cost model for processing data access requests.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.accessRequestCost" + }, + { + "required": false, + "title": "Time to dataset access", + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Less than 1 week", + "1-2 weeks", + "2-4 weeks", + "1-2 months", + "2-6 months", + "More than 6 months", + "Variable", + "Not applicable", + "Other" + ] + }, + "location": "accessibility.access.deliveryLeadTime" + }, + { + "required": true, + "title": "Jurisdiction", + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.access.jurisdiction" + }, + { + "required": true, + "title": "Data Controller", + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data, are to be processed.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.dataController" + }, + { + "required": false, + "title": "Data Processor", + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "LongDescription", + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + "location": "accessibility.access.dataProcessor" + }, + { + "required": true, + "title": "Controlled vocabulary", + "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. 
Notes: More than one vocabulary may be provided.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.formatAndStandards.vocabularyEncodingScheme" + }, + { + "required": true, + "title": "Alignment with standardised data models", + "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", + "examples": [ + "LOCAL,NHS DATA DICTIONARY" + ], + "is_list": true, + "is_optional": false, + "types": { + "title": "StandardisedDataModels", + "$ref": "#/$defs/StandardisedDataModelsEnum" + }, + "location": "accessibility.formatAndStandards.conformsTo" + }, + { + "required": true, + "title": "Language", + "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.formatAndStandards.language" + }, + { + "required": true, + "title": "Format", + "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "accessibility.formatAndStandards.format" + }, + { + "required": false, + "title": "Derived from", + "description": "If applicable, please provide DOIs or links to datasets from which data in this dataset can be derived or calculated.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "enrichmentAndLinkage.derivedFrom" + }, + { + "required": false, + "title": "Is part of", + "description": "This relationship indicates that the dataset is a component or subset of a broader collection of related datasets. For example, clinical trial data for a specific drug may be part of a larger database of pharmaceutical research data.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "enrichmentAndLinkage.isPartOf" + }, + { + "required": false, + "title": "Linked datasets", + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "enrichmentAndLinkage.linkableDatasets" + }, + { + "required": false, + "title": "Linked datasets", + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. 
If no DOI is available, please provide the title of the datasets that can be linked.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "CommaSeparatedValues", + "pattern": "([^,]+)", + "type": "string" + }, + "location": "enrichmentAndLinkage.similarToDatasets" + }, + { + "required": false, + "title": "Investigations", + "description": "Please provide link to any active projects that are using the dataset.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "enrichmentAndLinkage.investigations" + }, + { + "required": false, + "title": "Tools", + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "enrichmentAndLinkage.tools" + }, + { + "required": false, + "title": "Synthetic Data Web Links", + "description": "Links to locations of information and or raw downloads of synthetic data associated with this dataset", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "title": "Url", + "format": "uri", + "minLength": 1, + "type": "string" + }, + "location": "enrichmentAndLinkage.syntheticDataWebLink" + }, + { + "required": false, + "title": "Publication about the dataset", + "description": "DOIs for publications which describe the dataset.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "title": "Doi", + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + "location": 
"enrichmentAndLinkage.publicationAboutDataset" + }, + { + "required": false, + "title": "Publication using the dataset", + "description": "DOIs for publications which use the dataset for analysis.", + "examples": null, + "is_list": true, + "is_optional": true, + "types": { + "title": "Doi", + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + "location": "enrichmentAndLinkage.publicationUsingDataset" + }, + { + "required": true, + "title": "Dataset volume measure", + "description": "Please select one of the following statistical populations for your observation", + "examples": [ + "PERSONS" + ], + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "Person", + "Event", + "Findings", + "Number of scans per modality" + ] + }, + "location": "observations.observedNode" + }, + { + "required": true, + "title": "Measured value", + "description": "Please provide the population size associated with the population type in the dataset, i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "observations.measuredValue" + }, + { + "required": false, + "title": "Disambiguating description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "title": "AbstractText", + "maxLength": 500, + "minLength": 5, + "type": "string" + }, + "location": "observations.disambiguatingDescription" + }, + { + "required": true, + "title": "Observation date", + "description": "Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "date", + "location": "observations.observationDate" + }, + { + "required": true, + "title": "Measured property", + "description": "Describe the property used to measure each observation.", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "title": "MeasuredProperty" + }, + "location": "observations.measuredProperty" + } + ] +} \ No newline at end of file diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md new file mode 100644 index 0000000..4bbf4e9 --- /dev/null +++ b/docs/HDRUK/3.0.0.md @@ -0,0 +1,1074 @@ + +## identifier + +System dataset identifier + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset identifier | False | True | True | ["Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * 226fb3f1-4471-400a-8c39-2b66d46a39b6 + * https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6 + + +## version + +Dataset metadata version + +| title | is_list | 
is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| Dataset Version | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | + +Examples: + + * 1.1.0 + + +## revisions + +A list of persistent identifiers and version numbers for previous versions of metadata for this dataset + + + + + + +### version + +Version number used for previous version of this dataset + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| revision version | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | + +Examples: + + * 6.0.0 + + +### url + +Some url with a reference to the record of a previous version of this dataset + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| revision url | False | True | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561 + + +## issued + +Datetime stamp of when this metadata version was initially issued + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:-------------| +| Metadata Issued Datetime', | False | False | True | ['datetime'] | + + + + +## modified + +Datetime stamp of when this metadata was last modified + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:-------------| +| 
Last Modified Datetime | False | False | True | ['datetime'] | + + + + +## summary + +Summary of metadata describing key pieces of information. + + + + + + +### title + +Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Title | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | + +Examples: + + * North West London COVID-19 Patient Level Situation Report + + +### abstract + +Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------| +| Dataset abstract | False | False | True | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]"] | + +Examples: + + * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. + + +### dataProvider + +This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most cases this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank. + + + + + + +#### identifier + +Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| identifier | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### name + +The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
+ +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Name of Data Provider | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | + + + + +#### logo + +Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Logo | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### description + +Please provide a URL that describes the organisation. + +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Organisation Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### contactPoint + +Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. 
+ +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------| +| contact point | False | False | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'List'] | + + + + +#### memberOf + +Please indicate if the organisation is an Alliance Member or a Hub. 
+ +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------| +| Organisation Membership | False | True | False | ["MemberOf['HUB','ALLIANCE','OTHER','NCS']", 'null'] | + + + + +### populationSize + +Input the number of people captured within the dataset + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:--------| +| Dataset population size | False | False | True | ['int'] | + + + + +### keywords + +Please provide a list of relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users. + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + +Examples: + + * Preprints,Papers,HDR UK + + +### doiName + +DOI associated to this dataset + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| DOI for dataset | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * 10.1093/ije/dyx196 + + +### contactPoint + +Please provide a valid email address that can be used to coordinate data access requests. 
+ +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------| +| Contact point | False | False | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]"] | + +Examples: + + * SAILDatabank@swansea.ac.uk + + +### alternateIdentifiers + +Alternate dataset identifiers or local identifiers + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Alternate dataset identifiers | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + + + + +## documentation + +Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. Organisations are required to confirm that they have permission to distribute any additional media. + + + + + + +### description + +A free-text description of the dataset. Gateway Feature: Keywords and text may be extracted out of the description and index for search + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------| +| Description | False | False | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### associatedMedia + +Please provide any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xlsx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Associated media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + +Examples: + + * PDF Document that describes study protocol + + +### inPipeline + +Indicate whether this dataset is currently available for Researchers to request access. + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:--------------------------------------------------| +| Dataset pipeline status | False | True | False | ["Pipeline['Available','Not available']", 'null'] | + + + + +## coverage + +This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data. + + + + + + +### spatial + +The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race. 
+ +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Geographic coverage | False | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List'] | + +Examples: + + * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html + + +### typicalAgeRangeMin + +Please indicate the minimum age in years of participants in the dataset as a whole number (integer). + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------| +| Minimum age range | False | True | False | ['int', 'null'] | + +Examples: + + * 18 + + +### typicalAgeRangeMax + +Please indicate the maximum age in years of participants in the dataset as a whole number (integer). + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------| +| Maximum age range | False | True | False | ['int', 'null'] | + +Examples: + + * 90 + + +### datasetCompleteness + +The URL where a Researcher can learn more about the completeness of the dataset. + +| title | is_list | is_optional | required | type | +|:--------------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Dataset coverage/completeness/quality | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### materialType + +The type of biospecimen saved from a biological entity. 
+ +| title | is_list | is_optional | required | type | +|:-------------------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Biological sample availability | True | False | False | ["MaterialTypeCategoriesV2['None/not available','Bone marrow','Cancer cell lines','CDNA/MRNA','Core biopsy','DNA','Entire body organ','Faeces','Immortalized cell lines','Isolated pathogen','MicroRNA','Peripheral blood cells','Plasma','PM Tissue','Primary cells','RNA','Saliva','Serum','Swabs','Tissue','Urine','Whole blood','Availability to be confirmed','Other']"] | + + + + +### followup + +If known, what is the typical time span that a patient appears in the dataset (follow up period) + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Follow-up | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | + + + + +### pathway + +Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. 
+ +| title | is_list | is_optional | required | type | +|:----------------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Patient pathway description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### gender + +Male, Female, Other + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------| +| Gender | True | True | False | ["GenderType['Male','Female','Other']"] | + + + + +## provenance + +Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness. + + + + + + +### origin + +Coverage by origin (geographical and situations) + + + + + + +#### purpose + +Please indicate the purpose(s) for which the dataset was collected. + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------| +| Purpose of dataset collection | True | True | False | ["PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statuatory','Other',null]"] | + + + + +#### datasetType + +The topic areas to which the dataset content relates.
+ +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset type | False | False | True | ["DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']"] | + + + + +#### datasetSubType + +The sub-type of the dataset content. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset sub-type | False | True | True | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and 
neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']", 'null'] | + + + + +#### source + +Please indicate the source of the data extraction + +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------| +| Source of data extraction | True | True | False | ["SourceV2['EPR','Electronic survey','LIMS','Paper-based','Free text NLP','Machine generated','Other']"] | + + + + +#### collectionSource + +Please indicate the setting(s) where data was collected. 
Multiple settings may be provided + +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Collection source setting | True | True | False | ["SettingV2['Cohort, study, trial','Clinic','Primary care - Referrals','Primary care - Clinic','Primary care - Out of hours','Secondary care - Accident and Emergency','Secondary care - Outpatients','Secondary care - In-patients','Secondary care - Ambulance','Secondary care - ICU','Prescribing - Community pharmacy','Prescribing - Hospital','Patient report outcome','Wearables','Local authority','National government','Community','Services','Home','Private','Social care - Health care at home','Social care - Other social data','Census','Other',null]"] | + + + + +#### imageContrast + +Indicate whether usage of imaging contrast is captured within the dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:---------------------------------------------| +| Image contrast | False | True | False | ["Ternary['Yes','No','Not stated']", 'null'] | + + + + +### temporal + +Dates and other temporal coverage information + + + + + + +#### publishingFrequency + +Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. 
When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicate when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ + +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Publishing frequency | False | False | True | ["PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]"] | + + + + +#### distributionReleaseDate + +Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.
+ +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:-----------------------------| +| Distribution release date | False | True | False | ['date', 'datetime', 'null'] | + + + + +#### startDate + +The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:---------------------| +| Start date | False | False | True | ['date', 'datetime'] | + + + + +#### endDate + +The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.
+ +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------| +| End date | False | True | False | ['date', 'datetime', "EndDateEnum['CONTINUOUS',null]", 'null'] | + + + + +#### timeLag + +Please indicate the typical time-lag between an event and the data for that event appearing in the dataset + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------| +| Time lag | False | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']"] | + + + + +## accessibility + +Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets. + + + + + + +### usage + +This section includes information about how the data can be used and how it is currently being used + + + + + + +#### dataUseLimitation + +Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. 
NOTE: we have extended the DUO to include a value for NO LINKAGE + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data use limitation | True | True | False | ["DataUseLimitationV2['General research use','Genetic studies only','No general methods research','No restriction','Research-specific restrictions','Research use only','No linkage']"] | + + + + +#### dataUseRequirements + +Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information. + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data use requirements | True | True | False | ["DataUseRequirementsV2['Collaboration required','Ethics approval required','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','User-specific restriction']"] | + + + + +#### resourceCreator + +Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided.' 
+ +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------| +| Citation requirements | False | True | False | ["ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + + + + +### access + +Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets. + + + + + + +#### accessRights + +Please provide details for the data access rights + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------| +| Access rights | False | False | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +#### accessServiceCategory + +The method a Researcher will use to access the dataset, if approved. + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------| +| Access method category | False | True | False | ["AccessService['TRE/SDE','Direct access','Open access','Varies based on project']", 'null'] | + +Examples: + + * TRE/SDE + + +#### accessMode + +Indication of the application type to enable research access. 
+ +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------| +| Access mode | False | True | False | ["AccessMode['Join research consortium','New project']", 'null'] | + + + + +#### accessService + +Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset. + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access service description | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide + + +#### accessRequestCost + +Please provide link(s) to a webpage or description detailing the service or cost model for processing data access requests. 
+ +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access request cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### deliveryLeadTime + +Please provide an indication of the typical processing times based on the types of requests typically received. + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Time to dataset access | False | True | False | ["DeliveryLeadTimeV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']", 'null'] | + + + + +#### jurisdiction + +Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + + + + +#### dataController + +Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed. 
+ +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataProcessor + +A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### formatAndStandards + +Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset. + + + + + + +#### vocabularyEncodingScheme + +List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided. 
+ +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Controlled vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + + + + +#### conformsTo + +List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. + +| title | is_list | is_optional | required | type | +|:----------------------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Alignment with standardised data models | True | False | True | ["StandardisedDataModels[{'$defs': {'StandardisedDataModelsEnum': {'enum': ['HL7 FHIR', 'HL7 V2', 'HL7 CDA', 'HL7 CCOW', 'LOINC', 'DICOM', 'I2B2', 'IHE', 'OMOP', 'OPENEHR', 'SENTINEL', 'PCORNET', 'CDISC', 'NHS DATA DICTIONARY', 'NHS SCOTLAND DATA DICTIONARY', 'NHS WALES DATA DICTIONARY', 'LOCAL', 'OTHER'], 'title': 'StandardisedDataModelsEnum', 'type': 'string'}}, 'anyOf': [{'$ref': '#/$defs/StandardisedDataModelsEnum'}, {'type': 'null'}], 'default': null}]"] | + +Examples: + + * LOCAL,NHS DATA DICTIONARY + + +#### language + +This should list all the languages in 
which the dataset metadata and underlying data is made available. + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Language | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + + + + +#### format + +If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | + + + + +## enrichmentAndLinkage + +This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers. + + + + + + +### derivedFrom + +If applicable, please provide DOIs or links to datasets from which data in this dataset can be derived or calculated. 
+ +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------| +| Derived from | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'List', 'null'] | + + + + +### isPartOf + +This relationship indicates that the dataset is a component or subset of a broader collection of related datasets. For example, clinical trial data for a specific drug may be part of a larger database of pharmaceutical research data. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------| +| Is part of | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'List', 'null'] | + + + + +### linkableDatasets + +If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked. + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------| +| Linked datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'List', 'null'] | + + + + +### similarToDatasets + +If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked. 
+ +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------| +| Linked datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'List', 'null'] | + + + + +### investigations + +Please provide link to any active projects that are using the dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| Investigations | True | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### tools + +Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/ + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| Tools | True | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### syntheticDataWebLink + +Links to locations of information and or raw downloads of synthetic data associated with this dataset + +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| Synthetic Data Web Links | True | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### publicationAboutDataset + +DOIs for publications which describe the dataset. + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Publication about the dataset | True | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### publicationUsingDataset + +DOIs for publications which use the dataset for analysis. 
+ +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Publication using the dataset | True | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +## observations + +Multiple observations about the dataset may be provided and users are expected to provide at least one observation +(1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. +Example: + <b> Statistical Population 1 + </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 + <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: “2017” + + + + + + + +### observedNode + +Please select one of the following statistical populations for your observation + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------| +| Dataset volume measure | False | False | True | ["StatisticalPopulationConstrainedV2['Person','Event','Findings','Number of scans per modality']"] | + +Examples: + + * PERSONS + + +### measuredValue + +Please provide the population size associated with the population type of the dataset i.e.
1000 people in a study, or 87 images (MRI) of Knee. Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------| +| Measured value | False | False | True | ['int'] | + + + + +### disambiguatingDescription + +If the SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type. + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Disambiguating description | False | True | False | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### observationDate + +Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:---------------------| +| Observation date | False | False | True | ['date', 'datetime'] | + + + + +### measuredProperty + +Describe the property used to measure each observation.
+ +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------------------| +| Measured property | False | False | True | ['MeasuredProperty[{}]'] | + + + + +## structuralMetadata + +Descriptions of all tables and data elements that can be included in the dataset + + + + + + +### name + +The name of a table in a dataset. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:----------------| +| Table Name | False | True | True | ['str', 'null'] | + + + + +### description + +A description of a table in a dataset. + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------| +| Table Description | False | True | False | ['str', 'null'] | + + + + +### columns + +A list of columns contained within a table in a dataset. + + + + + + +#### name + +The name of a column in a table. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------| +| Column Name | False | False | True | ['Name[{}]'] | + + + + +#### dataType + +The data type of a column in a table. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:--------| +| Column Name | False | False | True | ['str'] | + + + + +#### description + +A description of a column in a table. 
+ +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------| +| Column Description | False | True | False | ['str', 'null'] | + + + + +#### sensitive + +A True or False value, indicating if the field is sensitive or not + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:---------| +| Sensitive | False | False | True | ['bool'] | + + + + +#### values + +values in a dataset + + + + + + +##### name + +Unique value in a column. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-------------| +| Value Name | False | False | True | ['Name[{}]'] | + + + + +##### description + +A description of a unique value in a column. + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------| +| Value Description | False | True | False | ['str', 'null'] | + + + + +##### frequency + +The frequency of occurrence of a value in a column + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Value Frequency | False | True | False | ['int', 'null'] | + + + diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json new file mode 100644 index 0000000..4d52b91 --- /dev/null +++ b/docs/HDRUK/3.0.0.structure.json @@ -0,0 +1,1352 @@ +[ + { + "name": "identifier", + "required": true, + "title": "Dataset identifier", + "description": "System dataset identifier", + "examples": [ + "226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ], + "type": [ + "Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", + "Url[{'anyOf': [{'format': 'uri', 'minLength': 
1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "version", + "required": true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "revisions", + "required": true, + "title": "Metadata Version Revisions", + "description": "A list of persistent identifiers and version numbers for previous versions of metadata for this dataset", + "examples": null, + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "version", + "required": true, + "title": "revision version", + "description": "Version number used for previous version of this dataset", + "examples": [ + "6.0.0" + ], + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "url", + "required": true, + "title": "revision url", + "description": "Some url with a reference to the record of a previous version of this dataset", + "examples": [ + "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561" + ], + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "issued", + "required": true, + "title": "Metadata Issued Datetime',", + "description": "Datetime stamp of when this metadata version was initially issued", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "modified", + "required": true, + "title": "Last Modified Datetime", + "description": "Datetime stamp of when 
this metadata was last modified", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "summary", + "required": true, + "title": "Summary", + "description": "Summary of metadata describing key pieces of information.", + "examples": null, + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "title", + "required": true, + "title": "Title", + "description": "Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + "North West London COVID-19 Patient Level Situation Report" + ], + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "abstract", + "required": true, + "title": "Dataset abstract", + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." 
+ ], + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "dataProvider", + "required": true, + "title": "Dataset provider", + "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", + "examples": null, + "type": [ + "Organisation" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "identifier", + "required": false, + "title": "identifier", + "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "name", + "required": true, + "title": "Name of Data Provider", + "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata.", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "logo", + "required": false, + "title": "Organisation Logo", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. 
The following formats will be accepted .jpg, .png or .svg.", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "description", + "required": false, + "title": "Organisation Description", + "description": "Please provide a URL that describes the organisation.", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "contactPoint", + "required": true, + "title": "contact point", + "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers.", + "examples": null, + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "List" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "memberOf", + "required": false, + "title": "Organisation Membership", + "description": "Please indicate if the organisation is an Alliance Member or a Hub.'", + "examples": null, + "type": [ + "MemberOf['HUB','ALLIANCE','OTHER','NCS']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "populationSize", + "required": true, + "title": "Dataset population size", + "description": "Input the number of people captured within the dataset", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "keywords", + "required": true, + "title": "Keywords", + "description": "Please provide a list of relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. 
We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "examples": [ + "Preprints,Papers,HDR UK" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "doiName", + "required": false, + "title": "DOI for dataset", + "description": "DOI associated to this dataset", + "examples": [ + "10.1093/ije/dyx196" + ], + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "contactPoint", + "required": true, + "title": "Contact point", + "description": "Please provide a valid email address that can be used to coordinate data access requests.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "alternateIdentifiers", + "required": false, + "title": "Alternate dataset identifiers", + "description": "Alternate dataset identifiers or local identifiers", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "documentation", + "required": false, + "title": "Documentation", + "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. 
Organisations are required to confirm that they have permission to distribute any additional media.", + "examples": null, + "type": [ + "Documentation", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "description", + "required": false, + "title": "Description", + "description": "A free-text description of the dataset. Gateway Feature: Keywords and text may be extracted out of the description and index for search", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "associatedMedia", + "required": false, + "title": "Associated media", + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. 
Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "inPipeline", + "required": false, + "title": "Dataset pipeline status", + "description": "Indicate whether this dataset is currently available for Researchers to request access.", + "examples": null, + "type": [ + "Pipeline['Available','Not available']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "coverage", + "required": false, + "title": "Coverage", + "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "examples": null, + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "spatial", + "required": false, + "title": "Geographic coverage", + "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "typicalAgeRangeMin", + "required": false, + "title": "Minimum age range", + "description": "Please indicate the minimum age in years of participants in the dataset as a whole number (integer).", + "examples": [ + 18 + ], + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "typicalAgeRangeMax", + "required": false, + "title": "Maximum age range", + "description": "Please indicate the maximum age in years of participants in the dataset as a whole number (integer).", + "examples": [ + 90 + ], + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "datasetCompleteness", + "required": false, + "title": "Dataset coverage/completeness/quality", + "description": "The URL where a Researcher can learn more about the completeness of the dataset.", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "materialType", + "required": false, + "title": "Biological sample availability", + "description": "The type of biospecimen saved from a biological entity.", + "examples": null, + "type": [ + "MaterialTypeCategoriesV2['None/not available','Bone marrow','Cancer cell lines','CDNA/MRNA','Core biopsy','DNA','Entire body organ','Faeces','Immortalized cell lines','Isolated pathogen','MicroRNA','Peripheral blood cells','Plasma','PM Tissue','Primary 
cells','RNA','Saliva','Serum','Swabs','Tissue','Urine','Whole blood','Availability to be confirmed','Other']" + ], + "is_list": true, + "is_optional": false + }, + { + "name": "followup", + "required": false, + "title": "Follow-up", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "pathway", + "required": false, + "title": "Patient pathway description", + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gender", + "required": false, + "title": "Gender", + "description": "Male, Female, Other", + "examples": null, + "type": [ + "GenderType['Male','Female','Other']" + ], + "is_list": true, + "is_optional": true + } + ] + }, + { + "name": "provenance", + "required": false, + "title": "Provenance", + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + "examples": null, + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "origin", + "required": false, + "title": "Origin Coverage", + "description": "Coverate by origin (geographical and situations)", + 
"examples": null, + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "purpose", + "required": false, + "title": "Purpose of dataset collection", + "description": "Please indicate the purpose(s) that the dataset was collected.", + "examples": null, + "type": [ + "PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statuatory','Other',null]" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "datasetType", + "required": true, + "title": "Dataset type", + "description": "The topic areas to which the dataset content relates.", + "examples": null, + "type": [ + "DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "datasetSubType", + "required": true, + "title": "Dataset sub-type", + "description": "The sub-type of the dataset content.", + "examples": null, + "type": [ + "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry 
(research)','National Disease Registries and Audits','Births and Deaths','Not applicable']", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "source", + "required": false, + "title": "Source of data extraction", + "description": "Please indicate the source of the data extraction", + "examples": null, + "type": [ + "SourceV2['EPR','Electronic survey','LIMS','Paper-based','Free text NLP','Machine generated','Other']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "collectionSource", + "required": false, + "title": "Collection source setting", + "description": "Please indicate the setting(s) where data was collected. Multiple settings may be provided", + "examples": null, + "type": [ + "SettingV2['Cohort, study, trial','Clinic','Primary care - Referrals','Primary care - Clinic','Primary care - Out of hours','Secondary care - Accident and Emergency','Secondary care - Outpatients','Secondary care - In-patients','Secondary care - Ambulance','Secondary care - ICU','Prescribing - Community pharmacy','Prescribing - Hospital','Patient report outcome','Wearables','Local authority','National government','Community','Services','Home','Private','Social care - Health care at home','Social care - Other social data','Census','Other',null]" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "imageContrast", + "required": false, + "title": "Image contrast", + "description": "Indicate whether usage of imaging contrast is captured within the dataset.", + "examples": null, + "type": [ + "Ternary['Yes','No','Not stated']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "temporal", + "required": false, + "title": "Temporal Coverage", + "description": "Dates and other temporal coverage information", + "examples": null, + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "publishingFrequency", + "required": true, + "title": "Publishing 
frequency", + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": null, + "type": [ + "PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "distributionReleaseDate", + "required": false, + "title": "Distribution release date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. 
if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "startDate", + "required": true, + "title": "Start date", + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "endDate", + "required": false, + "title": "End date", + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.'", + "examples": null, + "type": [ + "date", + "datetime", + "EndDateEnum['CONTINUOUS',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "timeLag", + "required": true, + "title": "Time lag", + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "type": [ + "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']" + ], + "is_list": false, + "is_optional": false + } + ] + } + ] + }, + { + "name": "accessibility", + "required": true, + "title": "Accessibility", + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", 
+ "examples": null, + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "usage", + "required": false, + "title": "Usage", + "description": "This section includes information about how the data can be used and how it is currently being used", + "examples": null, + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "dataUseLimitation", + "required": false, + "title": "Data use limitation", + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", + "examples": null, + "type": [ + "DataUseLimitationV2['General research use','Genetic studies only','No general methods research','No restriction','Research-specific restrictions','Research use only','No linkage']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "dataUseRequirements", + "required": false, + "title": "Data use requirements", + "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.", + "examples": null, + "type": [ + "DataUseRequirementsV2['Collaboration required','Ethics approval required','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','User-specific restriction']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "resourceCreator", + "required": false, + "title": "Citation requirements", + "description": "Please provide the text that you would like included as part of any citation that credits this dataset. 
This is typically just the name of the publisher. No employee details should be provided.'", + "examples": null, + "type": [ + "ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "access", + "required": true, + "title": "Accessibility", + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", + "examples": null, + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "accessRights", + "required": true, + "title": "Access rights", + "description": "Please provide details for the data access rights", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "accessServiceCategory", + "required": false, + "title": "Access method category", + "description": "The method a Researcher will use to access the dataset, if approved.", + "examples": [ + "TRE/SDE" + ], + "type": [ + "AccessService['TRE/SDE','Direct access','Open access','Varies based on project']", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "accessMode", + "required": false, + "title": "Access mode", + "description": "Indication of the application type to enable research access.", + "examples": null, + "type": [ + "AccessMode['Join research consortium','New project']", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "accessService", + "required": false, + "title": "Access service description", + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional 
consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessRequestCost", + "required": false, + "title": "Access request cost", + "description": "Please provide link(s) to a webpage or description detailing the service or cost model for processing data access requests.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "deliveryLeadTime", + "required": false, + "title": "Time to dataset access", + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "examples": null, + "type": [ + "DeliveryLeadTimeV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "jurisdiction", + "required": true, + "title": "Jurisdiction", + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataController", + "required": true, + "title": "Data Controller", + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataProcessor", + "required": false, + "title": "Data Processor", + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "formatAndStandards", + "required": false, + "title": "Format and Standards", + "description": "Section includes technical attributes for language vocabularies, sizes etc. 
and gives researchers facts about and processing the underlying data in the dataset.", + "examples": null, + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "vocabularyEncodingScheme", + "required": true, + "title": "Controlled vocabulary", + "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "conformsTo", + "required": true, + "title": "Alignment with standardised data models", + "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", + "examples": [ + "LOCAL,NHS DATA DICTIONARY" + ], + "type": [ + "StandardisedDataModels[{'$defs': {'StandardisedDataModelsEnum': {'enum': ['HL7 FHIR', 'HL7 V2', 'HL7 CDA', 'HL7 CCOW', 'LOINC', 'DICOM', 'I2B2', 'IHE', 'OMOP', 'OPENEHR', 'SENTINEL', 'PCORNET', 'CDISC', 'NHS DATA DICTIONARY', 'NHS SCOTLAND DATA DICTIONARY', 'NHS WALES DATA DICTIONARY', 'LOCAL', 'OTHER'], 'title': 'StandardisedDataModelsEnum', 'type': 'string'}}, 'anyOf': [{'$ref': '#/$defs/StandardisedDataModelsEnum'}, {'type': 'null'}], 'default': null}]" + ], + "is_list": true, + "is_optional": false, + "subItems": [] + }, + { + "name": "language", + "required": true, + "title": "Language", + "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "format", + "required": true, + "title": "Format", + "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + } + ] + }, + { + "name": "enrichmentAndLinkage", + "required": false, + "title": "Enrichment and Linkage", + "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + "examples": null, + "type": [ + "EnrichmentAndLinkage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "derivedFrom", + "required": false, + "title": "Derived from", + "description": "If applicable, please provide DOIs or links to datasets from which data in this dataset can be derived or calculated.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isPartOf", + "required": false, + "title": "Is part of", + "description": "This relationship indicates that the dataset is a component or subset of a broader collection of related datasets. 
For example, clinical trial data for a specific drug may be part of a larger database of pharmaceutical research data.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "linkableDatasets", + "required": false, + "title": "Linked datasets", + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "similarToDatasets", + "required": false, + "title": "Linked datasets", + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. 
If no DOI is available, please provide the title of the datasets that can be linked.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "List", + "List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "investigations", + "required": false, + "title": "Investigations", + "description": "Please provide link to any active projects that are using the dataset.", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + }, + { + "name": "tools", + "required": false, + "title": "Tools", + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + }, + { + "name": "syntheticDataWebLink", + "required": false, + "title": "Synthetic Data Web Links", + "description": "Links to locations of information and or raw downloads of synthetic data associated with this dataset", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + }, + { + "name": "publicationAboutDataset", + "required": false, + "title": "Publication about the dataset", + "description": "DOIs for publications which describe the dataset.", + "examples": null, + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 
'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + }, + { + "name": "publicationUsingDataset", + "required": false, + "title": "Publication using the dataset", + "description": "DOIs for publications which use the dataset for analysis.", + "examples": null, + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "observations", + "required": true, + "title": "Observations", + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation \n(1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. \nExample: \n <b> Statistical Population 1 \n </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 \n <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d\"\n", + "examples": null, + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "observedNode", + "required": true, + "title": "Dataset volume measure", + "description": "Please select one of the following statistical populations for you observation", + "examples": [ + "PERSONS" + ], + "type": [ + "StatisticalPopulationConstrainedV2['Person','Event','Findings','Number of scans per modality']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredValue", + "required": true, + "title": "Measured value", + 
"description": "Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "disambiguatingDescription", + "required": false, + "title": "Disambiguating description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "observationDate", + "required": true, + "title": "Observation date", + "description": "Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. 
Users can add multiple observations.", + "examples": null, + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredProperty", + "required": true, + "title": "Measured property", + "description": "Describe the property used to measure each observation.", + "examples": null, + "type": [ + "MeasuredProperty[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + } + ] + }, + { + "name": "structuralMetadata", + "required": false, + "title": "Structural Metadata", + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "examples": null, + "type": [ + "DataTable" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Table Name", + "description": "The name of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "description", + "required": false, + "title": "Table Description", + "description": "A description of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "columns", + "required": true, + "title": "Data Columns", + "description": "A list of columns contained within a table in a dataset.", + "examples": null, + "type": [ + "DataColumn" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Column Name", + "description": "The name of a column in a table.", + "examples": null, + "type": [ + "Name[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "dataType", + "required": true, + "title": "Column Name", + "description": "The name of a column in a table.", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "description", + "required": false, + "title": "Column
Description", + "description": "A description of a column in a table.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "sensitive", + "required": true, + "title": "Sensitive", + "description": "A True or False value, indicating if the field is sensitive or not", + "examples": null, + "type": [ + "bool" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "values", + "required": false, + "title": "Values", + "description": "values in a dataset", + "examples": null, + "type": [ + "DataValue" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Value Name", + "description": "Unique value in a column.", + "examples": null, + "type": [ + "Name[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "description", + "required": false, + "title": "Value Description", + "description": "A description of a unique value in a column.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "frequency", + "required": false, + "title": "Value Frequency", + "description": "The frequency of occurrence of a value in a column", + "examples": null, + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + } + ] + } +] \ No newline at end of file diff --git a/hdr_schemata/models/HDRUK/v3_0_0/EnrichmentAndLinkage.py b/hdr_schemata/models/HDRUK/v3_0_0/EnrichmentAndLinkage.py index c412170..5e015c2 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/EnrichmentAndLinkage.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/EnrichmentAndLinkage.py @@ -13,37 +13,21 @@ class EnrichmentAndLinkage(BaseModel): class Config: extra = "forbid" - derivedFrom: Optional[ - Union[ - Optional[CommaSeparatedValues], - List[Optional[AbstractText]], - List[DatasetDescriptor] - ] - ] = Field(None, **an.derivedFrom.__dict__) - - isPartOf: Optional[
- Union[ - Optional[CommaSeparatedValues], - List[Union[Url, OneHundredFiftyCharacters]], - List[DatasetDescriptor] - ] - ] = Field(None, **an.isPartOf.__dict__) - - linkableDatasets: Optional[ - Union[ - Optional[CommaSeparatedValues], - List[Union[Optional[Url], OneHundredFiftyCharacters]], - List[DatasetDescriptor] - ] - ] = Field(None, **an.linkableDatasets.__dict__) - - similarToDatasets: Optional[ - Union[ - Optional[CommaSeparatedValues], - List[Union[Optional[Url], OneHundredFiftyCharacters]], - List[DatasetDescriptor] - ] - ] = Field(None, **an.linkableDatasets.__dict__) + derivedFrom: Optional[List[DatasetDescriptor]] = Field( + None, **an.derivedFrom.__dict__ + ) + + isPartOf: Optional[List[DatasetDescriptor]] = Field( + None, **an.isPartOf.__dict__ + ) + + linkableDatasets: Optional[List[DatasetDescriptor]] = Field( + None, **an.linkableDatasets.__dict__ + ) + + similarToDatasets: Optional[List[DatasetDescriptor]] = Field( + None, **an.similarToDatasets.__dict__ + ) investigations: Optional[List[Url]] = Field(None, **an.investigations.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 9e02a27..e740114 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -35,7 +35,7 @@ summary: name: description: The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. - title: Name of Data Provider + title: Name of data provider logo: description: Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. 
diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index a0c9ebe..ba2cd68 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -219,22 +219,27 @@ def remove_types(data): from hdr_schemata.models.HDRUK import Hdruk212 from hdr_schemata.models.HDRUK import Hdruk213 -from hdr_schemata.models.HDRUK import Hdruk220 +from hdr_schemata.models.HDRUK import Hdruk220 from hdr_schemata.models.HDRUK import Hdruk221 +from hdr_schemata.models.HDRUK import Hdruk300 from hdr_schemata.models.GWDM.v1_1 import Gwdm10 from hdr_schemata.models.GWDM.v1_1 import Gwdm11 from hdr_schemata.models.GWDM.v1_2 import Gwdm12 +from hdr_schemata.models.GWDM.v2_0 import Gwdm20 - + create_markdown(Hdruk220, "./docs/HDRUK/", "2.2.0") create_markdown(Hdruk221, "./docs/HDRUK/", "2.2.1") create_markdown(Hdruk212, "./docs/HDRUK/", "2.1.2") create_markdown(Hdruk213, "./docs/HDRUK/", "2.1.3") +create_markdown(Hdruk300, "./docs/HDRUK/", "3.0.0") from hdr_schemata.models.GWDM.v1_1 import Gwdm10 from hdr_schemata.models.GWDM.v1_1 import Gwdm11 from hdr_schemata.models.GWDM.v1_2 import Gwdm12 +from hdr_schemata.models.GWDM.v2_0 import Gwdm20 create_markdown(Gwdm10, "./docs/GWDM/", "1.0") create_markdown(Gwdm11, "./docs/GWDM/", "1.1") create_markdown(Gwdm12, "./docs/GWDM/", "1.2") +create_markdown(Gwdm20, "./docs/GWDM/", "2.0")