From bf186e00db139db42ea116b26938e83f4240a775 Mon Sep 17 00:00:00 2001 From: Vincent Emonet Date: Thu, 21 Dec 2023 16:41:07 +0100 Subject: [PATCH] Replaced the trie-rs package by a new ptrie package (it was giving inconsistent results). Improve docs, add JS tests with jest --- .github/workflows/test.yml | 2 - js/jest.config.ts | 9 +++ js/package.json | 9 ++- js/src/api.rs | 153 +++++++++++++++++++++++++++++++++++-- js/tests/curies.test.ts | 119 +++++++++++++++++++++++++++++ lib/Cargo.toml | 2 +- lib/docs/SUMMARY.md | 2 +- lib/docs/contributing.md | 10 +-- lib/docs/use_javascript.md | 90 +++++++++++++++------- lib/docs/use_rust.md | 50 +++++++++++- lib/src/api.rs | 113 +++++++++++++++------------ lib/src/error.rs | 2 + lib/src/fetch.rs | 2 + lib/tests/curies_test.rs | 40 ++++++---- python/src/lib.rs | 1 - scripts/benchmark_rust.py | 3 + scripts/build-js.sh | 2 +- 17 files changed, 491 insertions(+), 118 deletions(-) create mode 100644 js/jest.config.ts create mode 100644 js/tests/curies.test.ts diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b41eed3..2be2603 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,8 +25,6 @@ jobs: CARGO_TERM_COLOR: always steps: - uses: actions/checkout@v4 - with: - submodules: true - run: cargo build --all-features - run: cargo test env: diff --git a/js/jest.config.ts b/js/jest.config.ts new file mode 100644 index 0000000..168bec3 --- /dev/null +++ b/js/jest.config.ts @@ -0,0 +1,9 @@ +import type {Config} from 'jest'; + +const config: Config = { + preset: 'ts-jest', + verbose: true, + // testEnvironment: 'node', +}; + +export default config; diff --git a/js/package.json b/js/package.json index d4b0eb4..5e916b4 100644 --- a/js/package.json +++ b/js/package.json @@ -21,7 +21,8 @@ ], "scripts": { "fmt": "prettier \"**/*.{ts,tsx,js,cjs,json,md,html}\" --ignore-path .gitignore --write", - "test": "wasm-pack build --debug --target nodejs && mocha", + "jest" : "jest", + "test": "npm run 
build && jest", "build": "rm -rf pkg pkg-web pkg-node && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web pkg-node", "start": "http-server ./", "release": "npm run build && npm publish ./pkg --access public", @@ -33,8 +34,12 @@ ] }, "devDependencies": { + "@jest/globals": "^29.7.0", "http-server": "^14.1.1", - "prettier": "^3.1.0" + "jest": "^29.7.0", + "prettier": "^3.1.0", + "ts-jest": "^29.1.1", + "ts-node": "^10.9.2" }, "prettier": { "trailingComma": "none", diff --git a/js/src/api.rs b/js/src/api.rs index 5150ce8..55c4f07 100644 --- a/js/src/api.rs +++ b/js/src/api.rs @@ -1,7 +1,12 @@ use std::collections::HashSet; -use curies::{sources::get_obo_converter, Converter, Record}; -use js_sys::Promise; +use curies::{ + sources::{ + get_bioregistry_converter, get_go_converter, get_monarch_converter, get_obo_converter, + }, + Converter, Record, +}; +use js_sys::{Array, Promise}; use serde::{Deserialize, Serialize}; use serde_wasm_bindgen::to_value; use wasm_bindgen::prelude::*; @@ -64,11 +69,53 @@ impl ConverterJs { }) } + #[wasm_bindgen(static_method_of = ConverterJs, js_name = fromPrefixMap)] + pub fn from_prefix_map(prefix_map: String) -> Promise { + future_to_promise(async move { + match Converter::from_prefix_map(&*prefix_map).await { + Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), + Err(e) => Err(JsValue::from_str(&e.to_string())), + } + }) + } + + #[wasm_bindgen(static_method_of = ConverterJs, js_name = fromJsonld)] + pub fn from_jsonld(jsonld: String) -> Promise { + future_to_promise(async move { + match Converter::from_jsonld(&*jsonld).await { + Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), + Err(e) => Err(JsValue::from_str(&e.to_string())), + } + }) + } + + #[wasm_bindgen(static_method_of = ConverterJs, js_name = fromExtendedPrefixMap)] + pub fn from_extended_prefix_map(prefix_map: 
String) -> Promise { + future_to_promise(async move { + match Converter::from_extended_prefix_map(&*prefix_map).await { + Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), + Err(e) => Err(JsValue::from_str(&e.to_string())), + } + }) + } + #[wasm_bindgen(js_name = addRecord)] pub fn add_record(&mut self, record: RecordJs) -> Result<(), JsValue> { self.converter .add_record(record.record) - .map(|_| self.converter.build()) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + #[wasm_bindgen(js_name = addCurie)] + pub fn add_curie(&mut self, prefix: &str, uri_prefix: &str) -> Result<(), JsValue> { + self.converter + .add_curie(prefix, uri_prefix) + .map_err(|e| JsValue::from_str(&e.to_string())) + } + + pub fn chain(&self, converter: &ConverterJs) -> Result { + Converter::chain(vec![self.converter.clone(), converter.converter.clone()]) + .map(|converter| ConverterJs { converter }) .map_err(|e| JsValue::from_str(&e.to_string())) } @@ -84,15 +131,36 @@ impl ConverterJs { .map_err(|e| JsValue::from_str(&e.to_string())) } + #[wasm_bindgen(js_name = expandList)] + pub fn expand_list(&self, curies: JsValue) -> Result { + let curies_vec: Vec = serde_wasm_bindgen::from_value(curies) + .map_err(|e| JsValue::from_str(&format!("Error converting CURIEs list: {}", e)))?; + let js_array = self + .converter + .expand_list(curies_vec.iter().map(String::as_str).collect()) + .into_iter() + .map(JsValue::from) + .collect::(); + Ok(JsValue::from(js_array)) + } + + #[wasm_bindgen(js_name = compressList)] + pub fn compress_list(&self, curies: JsValue) -> Result { + let curies_vec: Vec = serde_wasm_bindgen::from_value(curies) + .map_err(|e| JsValue::from_str(&format!("Error converting URIs list: {}", e)))?; + let js_array = self + .converter + .compress_list(curies_vec.iter().map(String::as_str).collect()) + .into_iter() + .map(JsValue::from) + .collect::(); + Ok(JsValue::from(js_array)) + } + #[wasm_bindgen(js_name = toString)] pub fn to_string(&self) -> String { 
self.converter.to_string() } - - // #[wasm_bindgen(js_name = prefixMap)] - // pub fn prefix_map(&self) -> Result { - // serde_wasm_bindgen::to_value(&self.converter.prefix_map).map_err(|e| e.into()) - // } } /// Get OBO converter @@ -108,9 +176,78 @@ pub fn get_obo_converter_js() -> Promise { }) } +/// Get Bioregistry converter +#[wasm_bindgen(js_name = getBioregistryConverter)] +pub fn get_bioregistry_converter_js() -> Promise { + future_to_promise(async move { + match get_bioregistry_converter().await { + Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), + Err(e) => Err(JsValue::from_str(&format!( + "Error getting Bioregistry converter: {e}" + ))), + } + }) +} + +/// Get GO converter +#[wasm_bindgen(js_name = getGoConverter)] +pub fn get_go_converter_js() -> Promise { + future_to_promise(async move { + match get_go_converter().await { + Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), + Err(e) => Err(JsValue::from_str(&format!( + "Error getting GO converter: {e}" + ))), + } + }) +} + +/// Get Monarch converter +#[wasm_bindgen(js_name = getMonarchConverter)] +pub fn get_monarch_converter_js() -> Promise { + future_to_promise(async move { + match get_monarch_converter().await { + Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), + Err(e) => Err(JsValue::from_str(&format!( + "Error getting Monarch converter: {e}" + ))), + } + }) +} + // impl Into for RecordJs { // fn into(self) -> JsValue { // // JsValue::from_serde(&self).unwrap() // self.to_js() // } // } + +// NOTE: we cannot easily convert a JS object to a string in Rust, it needs to be done in JS with JSON.stringify() +// fn get_str_from_obj(obj: JsValue) -> Result { +// if obj.is_string() { +// obj.as_string().ok_or_else(|| JsValue::from_str("String conversion failed")) +// } else if obj.is_object() { +// let str: String = serde_wasm_bindgen::from_value(obj) +// .map_err(|e| JsValue::from_str(&format!("Failed to serialize JSON: {}", e)))?; +// Ok(str) +// } else { +// 
return Err(JsValue::from_str("Expected a string or a JSON object")); +// } +// } + +// #[wasm_bindgen(static_method_of = ConverterJs)] +// pub fn chain(converters: &JsValue) -> Promise { +// future_to_promise(async move { +// let converters_vec: Vec = serde_wasm_bindgen::from_value(converters).map_err(|e| { +// JsValue::from_str(&format!("Error converting converters list: {}", e)) +// })?; +// let rust_converters: Vec = converters_vec +// .into_iter() +// .map(|converter_js| converter_js.converter) +// .collect(); +// match Converter::chain(rust_converters) { +// Ok(converter) => Ok(JsValue::from(ConverterJs { converter })), +// Err(e) => Err(JsValue::from_str(&e.to_string())), +// } +// }) +// } diff --git a/js/tests/curies.test.ts b/js/tests/curies.test.ts new file mode 100644 index 0000000..d1b2884 --- /dev/null +++ b/js/tests/curies.test.ts @@ -0,0 +1,119 @@ +import {describe, expect, test} from '@jest/globals'; +import {Record, Converter, getOboConverter, getBioregistryConverter} from "../pkg/node"; + +describe('Tests for the curies npm package', () => { + // NOTE: `await init()` only needed in browser environment + + test('from empty converter', async () => { + const converter = new Converter(); + const record1 = new Record("DOID", "http://purl.obolibrary.org/obo/DOID_", [], []) + converter.addRecord(record1); + converter.addCurie("OBO", "http://purl.obolibrary.org/obo/"); + expect(converter.compress("http://purl.obolibrary.org/obo/DOID_1234")).toBe("DOID:1234"); + expect(converter.expand("OBO:1234")).toBe("http://purl.obolibrary.org/obo/1234"); + expect(converter.expandList(["OBO:1234", "DOID:1234", "Wrong:1"])).toEqual([ + "http://purl.obolibrary.org/obo/1234", + "http://purl.obolibrary.org/obo/DOID_1234", + undefined + ]); + expect(converter.compress("http://purl.obolibrary.org/obo/1234")).toBe("OBO:1234"); + expect(converter.compressList([ + "http://purl.obolibrary.org/obo/1234", + "http://purl.obolibrary.org/obo/DOID_1234", + 
"http://identifiers.org/DOID:1234" + ])).toEqual(["OBO:1234", "DOID:1234", undefined]); + }); + + test('from prefix map', async () => { + const converter = await Converter.fromPrefixMap(`{ + "GO": "http://purl.obolibrary.org/obo/GO_", + "DOID": "http://purl.obolibrary.org/obo/DOID_", + "OBO": "http://purl.obolibrary.org/obo/" + }`); + expect(converter.compress("http://purl.obolibrary.org/obo/DOID_1234")).toBe("DOID:1234"); + expect(converter.expand("DOID:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234"); + expect(converter.expandList(["OBO:1234", "DOID:1234", "Wrong:1"])).toEqual([ + "http://purl.obolibrary.org/obo/1234", + "http://purl.obolibrary.org/obo/DOID_1234", + undefined + ]); + expect(converter.compressList([ + "http://purl.obolibrary.org/obo/1234", + "http://purl.obolibrary.org/obo/DOID_1234", + "http://identifiers.org/DOID:1234" + ])).toEqual(["OBO:1234", "DOID:1234", undefined]); + }); + + test('from JSON-LD', async () => { + const converter = await Converter.fromJsonld(`{ + "@context": { + "GO": "http://purl.obolibrary.org/obo/GO_", + "DOID": "http://purl.obolibrary.org/obo/DOID_", + "OBO": "http://purl.obolibrary.org/obo/" + } + }`); + expect(converter.compress("http://purl.obolibrary.org/obo/DOID_1234")).toBe("DOID:1234"); + expect(converter.expand("DOID:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234"); + }); + + test('from extended prefix map', async () => { + const converter = await Converter.fromExtendedPrefixMap(`[ + { + "prefix": "DOID", + "prefix_synonyms": [ + "doid" + ], + "uri_prefix": "http://purl.obolibrary.org/obo/DOID_", + "uri_prefix_synonyms": [ + "http://bioregistry.io/DOID:" + ], + "pattern": "^\\\\d+$" + }, + { + "prefix": "GO", + "prefix_synonyms": [ + "go" + ], + "uri_prefix": "http://purl.obolibrary.org/obo/GO_", + "pattern": "^\\\\d{7}$" + }, + { + "prefix": "OBO", + "prefix_synonyms": [ + "obo" + ], + "uri_prefix": "http://purl.obolibrary.org/obo/" + } + ]`); + 
expect(converter.compress("http://bioregistry.io/DOID:1234")).toBe("DOID:1234"); + expect(converter.expand("doid:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234"); + }); + + test('get OBO converter', async () => { + const converter = await getOboConverter(); + expect(converter.compress("http://purl.obolibrary.org/obo/DOID_1234")).toBe("DOID:1234"); + expect(converter.expand("DOID:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234"); + }); + + test('get Bioregistry converter', async () => { + const converter = await getBioregistryConverter(); + expect(converter.compress("http://purl.obolibrary.org/obo/DOID_1234")).toBe("doid:1234"); + expect(converter.expand("doid:1234")).toBe("http://purl.obolibrary.org/obo/DOID_1234"); + }); + + test('chain converters', async () => { + const customConverter1 = await Converter.fromPrefixMap(`{ + "DOID": "http://purl.obolibrary.org/obo/SPECIAL_DOID_" + }`); + const customConverter2 = await Converter.fromPrefixMap(`{ + "GO": "http://purl.obolibrary.org/obo/SPECIAL_GO_", + "DOID": "http://purl.obolibrary.org/obo/DOID_" + }`); + const bioregistryConverter = await getBioregistryConverter(); + const converter = bioregistryConverter + .chain(customConverter1) + .chain(customConverter2) + expect(converter.compress("http://purl.obolibrary.org/obo/SPECIAL_DOID_1234")).toBe("DOID:1234"); + }); + +}); diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 69b0ce1..9c28256 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -12,7 +12,7 @@ categories.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -trie-rs = "0.1" +ptrie = "0.5" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" reqwest = { version = "0.11", features = ["blocking", "json"] } diff --git a/lib/docs/SUMMARY.md b/lib/docs/SUMMARY.md index b44c7da..8c9ec39 100644 --- a/lib/docs/SUMMARY.md +++ b/lib/docs/SUMMARY.md @@ -2,6 +2,6 @@ - [Introduction](introduction.md) - [Use Rust 
crate](use_rust.md) -- [Use Python package](use_python.md) - [Use NPM package](use_javascript.md) +- [Use Python package](use_python.md) - [Contributing](contributing.md) diff --git a/lib/docs/contributing.md b/lib/docs/contributing.md index 0bb4caa..cf14254 100644 --- a/lib/docs/contributing.md +++ b/lib/docs/contributing.md @@ -64,7 +64,7 @@ cd curies.rs git checkout -b add-my-contribution ``` -### 🧪 Run tests +### 🧪 Run tests Run tests for all packages: @@ -84,7 +84,7 @@ Run a specific test: cargo test new_empty_converter -- --nocapture ``` -If tests panic without telling on which test it failed: +If tests panic without reporting which test failed, use: ```bash cargo test -- --test-threads=1 @@ -93,7 +93,7 @@ cargo test -- --test-threads=1 Test the `curies` crate with code coverage: ```bash -cargo tarpaulin -p curies --out html +./scripts/cov.sh ``` ### 🐍 Run python @@ -113,7 +113,7 @@ python -m pytest python/tests/ ### 🟨 Run JavaScript -Build the npm package: +Build the npm package, and run the TypeScript tests in a NodeJS environment: ```bash ./scripts/build-js.py @@ -125,7 +125,7 @@ Start a web server: python -m http.server 3000 --directory ./js ``` -Open [localhost:3000](http://localhost:3000) in your web browser. +Open [localhost:3000](http://localhost:3000) in your web browser to check the browser version. 
### ✨ Format diff --git a/lib/docs/use_javascript.md b/lib/docs/use_javascript.md index 7ef5e50..2a96bea 100644 --- a/lib/docs/use_javascript.md +++ b/lib/docs/use_javascript.md @@ -12,7 +12,56 @@ Install the `npm` package (use `yarn` or `pnpm` if you prefer) to use it from yo npm install @biopragmatics/curies ``` -## 🚀 Use it in bare HTML files +## 🟢 Use it in a NodeJS environment + +There are multiple methods available for creating or importing converters: + +```ts +import {Record, Converter, getOboConverter, getBioregistryConverter} from "@biopragmatics/curies"; + +async function main() { + // Populate from Records + const rec1 = new Record("obo", "http://purl.obolibrary.org/obo/", [], []); + console.log(rec1.toString()); + console.log(rec1.toJs()); + const converterFromRecords = new Converter(); + converterFromRecords.addRecord(rec1); + + // Load from a prefix map json (string or URI) + const converterFromMap = await Converter.fromPrefixMap(`{ + "doid": "http://purl.obolibrary.org/obo/MY_DOID_" + }`); + + // Load from an extended prefix map (string or URI) + const converterFromUrl = await Converter.fromExtendedPrefixMap("https://raw.githubusercontent.com/biopragmatics/bioregistry/main/exports/contexts/bioregistry.epm.json") + + // Load from a JSON-LD context (string or URI) + const converterFromJsonld = await Converter.fromJsonld("http://purl.obolibrary.org/meta/obo_context.jsonld"); + + // Load from one of the predefined sources + const converterFromSource = await getBioregistryConverter(); + + // Chain multiple converters in one + const converter = converterFromMap + .chain(converterFromUrl) + .chain(converterFromSource) + + // Expand CURIE and compress URI + const curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234"); + const uri = converter.expand("doid:1234"); + + // Expand and compress list of CURIEs and URIs + const curies = converter.compressList(["http://purl.obolibrary.org/obo/DOID_1234"]); + const uris = converter.expandList(["doid:1234"]); +} 
+main(); +``` + +## 🦊 Use it in a browser + +When using it in a client browser, you will need to initialize the wasm binary with `await init()`; after that, you can use the same functions as in the NodeJS environment. + +### 🚀 In bare HTML files You can easily import the NPM package from a CDN, and work with `curies` from a simple `index.html` file: @@ -33,21 +82,12 @@ You can easily import the NPM package from a CDN, and work with `curies` from a async function main() { await init(); - const rec1 = new Record("obo", "http://purl.obolibrary.org/obo/", [], []); - console.log(rec1.toString()); - console.log(rec1.toJs()); - - // Populate the Converter with records, or import existing converters: - // const converter = new Converter(); - // converter.addRecord(rec1); - const converter = await getOboConverter(); - console.log(converter.toString()) - const compressedUri = converter.compress("http://purl.obolibrary.org/obo/DOID_1234"); - const expandedUri = converter.expand("DOID:1234"); - document.getElementById("compressed").innerText = compressedUri; - document.getElementById("expanded").innerText = expandedUri; + const curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234"); + const uri = converter.expand("DOID:1234"); + document.getElementById("compressed").innerText = curie; + document.getElementById("expanded").innerText = uri; } main(); @@ -63,7 +103,7 @@ npx http-server python -m http.server ``` -## ⚛️ Use from any JavaScript framework +### ⚛️ From any JavaScript framework It can be used from any JavaScript framework, or NodeJS. @@ -81,12 +121,12 @@ For example, to use it in a nextjs react app: npm install --save @biopragmatics/curies ``` -3. Add code, e.g. in `src/app/page.tsx`: +3. Add code, e.g. 
in `src/app/page.tsx` running on the client: ```typescript 'use client' import { useEffect, useState } from 'react'; - import init, { Converter, Record } from "@biopragmatics/curies"; + import init, { getBioregistryConverter } from "@biopragmatics/curies"; export default function Home() { const [output, setOutput] = useState(''); @@ -94,18 +134,10 @@ For example, to use it in a nextjs react app: // Initialize the wasm library and use it init().then(async () => { - const rec1 = new Record("obo", "http://purl.obolibrary.org/obo/", [], []); - console.log(rec1.toString()); - console.log(rec1.toJs()); - - // Populate the Converter with records, or import existing converters: - const converter = new Converter(); - converter.addRecord(rec1); - console.log(converter.toString()) - - const compressedUri = converter.compress("http://purl.obolibrary.org/obo/DOID_1234"); - const expandedUri = converter.expand("DOID:1234"); - setOutput(compressedUri); + const converter = await getBioregistryConverter(); + const curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234"); + const uri = converter.expand("doid:1234"); + setOutput(`${curie}: ${uri}`); }); }, []); diff --git a/lib/docs/use_rust.md b/lib/docs/use_rust.md index 1a6bfe9..49e8a31 100644 --- a/lib/docs/use_rust.md +++ b/lib/docs/use_rust.md @@ -2,13 +2,56 @@ [![crates.io](https://img.shields.io/crates/v/curies.svg)](https://crates.io/crates/curies) -You can use the Rust crate to work with CURIEs: +## 🛠️ General usage + +You can use the Rust crate to work with CURIEs: import converters, compress URIs, expand CURIEs. 
+ +```rust +use curies::{Converter, Record, sources::get_bioregistry_converter}; +use std::collections::HashSet; + +async fn usage_example() -> Result<(), Box> { + + // Load from a prefix map json (string or URI) + let converterFromMap = Converter::from_prefix_map(r#"{ + "doid": "http://purl.obolibrary.org/obo/MY_DOID_" +}"#).await?; + + // Load from an extended prefix map (string or URI) + let converterFromUrl = Converter::from_extended_prefix_map("https://raw.githubusercontent.com/biopragmatics/bioregistry/main/exports/contexts/bioregistry.epm.json").await?; + + // Load from a JSON-LD context (string or URI) + let converterFromJsonld = Converter::from_jsonld("http://purl.obolibrary.org/meta/obo_context.jsonld").await?; + + // Load from one of the predefined sources + let converterFromSource = get_bioregistry_converter().await?; + + // Chain multiple converters in one + let converter = Converter::chain(vec![converterFromMap, converterFromUrl, converterFromSource])?; + + let uri = converter.expand("doid:1234")?; + println!("Expanded CURIE: {}", uri); + + let curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234")?; + println!("Compressed URI: {}", curie); + Ok(()) +} + +let rt = tokio::runtime::Runtime::new().unwrap(); +rt.block_on(async { + usage_example().await +}).unwrap(); +``` + +## 🏗️ Build a converter + +You can also build a `Converter` from scratch: ```rust use curies::{Converter, Record}; use std::collections::HashSet; -fn example() -> Result<(), Box> { +fn build_example() -> Result<(), Box> { let mut converter = Converter::default(); let record1 = Record { @@ -21,7 +64,6 @@ fn example() -> Result<(), Box> { let record2 = Record::new("obo", "http://purl.obolibrary.org/obo/"); converter.add_record(record1)?; converter.add_record(record2)?; - converter.build(); let uri = converter.expand("doid:1234")?; println!("Expanded CURIE: {}", uri); @@ -30,7 +72,7 @@ fn example() -> Result<(), Box> { println!("Compressed URI: {}", curie); Ok(()) } 
-example().unwrap(); +build_example().unwrap(); ``` ## 📖 API reference diff --git a/lib/src/api.rs b/lib/src/api.rs index e21a335..7b61427 100644 --- a/lib/src/api.rs +++ b/lib/src/api.rs @@ -1,12 +1,15 @@ +//! API for `Converter` and `Record` + use crate::error::CuriesError; use crate::fetch::{ExtendedPrefixMapSource, PrefixMapSource}; +use ptrie::Trie; use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::{HashMap, HashSet}; use std::fmt; use std::sync::Arc; -use trie_rs::{Trie, TrieBuilder}; +// use trie_rs::{Trie, TrieBuilder}; /// A CURIE `Record`, containing its prefixes and URI prefixes, /// used by `Converters` to resolve CURIEs and URIs. @@ -72,7 +75,6 @@ impl fmt::Display for Record { /// pattern: None, /// }; /// converter.add_record(record1)?; -/// converter.build(); /// /// let uri = converter.expand("doid:1234")?; /// assert_eq!(uri, "http://purl.obolibrary.org/obo/DOID_1234"); @@ -83,11 +85,12 @@ impl fmt::Display for Record { /// } /// use_converter().unwrap(); /// ``` +#[derive(Debug, Clone)] pub struct Converter { records: Vec>, prefix_map: HashMap>, uri_map: HashMap>, - trie: Trie, + trie: Trie>, delimiter: String, } @@ -107,7 +110,8 @@ impl Converter { records: Vec::new(), prefix_map: HashMap::new(), uri_map: HashMap::new(), - trie: TrieBuilder::new().build(), + // trie: TrieBuilder::new().build(), + trie: Trie::>::new(), delimiter: delimiter.to_string(), } } @@ -139,7 +143,6 @@ impl Converter { converter.add_record(Record::new(&prefix, &uri_prefix_str))?; } } - converter.build(); Ok(converter) } @@ -173,7 +176,6 @@ impl Converter { _ => continue, } } - converter.build(); Ok(converter) } @@ -191,14 +193,13 @@ impl Converter { /// let converter = Converter::from_extended_prefix_map("https://raw.github.com/biopragmatics/bioregistry/main/exports/contexts/bioregistry.epm.json"); /// ``` pub async fn from_extended_prefix_map( - data: T, + prefix_map: T, ) -> Result { - let records = 
data.fetch().await?; + let records = prefix_map.fetch().await?; let mut converter = Converter::default(); for record in records { converter.add_record(record)?; } - converter.build(); Ok(converter) } @@ -227,12 +228,15 @@ impl Converter { self.records.push(rec.clone()); self.prefix_map.insert(rec.prefix.clone(), rec.clone()); - self.uri_map.insert(rec.uri_prefix.clone(), rec.clone()); for prefix in &rec.prefix_synonyms { self.prefix_map.insert(prefix.clone(), rec.clone()); } + self.uri_map.insert(rec.uri_prefix.clone(), rec.clone()); + self.trie + .insert(rec.uri_prefix.clone().chars(), rec.clone()); for uri_prefix in &rec.uri_prefix_synonyms { self.uri_map.insert(uri_prefix.clone(), rec.clone()); + self.trie.insert(uri_prefix.chars(), rec.clone()); } Ok(()) } @@ -242,18 +246,6 @@ impl Converter { self.add_record(Record::new(prefix, uri_prefix)) } - /// Build trie search once all `Records` have been added - pub fn build(&mut self) { - let mut trie_builder = TrieBuilder::new(); - for record in &self.records { - trie_builder.push(&record.uri_prefix); - for uri_prefix in &record.uri_prefix_synonyms { - trie_builder.push(uri_prefix); - } - } - self.trie = trie_builder.build(); - } - /// Chain multiple `Converters` into a single `Converter`. The first `Converter` in the list is used as the base. /// If the same prefix is found in multiple converters, the first occurrence is kept, /// but the `uri_prefix` and synonyms are added as synonyms if they are different. @@ -307,8 +299,7 @@ impl Converter { updated_record .prefix_synonyms .extend(record.prefix_synonyms.clone()); - base_converter.delete_record(&updated_record.prefix)?; - base_converter.add_record(updated_record)?; + base_converter.update_record(updated_record)?; } } else { // If the prefix does not exist, add the record @@ -316,36 +307,53 @@ impl Converter { } } } - base_converter.build(); Ok(base_converter) } - /// Delete a `Record` from the `Converter` based on its prefix. 
+ /// Update a `Record` in the `Converter`. /// /// ``` /// use curies::{Converter, Record}; /// /// let mut converter = Converter::default(); - /// assert!(converter.delete_record("DOID").is_err()); + /// let record = Record::new("doid", "http://purl.obolibrary.org/obo/DOID_"); + /// converter.add_record(record.clone()).unwrap(); + /// assert!(converter.update_record(record).is_ok()); /// ``` - pub fn delete_record(&mut self, prefix: &str) -> Result<(), CuriesError> { - // Check if the record exists - let record = match self.prefix_map.get(prefix) { - Some(record) => Arc::clone(record), - None => return Err(CuriesError::NotFound(prefix.to_string())), - }; - // Remove the record from the records vector, prefix map, and uri map - self.records.retain(|r| r.prefix != prefix); - self.prefix_map.remove(&record.prefix); - self.uri_map.remove(&record.uri_prefix); - // Also remove any synonyms from the maps - for p_synonym in &record.prefix_synonyms { - self.prefix_map.remove(p_synonym); + pub fn update_record(&mut self, record: Record) -> Result<(), CuriesError> { + let rec = Arc::new(record); + // Update the record in the records vector + if let Some(pos) = self.records.iter().position(|r| r.prefix == rec.prefix) { + self.records[pos] = rec.clone(); + } else { + return Err(CuriesError::NotFound(rec.prefix.clone())); } - for u_synonym in &record.uri_prefix_synonyms { - self.uri_map.remove(u_synonym); + // Update the maps and trie + self.prefix_map.insert(rec.prefix.clone(), rec.clone()); + self.uri_map.insert(rec.uri_prefix.clone(), rec.clone()); + for prefix in &rec.prefix_synonyms { + self.prefix_map.insert(prefix.clone(), rec.clone()); + } + for uri_prefix in &rec.uri_prefix_synonyms { + self.uri_map.insert(uri_prefix.clone(), rec.clone()); + } + if self + .trie + .set_value(rec.uri_prefix.chars(), rec.clone()) + .is_err() + { + self.trie.insert(rec.uri_prefix.chars(), rec.clone()); + } + + for uri_prefix in &rec.uri_prefix_synonyms { + if self + .trie + 
.set_value(uri_prefix.chars(), rec.clone()) + .is_err() + { + self.trie.insert(uri_prefix.chars(), rec.clone()); + } } - self.build(); Ok(()) } @@ -366,13 +374,18 @@ impl Converter { } /// Find corresponding CURIE `Record` given a complete URI - pub fn find_by_uri(&self, uri: &str) -> Result<&Arc, CuriesError> { - let matching_uris = self.trie.common_prefix_search(uri); - let utf8_uri = match matching_uris.last() { - Some(u) => Ok(u), + pub fn find_by_uri(&self, uri: &str) -> Result, CuriesError> { + // let matching_uris = self.trie.common_prefix_search(uri); + // println!("{:?}", matching_uris); + // let utf8_uri = match matching_uris.last() { + // Some(u) => Ok(u), + // None => Err(CuriesError::NotFound(uri.to_string())), + // }; + // self.find_by_uri_prefix(std::str::from_utf8(utf8_uri?)?) + match self.trie.find_longest_prefix(uri.chars()) { + Some(rec) => Ok(rec), None => Err(CuriesError::NotFound(uri.to_string())), - }; - self.find_by_uri_prefix(std::str::from_utf8(utf8_uri?)?) + } } /// Validate an id against a `Record` regex pattern if it exists @@ -403,7 +416,7 @@ impl Converter { .find_map(|synonym| uri.strip_prefix(synonym)) }) .ok_or_else(|| CuriesError::NotFound(uri.to_string()))?; - self.validate_id(id, record)?; + self.validate_id(id, &record)?; Ok(format!("{}{}{}", &record.prefix, self.delimiter, id)) } diff --git a/lib/src/error.rs b/lib/src/error.rs index fcd7418..4c2463c 100644 --- a/lib/src/error.rs +++ b/lib/src/error.rs @@ -1,3 +1,5 @@ +//! Errors thrown by the library + use std::error::Error; use std::fmt; use std::str::Utf8Error; diff --git a/lib/src/fetch.rs b/lib/src/fetch.rs index 9ab0867..eb03e63 100644 --- a/lib/src/fetch.rs +++ b/lib/src/fetch.rs @@ -1,3 +1,5 @@ +//! 
Traits and functions for fetching data from HTTP or file system + use async_trait::async_trait; use serde_json::Value; use std::collections::HashMap; diff --git a/lib/tests/curies_test.rs b/lib/tests/curies_test.rs index a170860..37b6511 100644 --- a/lib/tests/curies_test.rs +++ b/lib/tests/curies_test.rs @@ -1,7 +1,4 @@ -use curies::{ - sources::{get_bioregistry_converter, get_go_converter}, - Converter, Record, -}; +use curies::{sources::get_bioregistry_converter, Converter, Record}; use serde_json::Value; use std::{ collections::{HashMap, HashSet}, @@ -23,14 +20,21 @@ fn new_empty_converter() -> Result<(), Box> { uri_prefix: "http://purl.obolibrary.org/obo/".to_string(), prefix_synonyms: HashSet::from(["OBO".to_string()]), uri_prefix_synonyms: HashSet::from(["https://identifiers.org/obo/"].map(String::from)), + pattern: None, + }; + let record3 = Record { + prefix: "wrongpattern".to_string(), + uri_prefix: "http://purl.obolibrary.org/wrongpattern/".to_string(), + prefix_synonyms: HashSet::new(), + uri_prefix_synonyms: HashSet::new(), pattern: Some("\\".to_string()), // Wrong pattern for test }; assert!(format!("{}", record1).starts_with("Prefix: doid")); assert!(format!("{}", converter).starts_with("Converter contains")); converter.add_record(record1.clone())?; converter.add_record(record2)?; - converter.build(); - assert_eq!(converter.len(), 2); + converter.add_record(record3)?; + assert_eq!(converter.len(), 3); assert!(!converter.is_empty()); // Find Record by prefix or URI @@ -65,6 +69,11 @@ fn new_empty_converter() -> Result<(), Box> { converter.compress("https://identifiers.org/DOID/1234")?, "doid:1234" ); + assert_eq!( + converter.compress("http://purl.obolibrary.org/obo/1234")?, + "obo:1234" + ); + assert_eq!( converter .compress_list(["http://wrong/1234", "https://identifiers.org/DOID/1234"].to_vec()), @@ -96,23 +105,23 @@ fn new_empty_converter() -> Result<(), Box> { .map_err(|e| assert!(e.to_string().starts_with("Invalid CURIE"))) .is_err()); 
assert!(converter.find_by_uri_prefix("wrong").is_err()); - assert!(converter.expand("obo:1234").is_err()); - let record3 = Record { + assert!(converter.expand("wrongpattern:1234").is_err()); + let record4 = Record { prefix: "wrong".to_string(), uri_prefix: "http://wrong.org/".to_string(), prefix_synonyms: HashSet::new(), uri_prefix_synonyms: HashSet::from(["https://identifiers.org/obo/"].map(String::from)), pattern: None, }; - assert!(converter.add_record(record3).is_err()); - let record4 = Record { + assert!(converter.add_record(record4).is_err()); + let record5 = Record { prefix: "wrong".to_string(), uri_prefix: "http://wrong.org/".to_string(), prefix_synonyms: HashSet::from(["OBO".to_string()]), uri_prefix_synonyms: HashSet::new(), pattern: None, }; - assert!(converter.add_record(record4).is_err()); + assert!(converter.add_record(record5).is_err()); Ok(()) } @@ -136,6 +145,10 @@ async fn from_prefix_map_converter() -> Result<(), Box> { converter.compress("http://purl.obolibrary.org/obo/DOID_1234")?, "DOID:1234" ); + assert_eq!( + converter.compress("http://purl.obolibrary.org/obo/1234")?, + "OBO:1234" + ); assert!(Converter::from_jsonld(prefix_map).await.is_err()); let prefix_map: HashMap = HashMap::new(); assert!(Converter::from_jsonld(prefix_map).await.is_err()); @@ -214,10 +227,9 @@ async fn from_converter_errors() -> Result<(), Box> { #[tokio::test] async fn chain_converters() -> Result<(), Box> { - let mut converter = Converter::chain(vec![ + let converter = Converter::chain(vec![ get_bioregistry_converter().await?, Converter::from_extended_prefix_map(Path::new("tests/resources/extended_map.json")).await?, - // get_go_converter().await?, ])?; assert_eq!( converter.compress("http://purl.obolibrary.org/obo/SPECIAL_DOID_1234")?, @@ -228,6 +240,6 @@ async fn chain_converters() -> Result<(), Box> { "http://purl.obolibrary.org/obo/GO_1234567" ); assert!(Converter::chain(vec![]).is_err()); - assert!(converter.delete_record("Wrong").is_err()); + // 
assert!(converter.delete_record("Wrong").is_err()); Ok(()) } diff --git a/python/src/lib.rs b/python/src/lib.rs index 8c9e8d4..dfa575b 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -90,7 +90,6 @@ impl ConverterPy { fn add_record(&mut self, record: RecordPy) -> PyResult<()> { self.converter .add_record(record.record) - .map(|_| self.converter.build()) .map_err(|e| PyErr::new::(e.to_string())) } diff --git a/scripts/benchmark_rust.py b/scripts/benchmark_rust.py index 31eb98e..4501a5b 100644 --- a/scripts/benchmark_rust.py +++ b/scripts/benchmark_rust.py @@ -1,8 +1,11 @@ +import time from curies_rs import Converter url = "https://raw.githubusercontent.com/biopragmatics/bioregistry/main/exports/contexts/bioregistry.epm.json" converter = Converter.load_extended_prefix_map(url) +start_time = time.time() curie = converter.compress("http://purl.obolibrary.org/obo/DOID_1234") +print(f"Compress took {(time.time() - start_time) * 1000}ms") print(curie) diff --git a/scripts/build-js.sh b/scripts/build-js.sh index 3be60bb..87fd8ea 100755 --- a/scripts/build-js.sh +++ b/scripts/build-js.sh @@ -3,7 +3,7 @@ set -e cd js -npm run build +npm run test python3 -m http.server # Or npm run start