diff --git a/docs/concepts/other.md b/docs/concepts/other.md index 0fac3aec1..382cd20f2 100644 --- a/docs/concepts/other.md +++ b/docs/concepts/other.md @@ -208,6 +208,10 @@ Vector _("Embedding")_ databases of possible future interest: ## AI +### Retrieval-Augmented Generation (RAG) and [Entity Resolution](https://github.com/GavinMendelGleason/blog/blob/main/entries/entity_resolution.md) + +* [TerminusDB VectorLink](https://terminusdb.com/vectorlink/), see [Overview](https://terminusdb.com/blog/terminusdb-internals/) #commercial #[opensource](https://github.com/terminusdb/terminusdb) #trial + ### Symbolic AI Reasoning (GOFAI?) [Semantic Reasoning](https://en.wikipedia.org/wiki/Semantic_reasoner) through [modus ponens](https://en.wikipedia.org/wiki/Modus_ponens) of an [Inference Engine](https://en.wikipedia.org/wiki/Inference_engine) by @@ -215,6 +219,7 @@ _Forward Chaining; also see Backward Chaining, Backtracking, Backpropagation - [TBD](https://en.wikipedia.org/wiki/Symbolic_artificial_intelligence)._ * [Mangle](https://github.com/google/mangle) #OpenSource #Google +* [Leuchtkraft](https://github.com/Wuelle/Leuchtkraft) #OpenSource ## Platforms diff --git a/java/dev/enola/common/io/iri/IRI.java b/java/dev/enola/common/io/iri/IRI.java index d95c81388..8e0d939fa 100644 --- a/java/dev/enola/common/io/iri/IRI.java +++ b/java/dev/enola/common/io/iri/IRI.java @@ -18,7 +18,6 @@ package dev.enola.common.io.iri; import com.google.common.base.CharMatcher; -import com.google.common.collect.ImmutableMultimap; import com.google.common.net.HostAndPort; import com.google.common.net.HostSpecifier; import com.google.common.net.InetAddresses; @@ -111,6 +110,8 @@ public final class IRI implements Comparable { // TODO Actually fully read https://url.spec.whatwg.org first.. :=) // TODO Research existing implementations for inspiration... + // - https://developer.android.com/reference/android/net/Uri, with + // https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/android/net/Uri.java // - https://github.com/square/okhttp/issues/1486 // - https://github.com/palominolabs/url-builder // - https://github.com/dmfs/uri-toolkit @@ -240,30 +241,55 @@ public Builder newBuilder() { } public String scheme() { - if (scheme == null) scheme = find_scheme(); + // NB: Schema must be (very limited) ASCII, only; there's no decoding, here. + if (scheme == null) { + if (string == null) scheme = ""; + else { + var p = string.indexOf(':'); + if (p == -1) scheme = ""; + else scheme = normalizeScheme(string.substring(0, p).trim()); + } + } return scheme; } - private String find_scheme() { - return null; // TODO - } - public boolean hasScheme(String scheme) { - // TODO Implement more optimized - return scheme().equals(scheme); + assert scheme.contains(":"); + return normalizeScheme(scheme()).equals(normalizeScheme(scheme)); } public boolean isAbsolute() { return !scheme().isBlank(); } - // /** Scheme specific part is just everything after the : colon of the scheme. */ - /* public CharSequence schemeSpecificPart() { - return null; // TODO - } */ + public boolean isRelative() { + return !isAbsolute(); + } public String authority() { - return null; // TODO + // TODO Decoding! + if (authority == null) { + if (string == null) authority = ""; + else { + var len = string.length(); + var start = string.indexOf(':') + 1; + if (start == 0 || start == len) authority = ""; + else { + while (start < len) { + if (string.charAt(start) == '/') ++start; + else break; + } + var end = start + 1; + while (end < len) { + if (string.charAt(end) == '/') ++end; + else break; + } + // TODO ? if (scheme == endAuthority - 1) authority = ""; + authority = string.substring(start, end); + } + } + } + return authority; } public String path() { @@ -278,14 +304,10 @@ public String fragment() { return null; // TODO } - // TODO Allow both & and ; as query delimiters?! - public ImmutableMultimap queryMap() { - return null; // TODO - } + // Allows (interprets) only '&' and not ';' as query delimiter! + // public ImmutableMultimap queryMap() { return null; } // TODO needed? - public ImmutableMultimap queryParameter(String key) { - return null; // TODO - } + // public ImmutableMultimap queryParameter(String key) { return null; } // TODO? public IRI base() { return null; // TODO as in URIs.base() @@ -308,14 +330,34 @@ public IRI relativize(IRI iri) { @Override public String toString() { - if (string == null) string = stringify(); + // TODO Encoding! Offer x2 different toString! + if (string == null) { + var sb = new StringBuilder(); + if (!scheme().isBlank()) { + sb.append(scheme()); + sb.append(':'); + } + if (!authority().isBlank()) { + sb.append("//"); + sb.append(authority()); + sb.append('/'); + } + if (!path().isBlank()) { + sb.append(path); + } + if (!query().isBlank()) { + sb.append('?'); + sb.append(query()); + } + if (!fragment().isBlank()) { + sb.append('#'); + sb.append(fragment()); + } + string = sb.toString(); + } return string; } - private String stringify() { - return "TODO"; - } - public URI toURI() throws URISyntaxException { return new URI(toString()); } @@ -323,12 +365,16 @@ public URI toURI() throws URISyntaxException { public IRI normalize() { // TODO Keep result in a lazily initialized field? But... memory?! var builder = newBuilder(); - builder.scheme(scheme().toLowerCase(Locale.ROOT)); + builder.scheme(normalizeScheme(scheme())); // TODO ... FIXME // TODO Should we drop default ports for a few well-known schemes? return builder.build(); } + private static String normalizeScheme(String scheme) { + return scheme.toLowerCase(Locale.ROOT); + } + /** Equality check, with {@link #normalize()}-ation. */ public boolean equalsNormalized(IRI o) { return this.normalize().toString().equals(o.normalize().toString()); @@ -364,7 +410,7 @@ public void validate() throws ValidationException { var scheme = scheme(); if (isAbsolute() && scheme.isBlank()) throw new ValidationException(this, "Blank scheme"); if (isAbsolute() && !CharAscii.INSTANCE.matchesAllOf(scheme)) - throw new ValidationException(this, "Invalid scheme: " + scheme); + throw new ValidationException(this, "Invalid non-ASCII [a-zA-Z0-9] scheme: " + scheme); try { var authority = authority(); diff --git a/java/dev/enola/common/io/iri/IRITest.java b/java/dev/enola/common/io/iri/IRITest.java index 6c5680213..de0c4c942 100644 --- a/java/dev/enola/common/io/iri/IRITest.java +++ b/java/dev/enola/common/io/iri/IRITest.java @@ -29,6 +29,9 @@ public class IRITest { + // TODO https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json + // see https://github.com/web-platform-tests/wpt/tree/master/url + record TestIRI( boolean validIRI, boolean validURI, @@ -43,6 +46,19 @@ record TestIRI( // TODO Use e.g. a CSV instead of coding these out here? TestIRI[] tests = new TestIRI[] { + new TestIRI(true, true, "", "", "", "", "", "", ""), + new TestIRI(true, true, "relative", "relative", "", "", "relative", "", ""), + new TestIRI(true, true, "schema:", "schema:", "schema", "", "", "", ""), + new TestIRI( + true, + true, + "schema:authority", + "schema:authority", + "schema", + "authority", + "", + "", + ""), new TestIRI( true, true, @@ -52,7 +68,17 @@ record TestIRI( "enola.dev", "", "", - "") + ""), + new TestIRI( + true, + true, + "hTtPs://enola.dev/index.html?query#fragment", + "https://enola.dev/index.html?query#query", + "https", + "enola.dev", + "/index.html", + "query", + "query") }; // TODO Test handling of + or %20 for space in path, query and fragment