Skip to content

Commit

Permalink
feat (core): Initial very rough TBC implementation of new IRI class
Browse files Browse the repository at this point in the history
  • Loading branch information
vorburger committed Aug 17, 2024
1 parent 8eacb9c commit 68df42b
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 28 deletions.
5 changes: 5 additions & 0 deletions docs/concepts/other.md
Original file line number Diff line number Diff line change
Expand Up @@ -208,13 +208,18 @@ Vector _("Embedding")_ databases of possible future interest:

## AI

### Retrieval-Augmented Generation (RAG) and [Entity Resolution](https://github.com/GavinMendelGleason/blog/blob/main/entries/entity_resolution.md)

* [TerminusDB VectorLink](https://terminusdb.com/vectorlink/), see [Overview](https://terminusdb.com/blog/terminusdb-internals/) #commercial #[opensource](https://github.com/terminusdb/terminusdb) #trial

### Symbolic AI Reasoning (GOFAI?)

[Semantic Reasoning](https://en.wikipedia.org/wiki/Semantic_reasoner) through [modus ponens](https://en.wikipedia.org/wiki/Modus_ponens) of an [Inference Engine](https://en.wikipedia.org/wiki/Inference_engine) by
_Forward Chaining; also see Backward Chaining, Backtracking, Backpropagation -
[TBD](https://en.wikipedia.org/wiki/Symbolic_artificial_intelligence)._

* [Mangle](https://github.com/google/mangle) #OpenSource #Google
* [Leuchtkraft](https://github.com/Wuelle/Leuchtkraft) #OpenSource

## Platforms

Expand Down
100 changes: 73 additions & 27 deletions java/dev/enola/common/io/iri/IRI.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package dev.enola.common.io.iri;

import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.net.HostAndPort;
import com.google.common.net.HostSpecifier;
import com.google.common.net.InetAddresses;
Expand Down Expand Up @@ -111,6 +110,8 @@ public final class IRI implements Comparable<IRI> {
// TODO Actually fully read https://url.spec.whatwg.org first.. :=)

// TODO Research existing implementations for inspiration...
// - https://developer.android.com/reference/android/net/Uri, with
// https://cs.android.com/android/platform/superproject/main/+/main:frameworks/base/core/java/android/net/Uri.java
// - https://github.com/square/okhttp/issues/1486
// - https://github.com/palominolabs/url-builder
// - https://github.com/dmfs/uri-toolkit
Expand Down Expand Up @@ -240,30 +241,55 @@ public Builder newBuilder() {
}

/**
 * Returns the scheme of this IRI, lower-cased via {@code normalizeScheme}, or the empty string
 * if it has none.
 *
 * <p>The value is derived lazily from the string form on first use and then kept in the {@code
 * scheme} field. A ':' only terminates a scheme if it appears before the first '/', '?' or '#';
 * a colon after any of these belongs to the path, query or fragment of a relative reference
 * (e.g. {@code a/b:c} or {@code ?x=a:b}), which therefore has no scheme.
 *
 * @return the scheme without the trailing ':' colon; never null, but possibly empty
 */
public String scheme() {
    // NB: Scheme must be (very limited) ASCII, only; there's no decoding, here.
    if (scheme == null) {
        scheme = "";
        if (string != null) {
            var len = string.length();
            for (var i = 0; i < len; i++) {
                var c = string.charAt(i);
                if (c == ':') {
                    scheme = normalizeScheme(string.substring(0, i).trim());
                    break;
                }
                // Reaching a path, query or fragment delimiter first means there is no scheme.
                if (c == '/' || c == '?' || c == '#') break;
            }
        }
    }
    return scheme;
}

/** Placeholder which is not implemented yet; it currently always returns null. */
private String find_scheme() {
return null; // TODO
}

/**
 * Checks whether this IRI's scheme equals the given one, ignoring case.
 *
 * @param scheme the bare scheme name to compare with (e.g. {@code https}), without the
 *     trailing ':' colon
 * @return true if both schemes are equal after {@code normalizeScheme}, false otherwise
 */
public boolean hasScheme(String scheme) {
    // TODO Implement more optimized
    // scheme() never includes the ':' delimiter, so an argument containing one could never match.
    assert !scheme.contains(":");
    return normalizeScheme(scheme()).equals(normalizeScheme(scheme));
}

/** Tells whether this IRI is absolute, which here means that its {@link #scheme()} is not blank. */
public boolean isAbsolute() {
    var schemeIsBlank = scheme().isBlank();
    return !schemeIsBlank;
}

// /** Scheme specific part is just everything after the : colon of the scheme. */
/* public CharSequence schemeSpecificPart() {
return null; // TODO
} */
/** Tells whether this IRI is relative; this is exactly the negation of {@link #isAbsolute()}. */
public boolean isRelative() {
    var absolute = isAbsolute();
    return !absolute;
}

/**
 * Returns the authority of this IRI, or the empty string if it has none.
 *
 * <p>The value is derived lazily from the string form on first use and then kept in the {@code
 * authority} field. It is the text which follows the first ':' colon and any slashes directly
 * after it, up to (and excluding) the next '/', '?' or '#', or the end of the string. For
 * example, {@code https://enola.dev/index.html?q#f} yields {@code enola.dev}.
 *
 * @return the (not yet decoded) authority; never null, but possibly empty
 */
public String authority() {
    // TODO Decoding!
    if (authority == null) {
        authority = "";
        if (string != null) {
            var len = string.length();
            var start = string.indexOf(':') + 1;
            // start == 0 means there is no ':' at all; start == len means nothing follows it.
            if (start != 0 && start != len) {
                // Skip the slashes which separate the scheme's colon from the authority.
                while (start < len && string.charAt(start) == '/') ++start;
                // Scan forward UNTIL a delimiter which ends the authority is found. Starting at
                // 'start' (not start + 1) keeps 'end' within bounds for input such as "schema://".
                var end = start;
                while (end < len) {
                    var c = string.charAt(end);
                    if (c == '/' || c == '?' || c == '#') break;
                    ++end;
                }
                authority = string.substring(start, end);
            }
        }
    }
    return authority;
}

public String path() {
Expand All @@ -278,14 +304,10 @@ public String fragment() {
return null; // TODO
}

// TODO Allow both & and ; as query delimiters?!
/** Not implemented yet; this currently always returns null. */
public ImmutableMultimap<String, String> queryMap() {
return null; // TODO
}
// Allows (interprets) only '&' and not ';' as query delimiter!
// public ImmutableMultimap<String, String> queryMap() { return null; } // TODO needed?

/** Not implemented yet; this currently ignores {@code key} and always returns null. */
public ImmutableMultimap<String, String> queryParameter(String key) {
return null; // TODO
}
// public ImmutableMultimap<String, String> queryParameter(String key) { return null; } // TODO?

public IRI base() {
return null; // TODO as in URIs.base()
Expand All @@ -308,27 +330,51 @@ public IRI relativize(IRI iri) {

/**
 * Returns the string form of this IRI.
 *
 * <p>If no string form is available yet, it is composed once from the components in the order
 * scheme, authority, path, query, fragment, and then kept in the {@code string} field. The
 * composition adds only the ':' after a scheme, the "//" before an authority, the '?' before a
 * query and the '#' before a fragment; the path is appended as-is, so a path following an
 * authority is expected to carry its own leading '/'. Components which are null or blank are
 * omitted.
 */
@Override
public String toString() {
    // TODO Encoding! Offer x2 different toString!
    if (string == null) {
        var sb = new StringBuilder();

        var schemePart = scheme();
        if (schemePart != null && !schemePart.isBlank()) {
            sb.append(schemePart).append(':');
        }

        var authorityPart = authority();
        if (authorityPart != null && !authorityPart.isBlank()) {
            // No '/' is appended here; it is part of the path (e.g. "/index.html").
            sb.append("//").append(authorityPart);
        }

        // Use the accessors (not the raw fields) for every component; the null checks
        // guard against accessors which are not implemented yet and still return null.
        var pathPart = path();
        if (pathPart != null && !pathPart.isBlank()) {
            sb.append(pathPart);
        }

        var queryPart = query();
        if (queryPart != null && !queryPart.isBlank()) {
            sb.append('?').append(queryPart);
        }

        var fragmentPart = fragment();
        if (fragmentPart != null && !fragmentPart.isBlank()) {
            sb.append('#').append(fragmentPart);
        }

        string = sb.toString();
    }
    return string;
}

/** Placeholder which is not implemented yet; it currently always returns the literal "TODO". */
private String stringify() {
return "TODO";
}

/**
 * Converts this IRI to a {@link URI} by parsing its {@link #toString()} form.
 *
 * @return the URI created from the string form of this IRI
 * @throws URISyntaxException if the {@link URI} constructor rejects the string form
 */
public URI toURI() throws URISyntaxException {
    var text = toString();
    return new URI(text);
}

/**
 * Returns an IRI built from {@code newBuilder()} with its scheme set to the normalized
 * (lower-cased) form of this IRI's {@link #scheme()}.
 *
 * <p>Only the scheme is normalized so far; see the TODOs below.
 *
 * @return the IRI produced by the builder
 */
public IRI normalize() {
    // TODO Keep result in a lazily initialized field? But... memory?!
    var builder = newBuilder();
    // Set the scheme once, through the shared helper, so that all scheme normalization agrees.
    builder.scheme(normalizeScheme(scheme()));
    // TODO ... FIXME
    // TODO Should we drop default ports for a few well-known schemes?
    return builder.build();
}

/**
 * Normalizes a scheme by lower-casing it with {@link Locale#ROOT}, so that the result does not
 * depend on the default locale.
 *
 * @param scheme the scheme to normalize; must not be null
 * @return the lower-cased scheme
 */
private static String normalizeScheme(String scheme) {
return scheme.toLowerCase(Locale.ROOT);
}

/** Equality check, with {@link #normalize()}-ation. */
public boolean equalsNormalized(IRI o) {
return this.normalize().toString().equals(o.normalize().toString());
Expand Down Expand Up @@ -364,7 +410,7 @@ public void validate() throws ValidationException {
var scheme = scheme();
if (isAbsolute() && scheme.isBlank()) throw new ValidationException(this, "Blank scheme");
if (isAbsolute() && !CharAscii.INSTANCE.matchesAllOf(scheme))
throw new ValidationException(this, "Invalid scheme: " + scheme);
throw new ValidationException(this, "Invalid non-ASCII [a-zA-Z0-9] scheme: " + scheme);

try {
var authority = authority();
Expand Down
28 changes: 27 additions & 1 deletion java/dev/enola/common/io/iri/IRITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@

public class IRITest {

// TODO https://github.com/web-platform-tests/wpt/blob/master/url/resources/urltestdata.json
// see https://github.com/web-platform-tests/wpt/tree/master/url

record TestIRI(
boolean validIRI,
boolean validURI,
Expand All @@ -43,6 +46,19 @@ record TestIRI(
// TODO Use e.g. a CSV instead of coding these out here?
TestIRI[] tests =
new TestIRI[] {
new TestIRI(true, true, "", "", "", "", "", "", ""),
new TestIRI(true, true, "relative", "relative", "", "", "relative", "", ""),
new TestIRI(true, true, "schema:", "schema:", "schema", "", "", "", ""),
new TestIRI(
true,
true,
"schema:authority",
"schema:authority",
"schema",
"authority",
"",
"",
""),
new TestIRI(
true,
true,
Expand All @@ -52,7 +68,17 @@ record TestIRI(
"enola.dev",
"",
"",
"")
""),
new TestIRI(
true,
true,
"hTtPs://enola.dev/index.html?query#fragment",
"https://enola.dev/index.html?query#query",
"https",
"enola.dev",
"/index.html",
"query",
"query")
};

// TODO Test handling of + or %20 for space in path, query and fragment
Expand Down

0 comments on commit 68df42b

Please sign in to comment.