From 0fcb078b95a224f167968bd54b96e8fc7c10cdbb Mon Sep 17 00:00:00 2001
From: Justin
Date: Thu, 2 Mar 2023 15:34:01 -0500
Subject: [PATCH] jaro

---
Reviewer note: the line structure of this patch was reconstructed from a
whitespace-collapsed copy, so indentation in the context lines is a best
guess; apply with `git am --ignore-whitespace` if the project.clj hunk is
rejected. The blob ids on the index lines are those of the original patch.
NOTE(review): the project.clj context reads `:profile`; Leiningen's key is
`:profiles` -- confirm (left untouched here).

 project.clj            |  3 ++-
 src/datething/jaro.clj | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 src/datething/jaro.clj

diff --git a/project.clj b/project.clj
index 4ac5514..b398b7e 100644
--- a/project.clj
+++ b/project.clj
@@ -3,9 +3,10 @@
   :url "https://github.com/justin2004/datething"
   :dependencies [[org.clojure/clojure "1.10.0"]
                  [wit/duckling "0.4.24"]
+                 [info.debatty/java-string-similarity "2.0.0"]
                  [org.apache.jena/apache-jena-libs "4.2.0" :extension "pom"]]
   :repl-options {:init-ns datething.parse}
   :profile {:uberjar {:aot :all
                       :debug true}}
-  :main datething.parse) ; i think i just need this to get AOT when i run `lein uberjar`
+  :main datething.jaro) ; i think i just need this to get AOT when i run `lein uberjar`
 ; TODO just do AOT instead
diff --git a/src/datething/jaro.clj b/src/datething/jaro.clj
new file mode 100644
index 0000000..a48d110
--- /dev/null
+++ b/src/datething/jaro.clj
@@ -0,0 +1,69 @@
+(ns datething.jaro
+  "Jena SPARQL extension function that returns the Jaro-Winkler distance
+  between the literal values of the two variables passed to it."
+  (:gen-class
+    :extends org.apache.jena.sparql.function.FunctionBase2)
+  (:require [clojure.pprint :as pp])
+  (:import (info.debatty.java.stringsimilarity JaroWinkler)
+           (org.apache.jena.graph Node)
+           (org.apache.jena.sparql.engine.binding Binding)
+           (org.apache.jena.sparql.expr Expr ExprEvalException ExprList)
+           (org.apache.jena.sparql.expr.nodevalue NodeValueFloat)))
+
+; REPL sketch of the library call used below:
+; (def jw (new info.debatty.java.stringsimilarity.JaroWinkler))
+; (.distance jw "atlanta, ga" "atlanta")
+
+(defn- looked-up
+  "Returns, as a string, the literal value that `binding` holds for the
+  variable passed as argument number `idx` of `args`.
+  Throws ExprEvalException (instead of a NullPointerException) when the
+  argument is not a variable, the variable is unbound, or its value is not
+  a literal."
+  ^String [^Binding binding ^ExprList args idx]
+  (let [^Expr expr   (.get (.getList args) (int idx))
+        asked        (.asVar expr) ; the Var passed into the function
+        ^Node looked (when asked   ; the Var's value as found in the bindings
+                       (.get binding asked))]
+    (cond
+      (nil? asked)
+      (throw (ExprEvalException. (str "jaro: argument " idx " is not a variable")))
+
+      (nil? looked)
+      (throw (ExprEvalException. (str "jaro: " asked " is not bound")))
+
+      (not (.isLiteral looked))
+      (throw (ExprEvalException. (str "jaro: " asked " is not bound to a literal")))
+
+      :else
+      (str (.getLiteralValue looked)))))
+
+(defn -exec
+  "gen-class implementation of `exec` for the generated class.
+  `v` is the raw argument list: this function reads (nth v 1) as the Binding
+  and (nth v 2) as the ExprList of call arguments, which is the shape of the
+  exec(Binding, ExprList, String, FunctionEnv) overload -- TODO confirm no
+  other overload is ever invoked.
+  Returns the Jaro-Winkler distance of the two looked-up strings as a
+  NodeValueFloat."
+  ^org.apache.jena.sparql.expr.NodeValue [& v]
+  ; debugging aid: (.println java.lang.System/err v)
+  (let [binding  (nth v 1)
+        args     (nth v 2)
+        distance (.distance (JaroWinkler.)
+                            (looked-up binding args 0)
+                            (looked-up binding args 1))]
+    ; NodeValueFloat takes a float, so the double distance is narrowed here.
+    (NodeValueFloat. (float distance))))
+
+; Alternative -exec built on duckling, wrapped in `comment` so it is never
+; evaluated; it uses a `duck` alias that this namespace does not require.
+(comment (defn -exec ^org.apache.jena.sparql.expr.NodeValue [^org.apache.jena.sparql.expr.NodeValue & v]
+           (do (let [asked (.asVar (.get (.getList (nth v 2)) ; asked is the Var passed into the function
+                                         0))
+                     looked (str (.getLiteralValue (.get (nth v 1) ; looked is the Var's value as found (looked up) in the bindings
+                                                         asked)))
+                     parsed (str (:value (:value (first (duck/parse :en$core ; parsed is the parsed Var's value
+                                                                    looked
+                                                                    [:time])))))]
+                 (new org.apache.jena.sparql.expr.nodevalue.NodeValueString parsed)))))