diff --git a/deps.edn b/deps.edn index 8bad737..c156458 100644 --- a/deps.edn +++ b/deps.edn @@ -12,6 +12,11 @@ :jar {:replace-deps {com.github.seancorfield/depstar {:mvn/version "2.0.193"}} :exec-fn hf.depstar/jar :exec-args {:jar "TightlyPackedTrie.jar" :sync-pom true}} + :runner {:extra-deps {com.cognitect/test-runner + {:git/url "https://github.com/cognitect-labs/test-runner" + :sha "b6b3193fcc42659d7e46ecd1884a228993441182"}} + :main-opts ["-m" "cognitect.test-runner" + "-d" "test"]} :deploy {:replace-deps {slipset/deps-deploy {:mvn/version "0.1.5"}} :exec-fn deps-deploy.deps-deploy/deploy :exec-args {:installer :remote diff --git a/src/com/owoga/tightly_packed_trie.clj b/src/com/owoga/tightly_packed_trie.clj index 09706f9..f215f41 100644 --- a/src/com/owoga/tightly_packed_trie.clj +++ b/src/com/owoga/tightly_packed_trie.clj @@ -7,6 +7,8 @@ (:import (java.io ByteArrayOutputStream ByteArrayInputStream DataOutputStream DataInputStream))) +#_(set! *warn-on-reflection* true) + ;; A trie data structure that can be converted to ;; a contiguous array of bytes while maintaining ;; efficient lookups. @@ -106,6 +108,7 @@ :count freq})) (declare -value) +(declare children-) (deftype TightlyPackedTrie [^java.nio.ByteBuffer byte-buffer ^Integer key @@ -137,28 +140,7 @@ value-decode-fn)] (trie/lookup child (rest ks)))))))) (children [self] - (wrap-byte-buffer - byte-buffer - (.limit byte-buffer limit) - (.position byte-buffer address) - (let [val (value-decode-fn byte-buffer) - size-of-index (encoding/decode byte-buffer)] - (.limit byte-buffer ^Integer (+ (.position byte-buffer) - size-of-index)) - (loop [children []] - (if (= (.position byte-buffer) (.limit byte-buffer)) - children - (let [child-key (encoding/decode-number-from-tightly-packed-trie-index byte-buffer) - child-offset (encoding/decode-number-from-tightly-packed-trie-index byte-buffer)] - (recur - (conj - children - (TightlyPackedTrie. - byte-buffer - child-key - (- address child-offset) - (.capacity byte-buffer) - value-decode-fn))))))))) + (children- byte-buffer address limit value-decode-fn)) clojure.lang.ILookup (valAt [self ks] @@ -207,6 +189,34 @@ (.position byte-buffer ^Integer (.address trie)) (value-decode-fn byte-buffer)))) +(defn children- [^java.nio.ByteBuffer + byte-buffer + ^Integer address + ^Integer limit + value-decode-fn] + (wrap-byte-buffer + byte-buffer + (.limit byte-buffer limit) + (.position byte-buffer address) + (let [val (value-decode-fn byte-buffer) + size-of-index (encoding/decode byte-buffer)] + (.limit byte-buffer ^Integer (+ (.position byte-buffer) + size-of-index)) + (loop [children []] + (if (= (.position byte-buffer) (.limit byte-buffer)) + children + (let [child-key (encoding/decode-number-from-tightly-packed-trie-index byte-buffer) + child-offset (encoding/decode-number-from-tightly-packed-trie-index byte-buffer)] + (recur + (conj + children + (TightlyPackedTrie. + byte-buffer + child-key + (- address child-offset) + (.capacity byte-buffer) + value-decode-fn))))))))) + (defmethod print-method TightlyPackedTrie [trie ^java.io.Writer w] (print-method (into {} trie) w)) diff --git a/src/com/owoga/trie.clj b/src/com/owoga/trie.clj index 40d6467..6d00844 100644 --- a/src/com/owoga/trie.clj +++ b/src/com/owoga/trie.clj @@ -253,6 +253,17 @@ (make-trie) (partition 2 ks)))) +(defn make-trie' + "nil root key instead of empty list, clearer interface" + ([] + (->Trie nil nil (sorted-map))) + ([& ks] + (reduce + (fn [t kv] + (conj t kv)) + (make-trie) + (partition 2 ks)))) + (comment (make-trie "do" "do" "dot" "dot" "dog" "dog") ;; => {[\d \o \g] "dog", [\d \o \t] "dot", [\d \o] "do"} @@ -282,3 +293,33 @@ (into (make-trie))) ;; => {[1 2 2] 244, [1 2 3] 246, [1 2 4] 248, [1 2] 24} ) + +(defn trie->children-at-depth + [[[node & nodes] & stack] [parent & parents] min-depth max-depth] + (let [current-depth (count parents)] + (cond + (and node (< current-depth max-depth)) + (trie->children-at-depth + (into (into stack (list nodes)) + (list (children node))) + (cons node (if parent (cons parent parents) nil)) + min-depth + max-depth) + (and parent (some? (get parent [])) (>= current-depth min-depth)) + (lazy-seq + (cons (clojure.lang.MapEntry. + (rest (reverse (map #(.key %) (cons parent parents)))) + (get parent [])) + (trie->children-at-depth stack (sequence parents) min-depth max-depth))) + parent + (trie->children-at-depth stack (sequence parents) min-depth max-depth) + :else + nil))) + +(defn children-at-depth + ([trie depth] + (children-at-depth trie depth (inc depth))) + ([trie min-depth max-depth] + (trie->children-at-depth + `((~trie)) '() min-depth max-depth))) + diff --git a/src/com/owoga/trie/math.clj b/src/com/owoga/trie/math.clj index 4df2792..4ff8e39 100644 --- a/src/com/owoga/trie/math.clj +++ b/src/com/owoga/trie/math.clj @@ -27,6 +27,7 @@ (->Vose N alias prob) (let [^doubles ps (->> dist (map (partial * N)) + (map float) (into-array Double/TYPE)) [small large] (loop [i 0 @@ -65,7 +66,7 @@ (->Vose N alias prob))))) (defn from-weights [ws] - (let [tot (reduce + 0.0 ws)] + (let [tot (apply + ws)] (assert (> tot 0) "Can't Vose RNG from 0 weights.") (let [dist (map #(/ % tot) ws)] (make-vose (vec dist))))) diff --git a/test/tightly_packed_trie/core_test.clj b/test/tightly_packed_trie/core_test.clj deleted file mode 100644 index 3b0d2bc..0000000 --- a/test/tightly_packed_trie/core_test.clj +++ /dev/null @@ -1,21 +0,0 @@ -(ns tightly-packed-trie.core-test - "Basic tests for the primary API of `next.jdbc`." - (:require [clojure.test :refer [deftest is testing use-fixtures]] - [com.owoga.tightly-packed-trie.core :as tpt])) - -(deftest basic-tests - ;; use ds-opts instead of (ds) anywhere you want default options applied: - (testing "map-based trie" - (let [trie (tpt/trie)] - (testing "key not found" - (is (thrown-with-msg? - Exception - #"Key not found" - (get trie '("foo")))) - (is (= :not-found - (get trie '("foo") :not-found)))) - (testing "conjing to trie" - (let [trie (conj trie '("d" "o" "g" "dog"))] - (is (instance? com.owoga.tightly_packed_trie.core.Trie (get trie '("d" "o" "g")))) - (is (= (tpt/as-map (get trie '("d" "o" "g"))) - {"g" {:value "dog" :count 1}}))))))) diff --git a/test/tightly_packed_trie_test.clj b/test/tightly_packed_trie_test.clj index bad1a05..13a7438 100644 --- a/test/tightly_packed_trie_test.clj +++ b/test/tightly_packed_trie_test.clj @@ -81,6 +81,18 @@ [[] nil]) (seq (trie/lookup initialized-trie [1 2]))))))) +(deftest children-at-depth-tests + (let [initialized-trie (->> (trie/make-trie '(1) 1 '(1 2 3) 123 '(1 2 1) 121 '(1 2 2) 122 '(1 3 1) 131 + '(1 2 3 4) 1234 + '(1 2 3 4 5 6) 123456) + (#(tpt/tightly-packed-trie % value-encode-fn value-decode-fn)))] + (testing "children at depth" + (is (= '([(1) 1]) + (trie/children-at-depth initialized-trie 0))) + (is (= '([(1 2 3 4 5 6) 123456] + [(1 2 3 4) 1234]) + (trie/children-at-depth initialized-trie 4 6)))))) + (comment (let [trie (trie/make-trie '(1 2 3) 123 '(1 2 1) 121 '(1 2 2) 122 '(1 3 1) 131) tpt (tpt/tightly-packed-trie trie value-encode-fn value-decode-fn) @@ -94,4 +106,18 @@ (encode/decode-number-from-tightly-packed-trie-index byte-buffer) (encode/decode-number-from-tightly-packed-trie-index byte-buffer)])) + (let [trie (trie/make-trie '(1) 1 '(1 2) 12 '(1 3) 13 '(2 3) 23 '(5) 5 '(6 7 8) 678) + tpt (tpt/tightly-packed-trie trie value-encode-fn value-decode-fn)] + (get tpt [])) + + (let [trie (trie/make-trie '(1) 1 '(1 2) 12 '(1 3) 13 '(2 3) 23 '(5) 5 '(6 7 8) 678) + tpt (tpt/tightly-packed-trie trie value-encode-fn value-decode-fn)] + (tpt/trie->children-at-depth (list (list tpt)) '() 3 5)) + ) + +(let [trie (trie/make-trie '(1 2 3) 123 '(1 2 1) 121 '(1 2 2) 122 '(1 3 1) 131) + initialized-trie (-> trie + (tpt/tightly-packed-trie value-encode-fn value-decode-fn))] + [initialized-trie + trie]) diff --git a/test/trie_test.clj b/test/trie_test.clj index 32b2170..4953bb1 100644 --- a/test/trie_test.clj +++ b/test/trie_test.clj @@ -33,3 +33,16 @@ (seq initialized-trie))) (is (= '([[1 2] 12] [[1] 1]) (seq (assoc initialized-trie '(1) 1))))))) + +(deftest children-at-depth-tests + (let [initialized-trie (->> (trie/make-trie '(1) 1 '(1 2 3) 123 '(1 2 1) 121 '(1 2 2) 122 '(1 3 1) 131 + '(1 2 3 4) 1234 + '(1 2 3 4 5 6) 123456))] + (testing "children at depth" + (is (= '([(1) 1]) + (trie/children-at-depth initialized-trie 0))) + (is (= '([(1 2 3 4 5 6) 123456] + [(1 2 3 4) 1234]) + (trie/children-at-depth initialized-trie 4 6))) + (is (= nil (trie/children-at-depth initialized-trie -1))) + (is (= nil (trie/children-at-depth initialized-trie 5 4))))))