diff --git a/README.org b/README.org index 0e00d8a..5f4fb3a 100644 --- a/README.org +++ b/README.org @@ -194,3 +194,5 @@ If you're familiar with binary search over sorted lists, you'll know this is a c **** How is a trie faster? +* Development + diff --git a/pom.xml b/pom.xml index 0f76cd6..e32f161 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ jar com.owoga tightly-packed-trie - 0.2.1 + 0.2.2 tightly-packed-trie scm:git:git://github.com/eihli/clj-tightly-packed-trie.git diff --git a/src/com/owoga/tightly_packed_trie.clj b/src/com/owoga/tightly_packed_trie.clj index 3b45a61..dbb5f8b 100644 --- a/src/com/owoga/tightly_packed_trie.clj +++ b/src/com/owoga/tightly_packed_trie.clj @@ -208,19 +208,40 @@ (valAt [self ks not-found] (or (get self ks) not-found)) + clojure.lang.Counted + (count [trie] + (count (seq trie))) + clojure.lang.Seqable (seq [trie] - (->> trie - (#(trie->depth-first-post-order-traversable-zipperable-vector - [] - % - value-decode-fn)) - zip/vector-zip - (iterate zip/next) - (take-while (complement zip/end?)) - (map zip/node) - (filter (partial instance? clojure.lang.MapEntry)) - (#(if (empty? %) nil %))))) + (let [step (fn step [path [[node & nodes] & stack] [parent & parents]] + (cond + node + (step (conj path (.key node)) + (into (into stack (list nodes)) + (list (trie/children node))) + (cons node (cons parent parents))) + (and parent (not= 0 (.key parent))) + (lazy-seq + (cons (clojure.lang.MapEntry. + (rest path) + (let [byte-buffer (.byte-buffer parent)] + (wrap-byte-buffer + byte-buffer + (.limit byte-buffer (.limit parent)) + (.position byte-buffer (.address parent)) + (value-decode-fn byte-buffer)))) + (step (pop path) + stack + parents))) + :else nil))] + (step [] (list (list trie)) '())))) + +(defmethod print-method TightlyPackedTrie [trie ^java.io.Writer w] + (print-method (into {} trie) w)) + +(defmethod print-dup TightlyPackedTrie [trie ^java.io.Writer w] + (print-ctor trie (fn [o w] (print-dup (into {} trie) w)) w)) (defn tightly-packed-trie [trie value-encode-fn value-decode-fn] diff --git a/src/com/owoga/trie.clj b/src/com/owoga/trie.clj index 4880cc6..02290f6 100644 --- a/src/com/owoga/trie.clj +++ b/src/com/owoga/trie.clj @@ -1,31 +1,4 @@ -(ns com.owoga.trie - (:require [clojure.zip :as zip])) - -(defn -trie->depth-first-post-order-traversable-zipperable-vector - ([path node] - (vec - (map - (fn [[k v]] - [(-trie->depth-first-post-order-traversable-zipperable-vector (conj path k) v) - (clojure.lang.MapEntry. (conj path k) (.value v))]) - (.children- node))))) - -(defn trie->depth-first-post-order-traversable-zipperable-vector - [path node] - (if (.value node) - [(-trie->depth-first-post-order-traversable-zipperable-vector - path node) - (clojure.lang.MapEntry. path (.value node))] - (-trie->depth-first-post-order-traversable-zipperable-vector - path node))) - -(defn depth-first-post-order-traversable-zipperable-vector->trie - [cls [children [key node]]] - (sorted-map - (last key) - (cls (.key node) (.value node) - (into (sorted-map) - (map depth-first-post-order-traversable-zipperable-vector->trie children))))) +(ns com.owoga.trie) (declare ->Trie) @@ -61,47 +34,32 @@ (fn [[k child]] (Trie. k (.value child) - #_(sorted-map) (.children- child))) children-)) (lookup [trie k] - (loop [k' k - trie' trie] + (loop [k k + trie trie] (cond ;; Allows `update` to work the same as with maps... can use `fnil`. ;; (nil? trie') (throw (Exception. (format "Key not found: %s" k))) - (nil? trie') nil - (empty? k') - (Trie. (.key trie') - (.value trie') - (.children- trie')) + (nil? trie) nil + (empty? k) + (Trie. (.key trie) + (.value trie) + (.children- trie)) :else (recur - (rest k') - (get (.children- trie') (first k')))))) + (rest k) + (get (.children- trie) (first k)))))) clojure.lang.ILookup (valAt [trie k] - (loop [k' k - trie' trie] - (cond - ;; Allows `update` to work the same as with maps... can use `fnil`. - ;; (nil? trie') (throw (Exception. (format "Key not found: %s" k))) - (nil? trie') nil - (empty? k') (.value trie') - :else (recur - (rest k') - (get (.children- trie') (first k')))))) + (if-let [node (lookup trie k)] + (.value node) + nil)) (valAt [trie k not-found] - (loop [k' k - trie' trie] - (cond - (nil? trie') not-found - (empty? k') (.value trie') - :else (recur - (rest k') - (get (.children- trie') (first k')))))) + (or (get trie k) not-found)) clojure.lang.IPersistentCollection (cons [trie entry] @@ -110,10 +68,8 @@ (assoc trie (first entry) (.value (second entry))) :else (assoc trie (first entry) (second entry)))) - (empty [trie] (Trie. key nil (sorted-map))) - (equiv [trie o] (and (= (.value trie) (.value o)) @@ -145,20 +101,39 @@ (without [trie key] (-without trie key)) + java.lang.Iterable + (iterator [trie] + (.iterator (seq trie))) + clojure.lang.Counted (count [trie] (count (seq trie))) clojure.lang.Seqable (seq [trie] - (->> trie - ((partial trie->depth-first-post-order-traversable-zipperable-vector [])) - zip/vector-zip - (iterate zip/next) - (take-while (complement zip/end?)) - (map zip/node) - (filter (partial instance? clojure.lang.MapEntry)) - (#(if (empty? %) nil %))))) + (let [step (fn step [path [[node & nodes] & stack] [parent & parents]] + (cond + node + (step (conj path (.key node)) + (into (into stack (list nodes)) + (list (children node))) + (cons node (cons parent parents))) + (and parent (not= '() (.key parent))) + (lazy-seq + (cons (clojure.lang.MapEntry. + (rest path) + (.value parent)) + (step (pop path) + stack + parents))) + :else nil))] + (step [] (list (list trie)) '())))) + +(defmethod print-method Trie [trie ^java.io.Writer w] + (print-method (into {} trie) w)) + +(defmethod print-dup Trie [trie ^java.io.Writer w] + (print-ctor trie (fn [o w] (print-dup (into {} trie) w)) w)) (defn make-trie ([] diff --git a/test/tightly_packed_trie_test.clj b/test/tightly_packed_trie_test.clj index ad68e22..04bacbc 100644 --- a/test/tightly_packed_trie_test.clj +++ b/test/tightly_packed_trie_test.clj @@ -31,8 +31,17 @@ (testing "ITrie" (testing "lookup" (is (= nil (trie/lookup empty-trie [1]))) + + ;; A `get` of a node returns the value at the node. (is (= 1 (get (trie/lookup initialized-trie [1]) []))) - (is (= 12 (get (trie/lookup initialized-trie [1]) [2])))) + (is (= 12 (get (trie/lookup initialized-trie [1]) [2]))) + + ;; A `seq` of a node is a depth-first post-order traversal of its descendants. + (is (= '([[2] 12] [[3] 13] [[] 1]) + (seq (trie/lookup initialized-trie [1]))))) + + ;; The children of a node are only the immediate children and + ;; the root node's value is excluded. (testing "children" (is (= '(12 13) (map #(get % []) @@ -61,9 +70,14 @@ [[1 2] nil] [[1 3 1] 131] [[1 3] nil] - [[1] nil] - [[] nil]) - (seq initialized-trie)))))) + [[1] nil]) + (seq initialized-trie)))) + (testing "Seq on lookup" + (is (= '([[1] 121] + [[2] 122] + [[3] 123] + [[] nil]) + (seq (trie/lookup initialized-trie [1 2]))))))) (comment (let [trie (trie/make-trie '(1 2 3) 123 '(1 2 1) 121 '(1 2 2) 122 '(1 3 1) 131) diff --git a/test/trie_test.clj b/test/trie_test.clj index 187367b..32b2170 100644 --- a/test/trie_test.clj +++ b/test/trie_test.clj @@ -13,9 +13,9 @@ (testing "dissoc" (let [expected (-> (trie/make-trie) (assoc '(1) 1)) - trie (-> (trie/make-trie) - (assoc '(1) 1) - (assoc '(1 3) 13))] + trie (-> (trie/make-trie) + (assoc '(1) 1) + (assoc '(1 3) 13))] (is (= expected (dissoc trie '(1 3)))))) (testing "ILookup" (is (= 12 (get initialized-trie '(1 2)))) @@ -30,5 +30,6 @@ (is (= 2 (count initialized-trie)))) (testing "Seqable" (is (= '([[1 2] 12] [[1] nil]) - (seq initialized-trie)))))) - + (seq initialized-trie))) + (is (= '([[1 2] 12] [[1] 1]) + (seq (assoc initialized-trie '(1) 1)))))))