Search TPT

main
Eric Ihli 4 years ago
parent 809a30b8a3
commit ded92cc0a2

@ -17,6 +17,19 @@
(bit-shift-right (bit-shift-right
(bit-and mask b) (bit-and mask b)
n)))) n))))
;; Scratch expression: the same fold used by `ones-mask` below, with n fixed at 2.
;; It evaluates to 3 (binary 11). NOTE(review): this appears to be a top-level
;; form whose result is discarded at load time -- confirm it is still wanted.
(reduce (fn [a _] (bit-or 1 (bit-shift-left a 1))) 0 (range 2))
(defn ones-mask
  "Returns an integer whose `n` least-significant bits are all set to 1.
  Returns 0 when `n` is zero or negative.

  (ones-mask 3)
  ;; => 7"
  [n]
  (loop [acc 0
         i 0]
    (if (< i n)
      ;; Shift the bits gathered so far up by one and set the new low bit.
      (recur (bit-or 1 (bit-shift-left acc 1)) (inc i))
      acc)))
(defn bit-slice
  "Extracts the bits of `b` from position `start` (inclusive) up to `end`
  (exclusive), where position 0 is the least-significant bit, and returns
  them shifted down so that the slice begins at bit 0.

  (bit-slice 2 6 2r10101010)
  ;; => 2r1010

  Returns 0 when `end` is not greater than `start`.

  The final shift is unsigned, so a slice that reaches bit 63 of a negative
  long is returned as a non-negative value instead of being sign-extended:

  (bit-slice 60 64 -1)
  ;; => 15"
  [start end b]
  (let [mask (bit-shift-left (ones-mask (- end start)) start)]
    ;; `bit-shift-right` is arithmetic and would smear the sign bit over the
    ;; result whenever the mask covers bit 63; the unsigned shift does not.
    (unsigned-bit-shift-right (bit-and b mask) start)))
(defn as-binary-string [b] (defn as-binary-string [b]
(string/replace (string/replace
@ -92,6 +105,29 @@
;; => ([0 1] [1 1] [127 1] [128 2] [257 2] [9876543210 5]) ;; => ([0 1] [1 1] [127 1] [128 2] [257 2] [9876543210 5])
) )
(defn combine-significant-bits
  "Concatenates integer chunks into a single integer, most-significant chunk
  first. Each chunk is expected to occupy at most `num-significant-bits`
  low-order bits; the running result is shifted left by that many bits before
  the next chunk is OR-ed in.

  (combine-significant-bits 6 2r110110 2r001001)
  ;; => 2r110110001001

  Returns 0 when called with no chunks, so callers that `apply` this to a
  possibly-empty sequence do not hit an arity error inside `reduce`."
  [num-significant-bits & bytes]
  (reduce
   (fn [acc b]
     (bit-or b (bit-shift-left acc num-significant-bits)))
   ;; Explicit seed: without it, `reduce` on an empty collection invokes the
   ;; 2-arity fn with zero arguments and throws.
   0
   bytes))
(comment
;; REPL example: take the 6 low-order bits of two values and join them into
;; one 12-bit value with `combine-significant-bits`. The expected output of
;; the `map` is recorded in the `;; =>` line at the bottom.
(let [b1 (bits "0110110")
b2 (bits "1001001")
;; Keep only bits 0..5 of each value, i.e. drop the 2 high-order flag bits
;; of an 8-bit byte.
slice (partial bit-slice 0 6)
b1' (slice b1)
b2' (slice b2)]
(map
as-binary-string
[b1
b2
b1'
b2'
(combine-significant-bits 6 b1' b2' )]))
;; => ("00110110" "01001001" "00110110" "00001001" "110110001001")
)
(defn byte-buffer-variable-length-decode (defn byte-buffer-variable-length-decode
[bb] [bb]
(let [combine (fn [n b] (let [combine (fn [n b]

@ -500,33 +500,37 @@
byte-arr)) byte-arr))
(defn decode-key [bb max-position] (defn decode-key [bb max-position]
(loop [bytes []] (let [slice (partial tpt/bit-slice 0 7)
(println (.position bb) (map int bytes)) combine (partial tpt/combine-significant-bits 7)]
(cond (loop [bytes []]
(or (< max-position (.position bb)) (println (.position bb) (map int bytes))
(zero? (.remaining bb))) (cond
(first (tpt/vb-decode-1 (byte-array bytes))) (or (< max-position (.position bb))
(zero? (.remaining bb)))
(apply combine (map slice bytes))
(offset-byte? (.get bb (.position bb))) (offset-byte? (.get bb (.position bb)))
(first (tpt/vb-decode-1 (byte-array bytes))) (apply combine (map slice bytes))
:else :else
(recur (conj bytes (.get bb)))))) (recur (conj bytes (.get bb)))))))
(defn decode-offset [bb max-position] (defn decode-offset [bb max-position]
(loop [bytes []] (let [slice (partial tpt/bit-slice 0 7)
(println (.position bb) (map int bytes)) combine (partial tpt/combine-significant-bits 7)]
(println "max" max-position) (loop [bytes []]
(cond (println (.position bb) (map int bytes))
(or (< max-position (.position bb)) (println "max" max-position)
(zero? (.remaining bb))) (cond
(first (tpt/vb-decode-1 (byte-array bytes))) (or (< max-position (.position bb))
(zero? (.remaining bb)))
(apply combine (map slice bytes))
(key-byte? (.get bb (.position bb))) (key-byte? (.get bb (.position bb)))
(first (tpt/vb-decode-1 (byte-array bytes))) (apply combine (map slice bytes))
:else :else
(recur (conj bytes (.get bb)))))) (recur (conj bytes (.get bb)))))))
(defn rewind-to-key [bb stop] (defn rewind-to-key [bb stop]
(loop [] (loop []
@ -540,57 +544,91 @@
(recur)))))) (recur))))))
(defn find-key-in-index (defn find-key-in-index
[bb target-key max-address] [bb target-key max-address not-found]
(println target-key "pos" (.position bb)) (println target-key "pos" (.position bb))
(loop [previous-key nil (loop [previous-key nil
min-position (.position bb) min-position (.position bb)
max-position max-address max-position max-address]
mid-position (+ min-position (quot 2 (- max-position min-position)))] (if (zero? (- max-position min-position))
(Thread/sleep 20) not-found
(println min-position mid-position max-position) (let [mid-position (+ min-position (quot 2 (- max-position min-position)))]
(.position bb mid-position) (Thread/sleep 20)
(let [bb (rewind-to-key bb min-position) (println min-position mid-position max-position)
_ (println "rewound to key") (.position bb mid-position)
current-key (decode-key bb max-position) (let [bb (rewind-to-key bb min-position)
_ (println "cur key" current-key)] _ (println "rewound to key")
(println "keys" current-key target-key) current-key (decode-key bb max-position)
(cond _ (println "cur key" current-key)]
(= current-key target-key) (println "keys" current-key target-key)
(do (println "=") (cond
(.position bb (decode-offset bb max-position)) (= current-key target-key)
bb) (decode-offset bb max-position)
(= current-key previous-key)
(throw "Key not found.") (= current-key previous-key)
(< current-key target-key) (throw "Key not found.")
(recur
current-key (< current-key target-key)
mid-position (recur
max-position current-key
(+ mid-position (quot 2 (- max-position mid-position)))) max-position
(> current-key target-key) (+ mid-position (quot 2 (- max-position mid-position))))
(recur
current-key (> current-key target-key)
min-position (recur
mid-position current-key
(+ min-position (quot 2 (- mid-position min-position)))))))) min-position
(+ min-position (quot 2 (- mid-position min-position))))))))))
(deftype TightlyPackedTrie [byte-buffer] (deftype TightlyPackedTrie [byte-buffer]
clojure.lang.ILookup clojure.lang.ILookup
(valAt [_ ks] (valAt [_ ks]
(let [root-address (.getInt byte-buffer 0)] (let [root-address (.getInt byte-buffer 0)
orig-ks ks]
(.position byte-buffer root-address)
(loop [ks ks]
(let [current-address (.position byte-buffer)]
(if (empty? ks)
(let [value (tpt/byte-buffer-variable-length-decode byte-buffer)
freq (tpt/byte-buffer-variable-length-decode byte-buffer)]
{:value value
:count freq})
(let [val (tpt/byte-buffer-variable-length-decode byte-buffer)
freq (tpt/byte-buffer-variable-length-decode byte-buffer)
size-of-index (tpt/byte-buffer-variable-length-decode byte-buffer)
_ (println "val" val "freq" freq "size" size-of-index)
offset (find-key-in-index
byte-buffer
(first ks)
(+ (.position byte-buffer) size-of-index)
:not-found)]
(if (= offset :not-found)
(throw (Exception. (format "Index not found %s" orig-ks)))
(do (.position byte-buffer (- current-address offset))
(recur (rest ks))))))))))
(valAt [_ ks not-found]
(let [root-address (.getInt byte-buffer 0)
orig-ks ks]
(.position byte-buffer root-address) (.position byte-buffer root-address)
(loop [ks ks] (loop [ks ks]
(if (empty? ks) (let [current-address (.position byte-buffer)]
(let [value (tpt/byte-buffer-variable-length-decode byte-buffer) (if (empty? ks)
freq (tpt/byte-buffer-variable-length-decode byte-buffer)] (let [value (tpt/byte-buffer-variable-length-decode byte-buffer)
{:value value freq (tpt/byte-buffer-variable-length-decode byte-buffer)]
:count freq}) {:value value
(let [val (tpt/byte-buffer-variable-length-decode byte-buffer) :count freq})
freq (tpt/byte-buffer-variable-length-decode byte-buffer) (let [val (tpt/byte-buffer-variable-length-decode byte-buffer)
size-of-index (tpt/byte-buffer-variable-length-decode byte-buffer) freq (tpt/byte-buffer-variable-length-decode byte-buffer)
_ (println "val" val "freq" freq "size" size-of-index) size-of-index (tpt/byte-buffer-variable-length-decode byte-buffer)
bb (find-key-in-index byte-buffer (first ks) (+ (.position byte-buffer) size-of-index))] _ (println "val" val "freq" freq "size" size-of-index)
(recur (rest ks)))))))) offset (find-key-in-index
byte-buffer
(first ks)
(+ (.position byte-buffer) size-of-index)
:not-found)]
(if (= offset :not-found)
not-found
(do (.position byte-buffer (- current-address offset))
(recur (rest ks)))))))))))
(comment (comment
(let [v1 '(1 2 1 121) (let [v1 '(1 2 1 121)
@ -602,8 +640,8 @@
vect (as-vec t3) vect (as-vec t3)
packed (tightly-packed-trie t3) packed (tightly-packed-trie t3)
tpt (->TightlyPackedTrie packed)] tpt (->TightlyPackedTrie packed)]
(println packed) (as-map (as-byte-array t3))
(get tpt '(1))) (get tpt '(1 2 1)))
) )

Loading…
Cancel
Save