Experiments in memory efficient data structures
parent
2a2d5adc35
commit
3c3bc377c0
@ -0,0 +1,100 @@
|
||||
(ns com.owoga.prhyme.data.scratch
|
||||
(:require [clojure.java.io :as io]
|
||||
[cljol.dig9 :as d]
|
||||
[com.owoga.prhyme.data.dictionary :as dict])
|
||||
(:import (java.nio ByteBuffer)
|
||||
(java.io FileInputStream
|
||||
FileOutputStream
|
||||
DataOutputStream
|
||||
ByteArrayOutputStream)))
|
||||
|
||||
(def symbols
|
||||
(-> (io/reader (io/resource "cmudict-0.7b.symbols"))
|
||||
line-seq))
|
||||
|
||||
(count dict/prhyme-dict)
|
||||
(type dict/prhyme-dict)
|
||||
(def foo (take 5 dict/prhyme-dict))
|
||||
|
||||
(count foo)
|
||||
|
||||
(def g (d/sum foo))
|
||||
|
||||
(def g (d/sum (into {} (take 13000 dict/prhyme-dict))))
|
||||
(let [buf (ByteBuffer/allocate 1024)]
|
||||
(with-open [in (FileInputStream. "src/com/owoga/prhyme/data/random.txt")
|
||||
out (FileOutputStream. "src/com/owoga/prhyme/data/random.out")]
|
||||
(loop [len (.. in (getChannel) (read buf))]
|
||||
(cond
|
||||
(= len -1) (println "done")
|
||||
:else
|
||||
(do
|
||||
(println (format "read %d" len))
|
||||
(.clear buf)
|
||||
(recur (.. in (getChannel) (read buf))))))))
|
||||
|
||||
(defn ow-output-stream []
|
||||
(let [baos (ByteArrayOutputStream.)
|
||||
dos (DataOutputStream. baos)]
|
||||
dos))
|
||||
|
||||
(defn symbol->byte-array [s]
|
||||
(let [b (.getBytes s)
|
||||
r (cons (count b) b)]
|
||||
(byte-array r)))
|
||||
|
||||
(defn symbols->byte-array [s]
|
||||
(let [baos (ByteArrayOutputStream.)
|
||||
dos (DataOutputStream. baos)]
|
||||
(run!
|
||||
(fn [[i sym]]
|
||||
(let [b (.getBytes sym)]
|
||||
(.write dos (byte 0))
|
||||
(.write dos (byte (inc (count b))))
|
||||
(.write dos (byte i))
|
||||
(.writeBytes dos sym)))
|
||||
(map vector (range) s))
|
||||
(.flush dos)
|
||||
(.toByteArray baos)))
|
||||
|
||||
(def sym-array (symbols->byte-array symbols))
|
||||
|
||||
(defn sym-array-get [sym-array n]
|
||||
(loop [start 0
|
||||
end (count sym-array)]
|
||||
(let [mid (loop [mid (+ start (quot (- end start) 2))]
|
||||
(let [cur (aget sym-array mid)]
|
||||
(cond
|
||||
(= cur 0) (inc mid)
|
||||
:else (recur (dec mid)))))
|
||||
id (aget sym-array (inc mid))]
|
||||
(cond
|
||||
(or (= start (dec (count sym-array)))
|
||||
(= end 1))
|
||||
-1
|
||||
(= id n) (let [l (dec (aget sym-array mid))
|
||||
ba (byte-array l)]
|
||||
(run!
|
||||
#(aset ba % (aget sym-array (+ (+ 2 mid) %)))
|
||||
(range l))
|
||||
(apply str (map char ba)))
|
||||
(= mid start) (recur (+ start (aget sym-array mid)) end)
|
||||
(> id n) (recur start mid)
|
||||
(< id n) (recur mid end)))))
|
||||
|
||||
|
||||
(sym-array-get sym-array 1)
|
||||
|
||||
(map identity (.getBytes (last symbols)))
|
||||
|
||||
(map identity sym-array)
|
||||
(let [baos (ByteArrayOutputStream.)
|
||||
dos (DataOutputStream. baos)]
|
||||
(.writeInt dos 0)
|
||||
(run!
|
||||
#(.write dos (symbol->byte-array %))
|
||||
symbols)
|
||||
(.flush dos)
|
||||
(with-open [out (io/output-stream "src/com/owoga/prhyme/data/scratch.out")]
|
||||
(let [ba (.toByteArray baos)]
|
||||
(.write out ba 0 (count ba)))))
|
Loading…
Reference in New Issue