|
|
@ -104,7 +104,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
(defn create-trie-from-texts [texts]
|
|
|
|
(defn create-trie-from-texts [texts]
|
|
|
|
(->> texts
|
|
|
|
(->> texts
|
|
|
|
(map #(n-to-m-grams 1 4 %))
|
|
|
|
(map #(n-to-m-grams 1 5 %))
|
|
|
|
(apply concat)
|
|
|
|
(apply concat)
|
|
|
|
(map prep-ngram-for-trie)
|
|
|
|
(map prep-ngram-for-trie)
|
|
|
|
(reduce
|
|
|
|
(reduce
|
|
|
@ -141,7 +141,7 @@
|
|
|
|
(inc i))))))
|
|
|
|
(inc i))))))
|
|
|
|
|
|
|
|
|
|
|
|
(def trie
|
|
|
|
(def trie
|
|
|
|
(let [texts (->> (dark-corpus-file-seq 500 500)
|
|
|
|
(let [texts (->> (dark-corpus-file-seq 0 1000)
|
|
|
|
(map slurp))]
|
|
|
|
(map slurp))]
|
|
|
|
(create-trie-from-texts texts)))
|
|
|
|
(create-trie-from-texts texts)))
|
|
|
|
|
|
|
|
|
|
|
@ -177,13 +177,11 @@
|
|
|
|
(map (fn [[k v]]
|
|
|
|
(map (fn [[k v]]
|
|
|
|
(let [k (map #(get trie-database %) k)]
|
|
|
|
(let [k (map #(get trie-database %) k)]
|
|
|
|
[k v])))
|
|
|
|
[k v])))
|
|
|
|
(into (trie/make-trie)))
|
|
|
|
(into (trie/make-trie)))]
|
|
|
|
tightly-packed-trie
|
|
|
|
|
|
|
|
(tpt/tightly-packed-trie
|
|
|
|
(tpt/tightly-packed-trie
|
|
|
|
tight-ready-trie
|
|
|
|
tight-ready-trie
|
|
|
|
encode-fn
|
|
|
|
encode-fn
|
|
|
|
decode-fn)]
|
|
|
|
decode-fn)))
|
|
|
|
tight-ready-trie))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(defn key-get-in-tpt [tpt db ks]
|
|
|
|
(defn key-get-in-tpt [tpt db ks]
|
|
|
|
(let [id (map #(get-in db [(list %) :id]) ks)
|
|
|
|
(let [id (map #(get-in db [(list %) :id]) ks)
|
|
|
@ -198,9 +196,6 @@
|
|
|
|
{ks (assoc v :value (get db id))}))
|
|
|
|
{ks (assoc v :value (get db id))}))
|
|
|
|
|
|
|
|
|
|
|
|
(comment
|
|
|
|
(comment
|
|
|
|
(trie/lookup tightly-packed-trie [1 28 9])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(def example-story
|
|
|
|
(def example-story
|
|
|
|
(loop [generated-text [(get trie-database "<s>")]
|
|
|
|
(loop [generated-text [(get trie-database "<s>")]
|
|
|
|
i 0]
|
|
|
|
i 0]
|
|
|
|