Gen from grammar

main
Eric Ihli 4 years ago
parent 47a29e96a9
commit 1c3a07708a

@ -9,7 +9,8 @@
[taoensso.nippy :as nippy]
[com.owoga.prhyme.nlp.tag-sets.treebank-ii :as tb2]
[clojure.zip :as zip]
[cljol.dig9 :as d]))
[cljol.dig9 :as d]
[com.owoga.prhyme.util.math :as math]))
(def re-word
"Regex for tokenizing a string into words
@ -196,6 +197,15 @@
[k (string/lower-case (first v))]
[k v]))
(defn flatten-trie-entry-to-all-subkeys
[[k v]]
(loop [result []
k k]
(if (empty? k)
result
(recur (conj result [k v])
(rest k)))))
(defn process-text
"Processes text into key value pairs where
the keys are parts-of-speech paths and the values
@ -221,15 +231,6 @@
(mapv (fn [[k v]]
(clojure.lang.MapEntry. (into (vec k) [v]) v)))))
(defn flatten-trie-entry-to-all-subkeys
[[k v]]
(loop [result []
k k]
(if (empty? k)
result
(recur (conj result [k v])
(rest k)))))
(comment
(flatten-trie-entry-to-all-subkeys
'[(TOP S NP) (NP PP)])
@ -299,7 +300,7 @@
(fn [trie entries]
(reduce
(fn [trie [k v]]
(update trie k (fnil #(update % 1 inc) [k 0])))
(update trie k (fnil inc 0)))
trie
entries)))
(trie/make-trie)
@ -309,6 +310,7 @@
(time
(def test-load-trie
(into (trie/make-trie) (nippy/thaw-from-file "/tmp/test-trie.bin"))))
(take 20 test-load-trie)
(time (do
(d/sum [test-trie])
@ -403,46 +405,205 @@
:else
(recur (zip/next zipper)))))
(pick-grammar test-trie @test-database)
(->> (trie/lookup test-trie [7 8 8 8])
(trie/children)
(map #(get % []))
(remove nil?))
(->> (trie/lookup test-trie [7])
(trie/children)
(map #(get % []))
(remove nil?));; => ([[7 2] 3484]
;; [[7 4] 2027]
;; [[7 6] 91]
;; [[7 16] 25]
;; [[7 21] 9]
(@test-database 2)
(->> [[1 [[2 [[3]]]]]]
(zip/vector-zip)
zip/down
zip/down
zip/right
zip/down
zip/down
zip/right
zip/down
zip/down
zip/path
(map first)
(filter integer?)
)
(let [queue (clojure.lang.PersistentQueue/EMPTY)]
(peek (pop (into queue [1 2 3]))))
(->> (map #(get % []) (trie/children (trie/lookup test-trie [7])))
(remove nil?))
(get @test-database 7)
(take 10 (pick-grammar test-trie))
(defn grammar-children
[k]
(sort-by
(comp - last)
(map #(vector (.key %) (@test-database (.key %)) (get % []))
(remove (comp nil? #(get % [])) (trie/children (trie/lookup test-trie k))))))
(defn grammar-branch?
[trie database k]
(vector (database (last k))))
(defn children
[trie database k]
(->> (trie/lookup trie k)
(trie/children)
(map #(vector (.key %) (get % [])))
(remove (comp nil? second))
(sort-by (comp - second))))
(defn choose
[trie database k]
(math/weighted-selection
second
(children trie database k)))
(comment
(choose test-trie @test-database [1])
(let [z1 (zip/vector-zip [1 [2]])
z2 (zip/vector-zip [3 [4]])]
(->> z1
zip/down
zip/right
zip/down
(#(zip/insert-right % (zip/node z2)))
(zip/root))))
(defn generate
[trie database zipper]
(let [k (map first (zip/path zipper))]
(do (Thread/sleep 10) (println k))
(if (vector? (database (last k)))
(loop [zipper zipper]
(let [children (last (map first (zip/path zipper)))]
(Thread/sleep 50) (println children (zip/root zipper))
(if (empty? children)
zipper
(recur
(-> zipper
zip/up
(zip/append-child [(first children)])
(zip/down)
(zip/rightmost)
(zip/down)
(#(generate trie database %))
(zip/up)
(zip/up)
(zip/down)
(zip/replace (subvec 1 children)))))))
(zip/insert-right
zipper
(choose trie database k)))))
(defn generate
[trie database zipper]
(cond
(zip/end? zipper)
(zip/root zipper)
(seqable? (zip/node zipper))
(recur trie database (zip/next zipper))
(symbol? (zip/node zipper))
(recur trie database (zip/next zipper))
(symbol? (database (zip/node zipper)))
(let [sym (database (zip/node zipper))
sym-path (->> (map first (zip/path zipper))
butlast
(filter symbol?)
(#(concat % (list sym))))
path (map database sym-path)
choice (first (choose trie database path))]
(recur
trie
database
(-> zipper
(zip/replace
[sym choice])
(zip/root)
(zip/vector-zip))))
(string? (database (zip/node zipper)))
(let [terminal (database (zip/node zipper))
path (->> (map first (zip/path zipper))
butlast
(filter symbol?))]
(recur
trie
database
(-> zipper
(zip/replace
terminal)
(zip/next)
(zip/root)
(zip/vector-zip))))
:else
(recur
trie
database
(-> zipper
(zip/replace
(mapv
database
(database (zip/node zipper))))
(zip/next)
(zip/root)
(zip/vector-zip)))))
(comment
(trie/lookup test-trie [1])
(->> (generate test-trie @test-database (zip/vector-zip [1]))
(zip/vector-zip)
(iterate zip/next)
(take-while (complement zip/end?))
(map zip/node)
(filter string?))
(-> [:a [:b] [:b]]
zip/vector-zip
zip/down
zip/right
zip/right
zip/down
zip/path)
;; => [[:a [:b] [:b]] [:b]]
(-> [:a [:b] [:b]]
zip/vector-zip
zip/down
zip/right
zip/down
zip/path)
;; => [[:a [:b] [:b]] [:b]]
(@test-database 2)
(->> [1 [2]]
zip/vector-zip
zip/down
zip/right
zip/path
#_(map first))
(pick-grammar test-trie @test-database)
(get test-trie [1 3 62])
(map @test-database ['NN])
(@test-database "time")
(take 5 test-trie)
(->> (trie/lookup test-trie [1])
(trie/children)
(map #(vector (.key %) (get % [])))
(remove nil?))
(->> (trie/lookup test-trie [7])
(trie/children)
(map #(get % []))
(remove nil?)) ;; => ([[7 2] 3484]
;; [[7 4] 2027]
;; [[7 6] 91]
;; [[7 16] 25]
;; [[7 21] 9]
(@test-database 2)
(->> [[1 [[2 [[3]]]]]]
(zip/vector-zip)
zip/down
zip/down
zip/right
zip/down
zip/down
zip/right
zip/down
zip/down
zip/path
(map first)
(filter integer?)
)
(let [queue (clojure.lang.PersistentQueue/EMPTY)]
(peek (pop (into queue [1 2 3]))))
(->> (map #(get % []) (trie/children (trie/lookup test-trie [7])))
(remove nil?))
(get @test-database 7)
(take 10 (pick-grammar test-trie)))
(comment
(let [database (atom {:next-id 1})

@ -706,8 +706,8 @@
zip/seq-zip
(iterate zip/next)
(take-while (complement zip/end?))
(filter #(string? (zip/node %)))
(map zip/node)))
(map zip/node)
(filter string?)))
(comment
(let [corpus ["this is a test"

@ -319,7 +319,7 @@
(map
(fn [r]
(* (inc r) (/ (lm (inc r)) (lm r))))
(partition 2 1 (conj rs (inc (peek rs)))))]))
rs)]))
(comment
(let [rs [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26]

Loading…
Cancel
Save