Add nippy for pos and grammar freqs
parent
75218e770b
commit
c193152a79
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,2 @@
|
||||
(ns com.owoga.prhyme.corpus.db
|
||||
(:require [integrant.core :as ig]))
|
@ -0,0 +1,332 @@
|
||||
(ns com.owoga.prhyme.nlg.prhyme-nlg
|
||||
(:require [clojure.zip :as zip]
|
||||
[clojure.string :as string]
|
||||
[taoensso.timbre :as timbre]
|
||||
[examples.core :as examples]
|
||||
[taoensso.nippy :as nippy]
|
||||
[com.owoga.prhyme.nlp.core :as nlp]
|
||||
[com.owoga.prhyme.util.weighted-rand :as weighted-rand]
|
||||
[clojure.set :as set]))
|
||||
|
||||
(defn update-values [m f & args]
|
||||
(reduce
|
||||
(fn [acc [k v]]
|
||||
(assoc acc k (apply f v args)))
|
||||
{}
|
||||
m))
|
||||
|
||||
(defn generate
|
||||
[pos-path->word-freqs
|
||||
pos->word-freqs
|
||||
target-parse-tree]
|
||||
(loop [parse-zipper (zip/seq-zip target-parse-tree)]
|
||||
(cond
|
||||
(zip/end? parse-zipper) (zip/root parse-zipper)
|
||||
|
||||
(zip/branch? parse-zipper)
|
||||
(recur (zip/next parse-zipper))
|
||||
|
||||
(string? (zip/node parse-zipper))
|
||||
(recur (zip/next parse-zipper))
|
||||
|
||||
(and (symbol? (zip/node parse-zipper))
|
||||
(or (pos->word-freqs (zip/node parse-zipper))
|
||||
(pos-path->word-freqs (seq (map first (zip/path parse-zipper))))))
|
||||
(let [target-path (seq (map first (zip/path parse-zipper)))
|
||||
target-pos (zip/node parse-zipper)
|
||||
pos-path-word (pos-path->word-freqs target-path)
|
||||
pos-word (pos->word-freqs target-pos)]
|
||||
(timbre/info "Choosing POS for" target-path)
|
||||
(let [selection (weighted-rand/weighted-selection-from-map
|
||||
(merge-with +
|
||||
(update-values pos-path-word * 2)
|
||||
pos-word))]
|
||||
(timbre/info "from" (take 5
|
||||
(merge-with +
|
||||
(update-values pos-path-word * 2)
|
||||
pos-word)))
|
||||
(timbre/info "Chose " selection)
|
||||
(recur
|
||||
(-> parse-zipper
|
||||
zip/up
|
||||
(#(zip/replace % (list (zip/node (zip/down %)) selection)))
|
||||
zip/next
|
||||
zip/next))))
|
||||
|
||||
:else
|
||||
(recur (zip/next parse-zipper)))))
|
||||
|
||||
(defn next-word [zipper]
|
||||
(->> zipper
|
||||
nlp/iter-zip
|
||||
(filter #(string? (zip/node %)))
|
||||
first
|
||||
(#(if (nil? %) :end (zip/node %)))))
|
||||
|
||||
(defn next-two-words [nodes]
|
||||
(timbre/info
|
||||
(apply list (->> nodes
|
||||
(map zip/node))))
|
||||
(->> nodes
|
||||
(filter #(string? (zip/node %)))
|
||||
(take 2)
|
||||
(map #(if (nil? %) :end (zip/node %)))))
|
||||
|
||||
(comment
|
||||
(let [zipper (zip/seq-zip '(TOP (S (NN "Eric") (VBZ "is") (JJ "testing"))))]
|
||||
(->> zipper
|
||||
nlp/iter-zip
|
||||
reverse
|
||||
next-two-words))
|
||||
|
||||
)
|
||||
|
||||
(defn set-last [zipper f]
|
||||
(let [last-node (->> zipper
|
||||
(iterate zip/next)
|
||||
(take-while (complement zip/end?))
|
||||
last
|
||||
zip/prev)]
|
||||
(-> last-node
|
||||
(zip/replace (f last-node)))))
|
||||
|
||||
(comment
|
||||
(let [zipper (zip/seq-zip '(TOP (S (NN) (VBZ "is") (JJ))))]
|
||||
(-> zipper
|
||||
(set-last (fn [node] (list (zip/node (zip/next node)) "bad")))
|
||||
zip/root))
|
||||
|
||||
)
|
||||
|
||||
(defn generate-with-markov
|
||||
[pos-path->word-freqs
|
||||
pos->word-freqs
|
||||
target-parse-tree
|
||||
markov]
|
||||
(loop [parse-zipper (zip/seq-zip target-parse-tree)]
|
||||
(cond
|
||||
(zip/end? parse-zipper) (zip/root parse-zipper)
|
||||
|
||||
(zip/branch? parse-zipper)
|
||||
(recur (zip/next parse-zipper))
|
||||
|
||||
(string? (zip/node parse-zipper))
|
||||
(recur (zip/next parse-zipper))
|
||||
|
||||
(and (symbol? (zip/node parse-zipper))
|
||||
(or (pos->word-freqs (zip/node parse-zipper))
|
||||
(pos-path->word-freqs (seq (map first (zip/path parse-zipper))))))
|
||||
(let [target-path (seq (map first (zip/path parse-zipper)))
|
||||
target-pos (zip/node parse-zipper)
|
||||
pos-path-word (pos-path->word-freqs target-path)
|
||||
pos-word (pos->word-freqs target-pos)
|
||||
markov-options (markov (reverse (next-two-words (nlp/iter-zip
|
||||
parse-zipper
|
||||
zip/prev
|
||||
nil?))))]
|
||||
(timbre/info "Markov options are"
|
||||
(apply list (next-two-words (nlp/iter-zip
|
||||
parse-zipper
|
||||
zip/prev
|
||||
nil?)))
|
||||
(apply list (take 3 markov-options)))
|
||||
(timbre/info "Choosing POS for" target-path)
|
||||
(let [selection (weighted-rand/weighted-selection-from-map
|
||||
(merge-with
|
||||
*
|
||||
(update-values markov-options * 10)
|
||||
(update-values pos-path-word * 2)
|
||||
pos-word))]
|
||||
(timbre/info "from" (apply
|
||||
list
|
||||
(take
|
||||
5
|
||||
(merge-with
|
||||
*
|
||||
(update-values markov-options * 10)
|
||||
(update-values pos-path-word * 2)
|
||||
pos-word))))
|
||||
(timbre/info "Chose " selection)
|
||||
(recur
|
||||
(-> parse-zipper
|
||||
zip/up
|
||||
(#(zip/replace % (list (zip/node (zip/down %)) selection)))
|
||||
zip/next
|
||||
zip/next))))
|
||||
|
||||
:else
|
||||
(recur (zip/next parse-zipper)))))
|
||||
|
||||
(defn generate-with-markov-with-custom-progression
|
||||
"Sams as above, but with next/prev and stop fns"
|
||||
[next
|
||||
prev
|
||||
next-stop?
|
||||
prev-stop?
|
||||
pos-path->word-freqs
|
||||
pos->word-freqs
|
||||
parse-zipper
|
||||
markov]
|
||||
(loop [parse-zipper parse-zipper]
|
||||
(cond
|
||||
(nil? (next parse-zipper)) (zip/root parse-zipper)
|
||||
|
||||
(next-stop? parse-zipper) (zip/root parse-zipper)
|
||||
|
||||
(zip/branch? parse-zipper)
|
||||
(recur (next parse-zipper))
|
||||
|
||||
(string? (zip/node parse-zipper))
|
||||
(recur (next parse-zipper))
|
||||
|
||||
(and (symbol? (zip/node parse-zipper))
|
||||
(or (pos->word-freqs (zip/node parse-zipper))
|
||||
(pos-path->word-freqs (seq (map first (zip/path parse-zipper))))))
|
||||
(let [target-path (seq (map first (zip/path parse-zipper)))
|
||||
target-pos (zip/node parse-zipper)
|
||||
pos-path-word (pos-path->word-freqs target-path)
|
||||
pos-word (pos->word-freqs target-pos)
|
||||
pos-map (merge-with
|
||||
(fn [a b] (* 1.5 (+ a b)))
|
||||
pos-path-word
|
||||
pos-word)
|
||||
markov-options (markov (reverse
|
||||
(next-two-words (nlp/iter-zip
|
||||
parse-zipper
|
||||
prev
|
||||
prev-stop?))))
|
||||
selection-possibilities (merge-with
|
||||
(fn [a b]
|
||||
(let [max-pos (apply max (vals pos-map))]
|
||||
(+ a b max-pos)))
|
||||
pos-map
|
||||
markov-options)]
|
||||
(timbre/info "Markov options are"
|
||||
(apply list (next-two-words (nlp/iter-zip
|
||||
parse-zipper
|
||||
prev
|
||||
prev-stop?)))
|
||||
(apply list (take 10 markov-options)))
|
||||
(timbre/info "Choosing POS for" target-path)
|
||||
(let [selection (weighted-rand/weighted-selection-from-map
|
||||
selection-possibilities)]
|
||||
(timbre/info
|
||||
"Most likely selection possibilities"
|
||||
(apply list (take 5 (reverse (sort-by second selection-possibilities)))))
|
||||
(timbre/info "Chose " selection)
|
||||
(recur
|
||||
(-> parse-zipper
|
||||
zip/up
|
||||
(#(zip/replace % (list (zip/node (zip/down %)) selection)))
|
||||
zip/down
|
||||
next
|
||||
next))))
|
||||
|
||||
:else
|
||||
(recur (next parse-zipper)))))
|
||||
|
||||
(comment
|
||||
(let [structure '(TOP (S (NP (DT) (JJ) (NN))
|
||||
(VP (VBZ))
|
||||
(NP (DT) (JJ) (NN))))
|
||||
structure (-> structure
|
||||
zip/seq-zip
|
||||
nlp/iter-zip
|
||||
last)
|
||||
pos-freqs (examples/pos-paths->pos-freqs
|
||||
examples/t1)]
|
||||
(repeatedly
|
||||
10
|
||||
(fn []
|
||||
(->> (generate-with-markov-with-custom-progression
|
||||
zip/prev
|
||||
zip/next
|
||||
nil?
|
||||
zip/end?
|
||||
examples/t1
|
||||
pos-freqs
|
||||
structure
|
||||
examples/darkov-2)
|
||||
nlp/leaf-nodes
|
||||
(string/join " ")))))
|
||||
|
||||
(timbre/set-level! :info)
|
||||
(timbre/set-level! :error)
|
||||
|
||||
(let [pos-path->word-freqs
|
||||
{'(S N) {"Eric" 1 "Edgar" 2}
|
||||
'(S V) {"tests" 2 "runs" 1}}
|
||||
pos->word-freqs
|
||||
{'N {"Edward" 1}
|
||||
'V {"breaks" 1}}
|
||||
target-parse-tree
|
||||
'(S (N) (V))]
|
||||
(-> (generate
|
||||
pos-path->word-freqs
|
||||
pos->word-freqs
|
||||
target-parse-tree)))
|
||||
(time (def example-pos-freqs examples/example-pos-freqs))
|
||||
(nippy/thaw)
|
||||
(nippy/freeze-to-file "resources/1000-pos-path-freqs.nip" example-pos-freqs)
|
||||
|
||||
(time (def example-structures examples/example-structures))
|
||||
(weighted-rand/weighted-selection-from-map
|
||||
example-structures)
|
||||
|
||||
|
||||
|
||||
(take 5 examples/t2)
|
||||
(let [structure '(TOP (S (NP (DT) (JJ) (NN))
|
||||
(VP (VBZ))
|
||||
(NP (DT) (JJ) (NN))))
|
||||
structure (-> structure
|
||||
zip/seq-zip
|
||||
nlp/iter-zip
|
||||
last)
|
||||
pos-freqs (examples/pos-paths->pos-freqs
|
||||
examples/t1)]
|
||||
(repeatedly
|
||||
10
|
||||
(fn []
|
||||
(->> (generate-with-markov-with-custom-progression
|
||||
zip/prev
|
||||
zip/next
|
||||
nil?
|
||||
zip/end?
|
||||
examples/t1
|
||||
pos-freqs
|
||||
structure
|
||||
examples/darkov-2)
|
||||
nlp/leaf-nodes
|
||||
(string/join " ")))))
|
||||
|
||||
(repeatedly
|
||||
10
|
||||
(fn []
|
||||
(let [structure (weighted-rand/weighted-selection-from-map
|
||||
(->> examples/t2
|
||||
(sort-by second)
|
||||
(reverse)
|
||||
(take 20)))
|
||||
structure (-> structure
|
||||
zip/seq-zip
|
||||
nlp/iter-zip
|
||||
last)
|
||||
pos-freqs (examples/pos-paths->pos-freqs
|
||||
examples/t1)]
|
||||
(repeatedly
|
||||
10
|
||||
(fn []
|
||||
(->> (generate-with-markov-with-custom-progression
|
||||
zip/prev
|
||||
zip/next
|
||||
nil?
|
||||
zip/end?
|
||||
examples/t1
|
||||
pos-freqs
|
||||
structure
|
||||
examples/darkov-2)
|
||||
nlp/leaf-nodes
|
||||
(string/join " ")))))))
|
||||
|
||||
)
|
Loading…
Reference in New Issue