Nicely working markov demo

main
Eric Ihli 3 years ago
parent 01280ff29c
commit 7006069aa3

@ -399,22 +399,12 @@
(let [files (->> "dark-corpus" (let [files (->> "dark-corpus"
io/file io/file
file-seq file-seq
(eduction (xf-file-seq 0 1000))) (eduction (xf-file-seq 0 5000)))
[trie database] (train-backwards files 1 4 "/tmp/trie.bin" "/tmp/database.bin" "/tmp/tpt.bin")])) [trie database] (train-backwards files 1 4 "/tmp/trie.bin" "/tmp/database.bin" "/tmp/tpt.bin")]))
(def trie (into (trie/make-trie) (nippy/thaw-from-file "/tmp/trie.bin"))) (def markov-trie (into (trie/make-trie) (nippy/thaw-from-file "/tmp/trie.bin")))
(def database (nippy/thaw-from-file "/tmp/database.bin"))
(take 5 trie) (def markov-tight-trie (tpt/tightly-packed-trie markov-trie encode-fn (decode-fn db)))
;; => ([(0 0 0 1) [1 2]]
;; [(0 0 0 3) [3 1]]
;; [(0 0 0 4) [4 1]]
;; [(0 0 0 5) [5 8]]
;; [(0 0 0 10) [10 1]])
(def tight (tpt/tightly-packed-trie trie encode-fn (decode-fn db)))
tight
(def db (nippy/thaw-from-file "/tmp/database.bin"))
(db 4)
) )
@ -432,13 +422,7 @@
(let [database (atom (nippy/thaw-from-file "/tmp/database.bin"))] (let [database (atom (nippy/thaw-from-file "/tmp/database.bin"))]
(gen-rhyme-model prhyme/phrase->all-flex-rhyme-tailing-consonants-phones database "/tmp/rhyme-trie.bin")) (gen-rhyme-model prhyme/phrase->all-flex-rhyme-tailing-consonants-phones database "/tmp/rhyme-trie.bin"))
(def rt (into (trie/make-trie) (nippy/thaw-from-file "/tmp/rhyme-trie.bin"))) (def rhyme-trie (into (trie/make-trie) (nippy/thaw-from-file "/tmp/rhyme-trie.bin")))
(take 100 rt)
(prhyme/phrase->all-flex-rhyme-tailing-consonants-phones "brasilia")
(phonetics/get-phones "brasilia")
) )
(defn choice->n-gram (defn choice->n-gram
@ -470,7 +454,8 @@
[trie phones] [trie phones]
(->> (trie/lookup trie phones) (->> (trie/lookup trie phones)
(remove (comp nil? second)) (remove (comp nil? second))
(map #(update % 0 into (reverse phones))))) (map #(update % 0 into (reverse phones)))
(map #(update % 0 vec))))
(comment (comment
(let [rhyme-trie (trie/make-trie ["G" "AA1" "B"] "bog" ["G" "AO1" "B"] "bog" (let [rhyme-trie (trie/make-trie ["G" "AA1" "B"] "bog" ["G" "AO1" "B"] "bog"
@ -519,7 +504,9 @@
(defn get-next-markov (defn get-next-markov
"Weighted selection from markov model with backoff. "Weighted selection from markov model with backoff.
Expects markov key/values to be [k1 k2 k3] [<value> freq]." Expects markov key/values to be [k1 k2 k3] [<value> freq]."
[markov-trie seed] ([markov-trie seed]
(get-next-markov markov-trie seed (constantly false)))
([markov-trie seed remove-fn]
(let [seed (take-last 3 seed) (let [seed (take-last 3 seed)
node (trie/lookup markov-trie seed) node (trie/lookup markov-trie seed)
children (and node children (and node
@ -529,20 +516,21 @@
; Get key and frequency of each child ; Get key and frequency of each child
[(.key child) [(.key child)
(get child [])])) (get child [])]))
(remove (comp nil? second))))] (remove (comp nil? second))
(remove remove-fn)))]
(cond (cond
; If we've never seen this n-gram, fallback to n-1-gram ; If we've never seen this n-gram, fallback to n-1-gram
(nil? node) (recur markov-trie (rest seed)) (nil? node) (recur markov-trie (rest seed) remove-fn)
(seq children) (seq children)
(if (< (rand) (/ (apply max (map (comp second second) children)) (if (< (rand) (/ (apply max (map (comp second second) children))
(apply + (map (comp second second) children)))) (apply + (map (comp second second) children))))
(recur markov-trie (rest seed)) (recur markov-trie (rest seed) remove-fn)
(first (math/weighted-selection (comp second second) children))) (first (math/weighted-selection (comp second second) children)))
(> (count seed) 0) (> (count seed) 0)
(recur markov-trie (rest seed)) (recur markov-trie (rest seed) remove-fn)
; If we have a node but no children, or if we don't have a seed, ; If we have a node but no children, or if we don't have a seed,
; we don't know how to handle that situation. ; we don't know how to handle that situation.
:else (throw (Exception. "Error"))))) :else (throw (Exception. "Error"))))))
(defn normalized-frequencies (defn normalized-frequencies
[coll] [coll]
@ -588,14 +576,18 @@
((fn [[phones words]] ((fn [[phones words]]
[[phones] (rand-nth (vec words))])))] [[phones] (rand-nth (vec words))])))]
(loop [phrase [rhyme]] (loop [phrase [rhyme]]
(if (<= target-sentence-syllable-count (if (or (= prhyme/BOS (second (peek phrase)))
(<= target-sentence-syllable-count
(prhyme/count-syllables-of-phrase (prhyme/count-syllables-of-phrase
(string/join " " (map second phrase)))) (string/join " " (map second phrase)))))
phrase phrase
(recur (recur
(conj (conj
phrase phrase
(let [word (get-next-markov markov-trie (map second phrase))] (let [word (get-next-markov
markov-trie
(into (mapv second phrase)
(vec (repeat (dec n-gram-rank) prhyme/EOS))))]
[(phonetics/get-phones word) word]))))))) [(phonetics/get-phones word) word])))))))
(comment (comment
@ -628,7 +620,7 @@
(comp - second) (comp - second)
(normalized-frequencies (normalized-frequencies
(repeatedly (repeatedly
1000 10
#(map #(map
second second
(generate-n-syllable-sentence-rhyming-with (generate-n-syllable-sentence-rhyming-with
@ -645,3 +637,57 @@
;; [("fun" "run" "has") 0.001]) ;; [("fun" "run" "has") 0.001])
) )
(defn tightly-generate-n-syllable-sentence-rhyming-with
"It's difficult to mix a tight trie with rhymes. You need
to convert ids using the database."
[database
markov-trie
rhyme-trie
target-rhyme
n-gram-rank
target-rhyme-syllable-count
target-sentence-syllable-count]
(let [rhyme (->> (rhyme-choices-walking-target-rhyme rhyme-trie target-rhyme)
rand-nth
((fn [[phones words]]
[[phones] (rand-nth (vec words))])))]
(loop [phrase [rhyme]]
(if (or (= prhyme/BOS (second (peek phrase)))
(<= target-sentence-syllable-count
(prhyme/count-syllables-of-phrase
(string/join " " (map second phrase)))))
phrase
(recur
(conj
phrase
(let [word (database
(get-next-markov
markov-trie
(into (vec (repeat (dec n-gram-rank) (database prhyme/EOS)))
(mapv (comp database second) phrase))
(fn [[lookup [word frequency]]]
(= (database prhyme/EOS) word))))]
[(phonetics/get-phones word) word])))))))
;;;; Demo
;;;;
(comment
(let [target-rhyme ["N" "AH1" "F"]]
(->> (repeatedly
10
#(->> (tightly-generate-n-syllable-sentence-rhyming-with
database
markov-trie
rhyme-trie
target-rhyme
3
3
7)
(map second)
reverse))
(map (partial remove #{prhyme/BOS}))
(map (partial string/join " "))))
)

@ -9,6 +9,9 @@
[com.owoga.prhyme.util :as u] [com.owoga.prhyme.util :as u]
[com.owoga.prhyme.syllabify :as s])) [com.owoga.prhyme.syllabify :as s]))
(def BOS "<s>")
(def EOS "</s>")
;;; Typical rhyme model (explanation of following 3 functions) ;;; Typical rhyme model (explanation of following 3 functions)
;; ;;
;; In the typical theory of syllable structure, the general structure of a ;; In the typical theory of syllable structure, the general structure of a
@ -261,7 +264,7 @@
(defn remove-non-primary-stress (defn remove-non-primary-stress
[phones] [phones]
(map (mapv
#(string/replace % #"[02-9]" "") #(string/replace % #"[02-9]" "")
phones)) phones))
@ -430,10 +433,13 @@
(map phonetics/get-phones) (map phonetics/get-phones)
(map first) (map first)
(mapcat syllabify/syllabify) (mapcat syllabify/syllabify)
(remove empty?) ;; Handle empty string.
count)) count))
(comment (comment
(count-syllables-of-phrase "police can bother me") ;; => 6 (count-syllables-of-phrase "police can bother me") ;; => 6
(count-syllables-of-phrase "to be, or not... to be");; => 6
(count-syllables-of-phrase "");; => 1
) )
(defn words-by-rime* [words] (defn words-by-rime* [words]

Loading…
Cancel
Save