More markov code

main
Eric Ihli 3 years ago
parent f42cdfb59a
commit 1c423f49e9

@ -4,12 +4,14 @@
[com.owoga.prhyme.data.dictionary :as dict]
[com.owoga.prhyme.nlp.core :as nlp]
[com.owoga.prhyme.data-transform :as data-transform]
[com.owoga.prhyme.util.math :as math]
[com.owoga.trie :as trie]
[com.owoga.tightly-packed-trie :as tpt]
[com.owoga.tightly-packed-trie.encoding :as encoding]
[clojure.string :as string]
[clojure.java.io :as io]
[com.owoga.phonetics :as phonetics]
[com.owoga.phonetics.syllabify :as syllabify]
[taoensso.nippy :as nippy]))
(defn clean-text [text]
@ -438,3 +440,150 @@
(phonetics/get-phones "brasilia")
)
(defn choice->n-gram
[{:keys [database]} choice]
(map database (first choice)))
(defn weighted-selection-from-choices
[choices]
(math/weighted-selection
(comp second second)
choices))
(ns-unmap (find-ns 'com.owoga.corpus.markov) 'rhyme-choices)
(defmulti rhyme-choices
"Returns a list of words that end with the same phones
as the target. If the target is a string, converts the string to phones."
(fn [trie target] (class target)))
(defmethod rhyme-choices String
[trie phrase]
(let [phones (phonetics/get-phones phrase)]
(->> phones
(map reverse)
(mapcat (partial rhyme-choices trie))
(remove empty?))))
(defmethod rhyme-choices :default
[trie phones]
(->> (trie/lookup trie phones)
(remove (comp nil? second))
(map #(update % 0 into (reverse phones)))))
(comment
(let [rhyme-trie (trie/make-trie ["G" "AA1" "B"] "bog" ["G" "AO1" "B"] "bog"
["T" "AA1" "H"] "hot" ["G" "AO1" "F"] "fog")]
[(rhyme-choices rhyme-trie ["G" "AO1"])
(rhyme-choices rhyme-trie "fog")
(rhyme-choices rhyme-trie "bog")])
;; => [([("G" "AO1" "B") "bog"] [("G" "AO1" "F") "fog"])
;; ([("G" "AO1" "F") "fog"])
;; ([("G" "AA1" "B") "bog"] [("G" "AO1" "B") "bog"])]
)
(defn rhyme-choices-walking-target-rhyme
"All target rhymes need to be in phone form.
If we try to turn string form into phone form,
we'd sometimes be forced to deal with multiple pronunciations.
By only handling phone form here, the caller can handle multiple pronunciations.
Makes for a cleaner API."
[trie target-rhyme]
(loop [target-rhyme target-rhyme
result []]
(let [choices (rhyme-choices trie target-rhyme)]
(println target-rhyme choices result)
(if (or (empty? target-rhyme) (prhyme/last-primary-stress? (reverse target-rhyme)))
(into result choices)
(recur (butlast target-rhyme)
(into result choices))))))
(comment
(let [words ["bloodclot" "woodrot" "moonshot" "dot" "bog" "pat" "pot" "lot"]
phones (mapcat prhyme/phrase->all-flex-rhyme-tailing-consonants-phones words)
rhyme-trie (reduce
(fn [trie [phones word]]
(update trie phones (fnil conj #{}) [phones word]))
(trie/make-trie)
(map #(update % 0 reverse) phones))]
(rhyme-choices-walking-target-rhyme
rhyme-trie
(reverse (first (first (prhyme/phrase->all-flex-rhyme-tailing-consonants-phones "tight knot"))))))
;; => [[("T" "AA1" "AH1") #{[("T" "AA1" "AH1") "bloodclot"]}]
;; [("T" "AA1" "UH1") #{[("T" "AA1" "UH1") "woodrot"]}]
;; [("T" "AA1" "UW1") #{[("T" "AA1" "UW1") "moonshot"]}]
;; [("T" "AA1")
;; #{[("T" "AA1") "dot"] [("T" "AA1") "pot"] [("T" "AA1") "lot"]}]]
)
(defn get-next-markov
[{:keys [trie database] :as context} seed]
(let [seed (take-last 3 seed)
node (trie/lookup trie seed)
children (and node
(->> node
trie/children
(map (fn [^com.owoga.trie.ITrie child]
[(.key child)
(get child [])]))
(remove (comp nil? second))
(remove
(fn [[k v]]
(#{1 38} k)))))]
(cond
(nil? node) (recur context (rest seed))
(seq children)
(if (< (rand) (/ (apply max (map (comp second second) children))
(apply + (map (comp second second) children))))
(recur context (rest seed))
(first (math/weighted-selection (comp second second) children)))
(> (count seed) 0)
(recur context (rest seed))
:else (throw (Exception. "Error")))))
(defn get-next-markov-from-phrase-backwards
[{:keys [database trie] :as context} phrase n]
(let [word-ids (->> phrase
(#(string/split % #" "))
(take n)
(reverse)
(map database))]
(database (get-next-markov context word-ids))))
(defn generate-n-syllable-sentence-rhyming-with
[context target-phrase n-gram-rank target-rhyme-syllable-count target-sentence-syllable-count]
(if (string? target-phrase)
(let [target-phrase-words (string/split target-phrase #" ")
reversed-target-phrase (string/join " " (reverse target-phrase-words))
target-rhyme
(->> (prhyme/take-words-amounting-to-at-least-n-syllables
reversed-target-phrase
target-rhyme-syllable-count)
(#(string/split % #" "))
reverse
(string/join " "))
rhyming-n-gram (->> (rhyming-n-gram-choices context target-rhyme)
(weighted-selection-from-choices)
(choice->n-gram context)
(string/join " "))]
(loop [phrase rhyming-n-gram]
(if (<= target-sentence-syllable-count (prhyme/count-syllables-of-phrase phrase))
phrase
(recur
(str (get-next-markov-from-phrase-backwards context phrase n-gram-rank)
" "
phrase)))))
(let [target-rhyme
(->> (prhyme/take-n-syllables target-phrase target-rhyme-syllable-count))
rhyming-n-gram (->> (rhyming-n-gram-choices context target-rhyme)
(weighted-selection-from-choices)
(choice->n-gram context)
(string/join " "))]
(loop [phrase rhyming-n-gram]
(if (<= target-sentence-syllable-count (prhyme/count-syllables-of-phrase phrase))
phrase
(recur
(str (get-next-markov-from-phrase-backwards context phrase n-gram-rank)
" "
phrase)))))))

@ -228,6 +228,16 @@
[phone]
(phonetics/vowel (string/replace phone #"\d" "")))
(defn primary-stress?
[phone]
(re-find #"1" phone))
(defn last-primary-stress?
[phones]
(and (seq phones)
(primary-stress? (first phones))
(not-any? primary-stress? (rest phones))))
(defn take-vowels-and-tail-consonants
"HOPSCOTCH -> AA1 AA2 CH
@ -343,6 +353,89 @@
(phrase->flex-rhyme-phones "hog")
)
;;;; Utilities
;;
;;
(defn phrase->flex-rhyme-phones
"Takes a space-seperated string of words
and returns the concatenation of the words
vowel phones.
Returns them in reversed order so they
are ready to be used in a lookup of a rhyme trie.
"
[phrase]
(->> phrase
(#(string/split % #" "))
(map (comp syllabify/syllabify first phonetics/get-phones))
(map (partial reduce into []))
(map #(filter (partial re-find #"\d") %))
(flatten)
(map #(string/replace % #"\d" ""))
(reverse)))
(defn take-words-amounting-to-at-least-n-syllables
"This function is nice to grab the tail end of a sentence for making a good rhyme.
If the sentence ends with a single-syllable word, like 'me', but a more
interesting n-gram like 'bother me', then you might want to explore the rhymes
available for the last N syllables. Sure, a word like 'poverty' would show up if you
got all rhymes for 'me'. But you'd have to filter through a lot more less-than-great
rhymes before you see it."
[phrase n]
(letfn [(phones [word]
[word (first (phonetics/get-phones word))])
(syllables [[word phones]]
[word (syllabify/syllabify phones)])]
(->> phrase
(#(string/split % #" "))
(map phones)
(map syllables)
(reduce
(fn [result [word syllables]]
(if (<= n (count (mapcat second result)))
(reduced result)
(conj result [word syllables])))
[])
(map first)
(string/join " "))))
(comment
(take-words-amounting-to-at-least-n-syllables
"police can bother me" 3);; => "police can"
(take-words-amounting-to-at-least-n-syllables
"police can bother me" 4);; => "police can bother"
)
(defn take-n-syllables
"Returns the vowel sounds that make up the last n syllables.
Doesn't return stress."
[phrase n]
(if (string? phrase)
(->> phrase
(phrase->flex-rhyme-phones)
(take n)
(reverse))
(take-last n phrase)))
(comment
(take-n-syllables "bother me" 2);; => ("ER" "IY")
)
(defn count-syllables-of-phrase
[phrase]
(->> phrase
(#(string/split % #" "))
(map phonetics/get-phones)
(map first)
(mapcat syllabify/syllabify)
count))
(comment
(count-syllables-of-phrase "police can bother me") ;; => 6
)
(defn words-by-rime* [words]
(let [words-with-rime (->> words
(map rest)

@ -2,6 +2,7 @@
(:require [clojure.zip :as zip]
[clojure.string :as string]
[taoensso.timbre :as timbre]
[com.owoga.prhyme.core :as prhyme]
[com.owoga.prhyme.util.math :as math]
[com.owoga.phonetics.syllabify :as owoga.syllabify]
[com.owoga.phonetics :as owoga.phonetics]
@ -17,76 +18,6 @@
[com.owoga.tightly-packed-trie :as tpt]
[com.owoga.prhyme.nlp.tag-sets.treebank-ii :as tb2]))
;;;; Utilities
;;
;;
(defn phrase->flex-rhyme-phones
"Takes a space-seperated string of words
and returns the concatenation of the words
vowel phones.
Returns them in reversed order so they
are ready to be used in a lookup of a rhyme trie.
"
[phrase]
(->> phrase
(#(string/split % #" "))
(map (comp owoga.syllabify/syllabify first owoga.phonetics/get-phones))
(map (partial reduce into []))
(map #(filter (partial re-find #"\d") %))
(flatten)
(map #(string/replace % #"\d" ""))
(reverse)))
(defn take-words-amounting-to-at-least-n-syllables
"This function is nice to grab the tail end of a sentence for making a good rhyme.
If the sentence ends with a single-syllable word, like 'me', but a more
interesting n-gram like 'bother me', then you might want to explore the rhymes
available for the last N syllables. Sure, a word like 'poverty' would show up if you
got all rhymes for 'me'. But you'd have to filter through a lot more less-than-great
rhymes before you see it."
[phrase n]
(letfn [(phones [word]
[word (first (owoga.phonetics/get-phones word))])
(syllables [[word phones]]
[word (owoga.syllabify/syllabify phones)])]
(->> phrase
(#(string/split % #" "))
(map phones)
(map syllables)
(reduce
(fn [result [word syllables]]
(if (<= n (count (mapcat second result)))
(reduced result)
(conj result [word syllables])))
[])
(map first)
(string/join " "))))
(comment
(take-words-amounting-to-at-least-n-syllables
"police can bother me" 3);; => "police can"
(take-words-amounting-to-at-least-n-syllables
"police can bother me" 4);; => "police can bother"
)
(defn take-n-syllables
"Returns the vowel sounds that make up the last n syllables.
Doesn't return stress."
[phrase n]
(if (string? phrase)
(->> phrase
(phrase->flex-rhyme-phones)
(take n)
(reverse))
(take-last n phrase)))
(comment
(take-n-syllables "bother me" 2);; => ("ER" "IY")
)
;;;; Much of the code below is related to grammar generation.
(defn update-values [m f & args]
@ -583,7 +514,7 @@
(defn markov-generate-grammar-with-rhyming-tail
[grammar-trie grammar-database rhyme-trie rhyme-database rhyme-target zipper]
(let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target)
(let [rhyme-phones (prhyme/phrase->flex-rhyme-phones rhyme-target)
rhyme-options (examples.tpt/rhyming-n-gram-choices
{:database rhyme-database
:flex-rhyme-trie rhyme-trie}
@ -1005,7 +936,7 @@
(defn markov-complete-grammar-with-rhyming-tail
[grammar-trie grammar-database rhyme-trie rhyme-database grammar rhyme-target]
(let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target)
(let [rhyme-phones (prhyme/phrase->flex-rhyme-phones rhyme-target)
rhyme-options (examples.tpt/rhyming-n-gram-choices
{:database rhyme-database
:flex-rhyme-trie rhyme-trie}

Loading…
Cancel
Save