diff --git a/src/com/owoga/corpus/markov.clj b/src/com/owoga/corpus/markov.clj index 2927c29..2f6aabc 100644 --- a/src/com/owoga/corpus/markov.clj +++ b/src/com/owoga/corpus/markov.clj @@ -4,12 +4,14 @@ [com.owoga.prhyme.data.dictionary :as dict] [com.owoga.prhyme.nlp.core :as nlp] [com.owoga.prhyme.data-transform :as data-transform] + [com.owoga.prhyme.util.math :as math] [com.owoga.trie :as trie] [com.owoga.tightly-packed-trie :as tpt] [com.owoga.tightly-packed-trie.encoding :as encoding] [clojure.string :as string] [clojure.java.io :as io] [com.owoga.phonetics :as phonetics] + [com.owoga.phonetics.syllabify :as syllabify] [taoensso.nippy :as nippy])) (defn clean-text [text] @@ -438,3 +440,150 @@ (phonetics/get-phones "brasilia") ) + +(defn choice->n-gram + [{:keys [database]} choice] + (map database (first choice))) + +(defn weighted-selection-from-choices + [choices] + (math/weighted-selection + (comp second second) + choices)) + +(ns-unmap (find-ns 'com.owoga.corpus.markov) 'rhyme-choices) + +(defmulti rhyme-choices + "Returns a list of words that end with the same phones + as the target. If the target is a string, converts the string to phones." + (fn [trie target] (class target))) + +(defmethod rhyme-choices String + [trie phrase] + (let [phones (phonetics/get-phones phrase)] + (->> phones + (map reverse) + (mapcat (partial rhyme-choices trie)) + (remove empty?)))) + +(defmethod rhyme-choices :default + [trie phones] + (->> (trie/lookup trie phones) + (remove (comp nil? second)) + (map #(update % 0 into (reverse phones))))) + +(comment + (let [rhyme-trie (trie/make-trie ["G" "AA1" "B"] "bog" ["G" "AO1" "B"] "bog" + ["T" "AA1" "H"] "hot" ["G" "AO1" "F"] "fog")] + [(rhyme-choices rhyme-trie ["G" "AO1"]) + (rhyme-choices rhyme-trie "fog") + (rhyme-choices rhyme-trie "bog")]) + ;; => [([("G" "AO1" "B") "bog"] [("G" "AO1" "F") "fog"]) + ;; ([("G" "AO1" "F") "fog"]) + ;; ([("G" "AA1" "B") "bog"] [("G" "AO1" "B") "bog"])] + ) + +(defn rhyme-choices-walking-target-rhyme + "All target rhymes need to be in phone form. + If we try to turn string form into phone form, + we'd sometimes be forced to deal with multiple pronunciations. + By only handling phone form here, the caller can handle multiple pronunciations. + Makes for a cleaner API." + [trie target-rhyme] + (loop [target-rhyme target-rhyme + result []] + (let [choices (rhyme-choices trie target-rhyme)] + (println target-rhyme choices result) + (if (or (empty? target-rhyme) (prhyme/last-primary-stress? (reverse target-rhyme))) + (into result choices) + (recur (butlast target-rhyme) + (into result choices)))))) + +(comment + (let [words ["bloodclot" "woodrot" "moonshot" "dot" "bog" "pat" "pot" "lot"] + phones (mapcat prhyme/phrase->all-flex-rhyme-tailing-consonants-phones words) + rhyme-trie (reduce + (fn [trie [phones word]] + (update trie phones (fnil conj #{}) [phones word])) + (trie/make-trie) + (map #(update % 0 reverse) phones))] + (rhyme-choices-walking-target-rhyme + rhyme-trie + (reverse (first (first (prhyme/phrase->all-flex-rhyme-tailing-consonants-phones "tight knot")))))) + ;; => [[("T" "AA1" "AH1") #{[("T" "AA1" "AH1") "bloodclot"]}] + ;; [("T" "AA1" "UH1") #{[("T" "AA1" "UH1") "woodrot"]}] + ;; [("T" "AA1" "UW1") #{[("T" "AA1" "UW1") "moonshot"]}] + ;; [("T" "AA1") + ;; #{[("T" "AA1") "dot"] [("T" "AA1") "pot"] [("T" "AA1") "lot"]}]] + ) + +(defn get-next-markov + [{:keys [trie database] :as context} seed] + (let [seed (take-last 3 seed) + node (trie/lookup trie seed) + children (and node + (->> node + trie/children + (map (fn [^com.owoga.trie.ITrie child] + [(.key child) + (get child [])])) + (remove (comp nil? second)) + (remove + (fn [[k v]] + (#{1 38} k)))))] + (cond + (nil? node) (recur context (rest seed)) + (seq children) + (if (< (rand) (/ (apply max (map (comp second second) children)) + (apply + (map (comp second second) children)))) + (recur context (rest seed)) + (first (math/weighted-selection (comp second second) children))) + (> (count seed) 0) + (recur context (rest seed)) + :else (throw (Exception. "Error"))))) + +(defn get-next-markov-from-phrase-backwards + [{:keys [database trie] :as context} phrase n] + (let [word-ids (->> phrase + (#(string/split % #" ")) + (take n) + (reverse) + (map database))] + (database (get-next-markov context word-ids)))) + +(defn generate-n-syllable-sentence-rhyming-with + [context target-phrase n-gram-rank target-rhyme-syllable-count target-sentence-syllable-count] + (if (string? target-phrase) + (let [target-phrase-words (string/split target-phrase #" ") + reversed-target-phrase (string/join " " (reverse target-phrase-words)) + target-rhyme + (->> (prhyme/take-words-amounting-to-at-least-n-syllables + reversed-target-phrase + target-rhyme-syllable-count) + (#(string/split % #" ")) + reverse + (string/join " ")) + rhyming-n-gram (->> (rhyming-n-gram-choices context target-rhyme) + (weighted-selection-from-choices) + (choice->n-gram context) + (string/join " "))] + (loop [phrase rhyming-n-gram] + (if (<= target-sentence-syllable-count (prhyme/count-syllables-of-phrase phrase)) + phrase + (recur + (str (get-next-markov-from-phrase-backwards context phrase n-gram-rank) + " " + phrase))))) + (let [target-rhyme + (->> (prhyme/take-n-syllables target-phrase target-rhyme-syllable-count)) + rhyming-n-gram (->> (rhyming-n-gram-choices context target-rhyme) + (weighted-selection-from-choices) + (choice->n-gram context) + (string/join " "))] + (loop [phrase rhyming-n-gram] + (if (<= target-sentence-syllable-count (prhyme/count-syllables-of-phrase phrase)) + phrase + (recur + (str (get-next-markov-from-phrase-backwards context phrase n-gram-rank) + " " + phrase))))))) diff --git a/src/com/owoga/prhyme/core.clj b/src/com/owoga/prhyme/core.clj index f0788b0..0051336 100644 --- a/src/com/owoga/prhyme/core.clj +++ b/src/com/owoga/prhyme/core.clj @@ -228,6 +228,16 @@ [phone] (phonetics/vowel (string/replace phone #"\d" ""))) +(defn primary-stress? + [phone] + (re-find #"1" phone)) + +(defn last-primary-stress? + [phones] + (and (seq phones) + (primary-stress? (first phones)) + (not-any? primary-stress? (rest phones)))) + (defn take-vowels-and-tail-consonants "HOPSCOTCH -> AA1 AA2 CH @@ -343,6 +353,89 @@ (phrase->flex-rhyme-phones "hog") ) + +;;;; Utilities +;; +;; + +(defn phrase->flex-rhyme-phones + "Takes a space-seperated string of words + and returns the concatenation of the words + vowel phones. + + Returns them in reversed order so they + are ready to be used in a lookup of a rhyme trie. + " + [phrase] + (->> phrase + (#(string/split % #" ")) + (map (comp syllabify/syllabify first phonetics/get-phones)) + (map (partial reduce into [])) + (map #(filter (partial re-find #"\d") %)) + (flatten) + (map #(string/replace % #"\d" "")) + (reverse))) + +(defn take-words-amounting-to-at-least-n-syllables + "This function is nice to grab the tail end of a sentence for making a good rhyme. + If the sentence ends with a single-syllable word, like 'me', but a more + interesting n-gram like 'bother me', then you might want to explore the rhymes + available for the last N syllables. Sure, a word like 'poverty' would show up if you + got all rhymes for 'me'. But you'd have to filter through a lot more less-than-great + rhymes before you see it." + [phrase n] + (letfn [(phones [word] + [word (first (phonetics/get-phones word))]) + (syllables [[word phones]] + [word (syllabify/syllabify phones)])] + (->> phrase + (#(string/split % #" ")) + (map phones) + (map syllables) + (reduce + (fn [result [word syllables]] + (if (<= n (count (mapcat second result))) + (reduced result) + (conj result [word syllables]))) + []) + (map first) + (string/join " ")))) + +(comment + (take-words-amounting-to-at-least-n-syllables + "police can bother me" 3);; => "police can" + (take-words-amounting-to-at-least-n-syllables + "police can bother me" 4);; => "police can bother" + ) + +(defn take-n-syllables + "Returns the vowel sounds that make up the last n syllables. + Doesn't return stress." + [phrase n] + (if (string? phrase) + (->> phrase + (phrase->flex-rhyme-phones) + (take n) + (reverse)) + (take-last n phrase))) + +(comment + (take-n-syllables "bother me" 2);; => ("ER" "IY") + ) + +(defn count-syllables-of-phrase + [phrase] + (->> phrase + (#(string/split % #" ")) + (map phonetics/get-phones) + (map first) + (mapcat syllabify/syllabify) + count)) + +(comment + (count-syllables-of-phrase "police can bother me") ;; => 6 + ) + (defn words-by-rime* [words] (let [words-with-rime (->> words (map rest) diff --git a/src/com/owoga/prhyme/nlg/prhyme_nlg.clj b/src/com/owoga/prhyme/nlg/prhyme_nlg.clj index a8c04a7..43cf780 100644 --- a/src/com/owoga/prhyme/nlg/prhyme_nlg.clj +++ b/src/com/owoga/prhyme/nlg/prhyme_nlg.clj @@ -2,6 +2,7 @@ (:require [clojure.zip :as zip] [clojure.string :as string] [taoensso.timbre :as timbre] + [com.owoga.prhyme.core :as prhyme] [com.owoga.prhyme.util.math :as math] [com.owoga.phonetics.syllabify :as owoga.syllabify] [com.owoga.phonetics :as owoga.phonetics] @@ -17,76 +18,6 @@ [com.owoga.tightly-packed-trie :as tpt] [com.owoga.prhyme.nlp.tag-sets.treebank-ii :as tb2])) -;;;; Utilities -;; -;; - -(defn phrase->flex-rhyme-phones - "Takes a space-seperated string of words - and returns the concatenation of the words - vowel phones. - - Returns them in reversed order so they - are ready to be used in a lookup of a rhyme trie. - " - [phrase] - (->> phrase - (#(string/split % #" ")) - (map (comp owoga.syllabify/syllabify first owoga.phonetics/get-phones)) - (map (partial reduce into [])) - (map #(filter (partial re-find #"\d") %)) - (flatten) - (map #(string/replace % #"\d" "")) - (reverse))) - -(defn take-words-amounting-to-at-least-n-syllables - "This function is nice to grab the tail end of a sentence for making a good rhyme. - If the sentence ends with a single-syllable word, like 'me', but a more - interesting n-gram like 'bother me', then you might want to explore the rhymes - available for the last N syllables. Sure, a word like 'poverty' would show up if you - got all rhymes for 'me'. But you'd have to filter through a lot more less-than-great - rhymes before you see it." - [phrase n] - (letfn [(phones [word] - [word (first (owoga.phonetics/get-phones word))]) - (syllables [[word phones]] - [word (owoga.syllabify/syllabify phones)])] - (->> phrase - (#(string/split % #" ")) - (map phones) - (map syllables) - (reduce - (fn [result [word syllables]] - (if (<= n (count (mapcat second result))) - (reduced result) - (conj result [word syllables]))) - []) - (map first) - (string/join " ")))) - -(comment - (take-words-amounting-to-at-least-n-syllables - "police can bother me" 3);; => "police can" - (take-words-amounting-to-at-least-n-syllables - "police can bother me" 4);; => "police can bother" - ) - -(defn take-n-syllables - "Returns the vowel sounds that make up the last n syllables. - Doesn't return stress." - [phrase n] - (if (string? phrase) - (->> phrase - (phrase->flex-rhyme-phones) - (take n) - (reverse)) - (take-last n phrase))) - -(comment - (take-n-syllables "bother me" 2);; => ("ER" "IY") - ) - - ;;;; Much of the code below is related to grammar generation. (defn update-values [m f & args] @@ -583,7 +514,7 @@ (defn markov-generate-grammar-with-rhyming-tail [grammar-trie grammar-database rhyme-trie rhyme-database rhyme-target zipper] - (let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target) + (let [rhyme-phones (prhyme/phrase->flex-rhyme-phones rhyme-target) rhyme-options (examples.tpt/rhyming-n-gram-choices {:database rhyme-database :flex-rhyme-trie rhyme-trie} @@ -1005,7 +936,7 @@ (defn markov-complete-grammar-with-rhyming-tail [grammar-trie grammar-database rhyme-trie rhyme-database grammar rhyme-target] - (let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target) + (let [rhyme-phones (prhyme/phrase->flex-rhyme-phones rhyme-target) rhyme-options (examples.tpt/rhyming-n-gram-choices {:database rhyme-database :flex-rhyme-trie rhyme-trie}