From 5d0311904a8d535ceeb27a65945310d209da8f5e Mon Sep 17 00:00:00 2001 From: Eric Ihli Date: Sun, 18 Apr 2021 11:16:57 -0500 Subject: [PATCH] Rhyme generation --- dev/examples/core.clj | 12 ++-- dev/examples/tpt.clj | 98 +++++++++++++++++++++++++----- src/com/owoga/prhyme/util/math.clj | 1 - 3 files changed, 90 insertions(+), 21 deletions(-) diff --git a/dev/examples/core.clj b/dev/examples/core.clj index 3438f80..4d3323a 100644 --- a/dev/examples/core.clj +++ b/dev/examples/core.clj @@ -280,7 +280,7 @@ (let [structure (->> documents (drop chunk) (take chunk-size) - structures + grammar-tree-frequencies (reduce (fn [a v] (nlp/deep-merge-with + a v)) @@ -363,13 +363,17 @@ (nlp/deep-merge-with + accum data))) {} documents))) + + (def dark-lyrics-structure-frequencies + (nippy/thaw-from-file "resources/corpus/darklyrics/grammar-tree-freqs.nippy")) + (def popular-structure-freq-data (into {} (take 500 (reverse (sort-by #(second %) structure-freq-data))))) + (take 100 popular-structure-freq-data) (nippy/freeze-to-file "resources/corpus/darklyrics/grammar-tree-freqs.nippy" structure-freq-data) (def t1 (nippy/thaw-from-file "resources/structure-freqs/0.nip")) - structures - (take 100 (reverse (sort-by second structures))) + (do (let [documents (->> "dark-corpus" io/file @@ -426,7 +430,7 @@ (def example-structures (->> corpus (take 100) - structures + grammar-tree-frequencies (reduce (fn [a v] (merge-with + a v)) diff --git a/dev/examples/tpt.clj b/dev/examples/tpt.clj index bd382a6..6039183 100644 --- a/dev/examples/tpt.clj +++ b/dev/examples/tpt.clj @@ -1,6 +1,7 @@ (ns examples.tpt (:require [clojure.string :as string] [clojure.java.io :as io] + [com.owoga.prhyme.nlp.core :as nlp] [taoensso.tufte :as tufte :refer (defnp p profiled profile)] [com.owoga.trie :as trie] [com.owoga.tightly-packed-trie :as tpt] @@ -427,7 +428,10 @@ (@context :database))) nil) -(defn find-rhymes [trie word] +(defn find-rhymes + "Takes a rhyme-trie (perfect or vowel only, for example) + and a word. Returns list of rhyming words." + [trie word] (->> (perfect-rhymes trie (or (dict/cmu-with-stress-map word) (util/get-phones-with-stress word))) (map (comp first second)) @@ -442,7 +446,10 @@ freq])) (remove #(= word (first %))))) -(defn choose-next-word [{:keys [database trie] :as context} n-gram] +(defn choose-next-word + "Given an n-gram of [[word1 freq1] [word2 freq2]] chooses + the next word based on markove data in trie." + [{:keys [database trie] :as context} n-gram] (let [n-gram-ids (->> n-gram (map first) (map database)) node (trie/lookup trie n-gram-ids)] (cond @@ -467,24 +474,83 @@ :else (choose-next-word context (butlast n-gram))))) +(defn remove-sentence-markers [phrase] + (remove (fn [[word _]] (#{"" ""} word)) phrase)) + +(defn valid-sentence? [phrase] + (->> phrase + (map first) + (string/join " ") + (#(string/replace % #"(|)" "")) + (nlp/valid-sentence?))) + +(defn valid-sentences? [phrase] + (let [sentences (->> (util/take-through + #(= (first %) "") + phrase) + (map remove-sentence-markers))] + sentences)) + (defn generate-phrase [{:keys [database trie] :as context} phrase] - (loop [phrase phrase] - (if (< 10 (count phrase)) - phrase - (recur (cons (choose-next-word context (take 3 phrase)) - phrase))))) - -(defn generate-rhyme [{:keys [perfect-rhyme-trie] :as context} phrase] - (let [phrase1 (generate-phrase context phrase) - rhyme (first (find-rhymes perfect-rhyme-trie (first (last phrase1)))) - phrase2 (generate-phrase context (list (last phrase1)))] - [phrase1 phrase2])) + (loop [phrase' (loop [phrase phrase] + (if (< 5 (count phrase)) + phrase + (recur (cons (choose-next-word context (take 3 phrase)) + phrase))))] + (if (valid-sentence? phrase') + phrase' + (recur (loop [phrase phrase] + (if (< 5 (count phrase)) + phrase + (recur (cons (choose-next-word context (take 3 phrase)) + phrase)))))))) + +(defn generate-sentence-backwards + "Given a phrase of [w1 w2 w3] generates a sentence + using a backwards markov." + ([{:keys [database trie] :as context} phrase] + (let [phrase (map (fn [w] + (let [id (database w)] + [w (second (get trie [id]))])) + phrase)] + (loop [phrase' (loop [phrase phrase] + (if (= "" (first (first phrase))) + phrase + (recur (cons (choose-next-word context (take 3 phrase)) + phrase))))] + (if (valid-sentence? phrase') + phrase' + (recur (loop [phrase phrase] + (if (= "" (first (first phrase))) + phrase + (recur (cons (choose-next-word context (take 3 phrase)) + phrase))))))))) + ) + +(defn generate-rhyme + ([context] + (generate-rhyme context [""])) + ([{:keys [perfect-rhyme-trie] :as context} phrase] + (let [phrase1 (generate-sentence-backwards context phrase) + rhyme (second (find-rhymes perfect-rhyme-trie (first (first (take-last 2 phrase1))))) + phrase2 (generate-sentence-backwards context [(first rhyme) ""])] + [phrase1 phrase2]))) (comment (initialize) + (generate-rhyme @context) + + (let [{:keys [database trie rhyme-trie]} @context + phrase [""] + ids (map database phrase)] + (get trie ids)) + (choose-next-word @context (take 3 [["" 509]])) + + (generate-sentence-backwards @context [""]) + + (valid-sentences? (generate-phrase @context '(["bitter" 41]))) + - (generate-phrase @context '(["bitter" 41])) - (generate-rhyme @context '(["bitter" 41])) (choose-next-word @context (take 3 [["theology" 41]])) @@ -616,7 +682,7 @@ (if (> i 20) generated-text (let [children (loop [i 4] - (let [node (p :lookup + (let [node (p :lookup (trie/lookup loaded-tightly-packed-trie (vec (take-last i generated-text)))) diff --git a/src/com/owoga/prhyme/util/math.clj b/src/com/owoga/prhyme/util/math.clj index dde5b41..f530552 100644 --- a/src/com/owoga/prhyme/util/math.clj +++ b/src/com/owoga/prhyme/util/math.clj @@ -390,4 +390,3 @@ [r (/ nr* nr)]) (map vector xs ys (vals sgt)))))) -(discount-coefficient-map )