From 1f7a013dcae8c8522dffd6020802cf831f781dce Mon Sep 17 00:00:00 2001 From: Eric Ihli Date: Mon, 21 Jun 2021 19:54:23 -0500 Subject: [PATCH] More markov cleanup --- dev/examples/core.clj | 11 - src/com/owoga/corpus/lovecraft.clj | 279 ------------------ src/com/owoga/corpus/markov.clj | 321 +++++++++++++++++---- src/com/owoga/prhyme/core.clj | 3 +- src/com/owoga/prhyme/gen.clj | 312 -------------------- src/com/owoga/prhyme/limerick.clj | 3 +- test/com/owoga/frp/infrastructure-test.clj | 35 --- 7 files changed, 262 insertions(+), 702 deletions(-) delete mode 100644 src/com/owoga/prhyme/gen.clj delete mode 100644 test/com/owoga/frp/infrastructure-test.clj diff --git a/dev/examples/core.clj b/dev/examples/core.clj index 4d3323a..6e60b3c 100644 --- a/dev/examples/core.clj +++ b/dev/examples/core.clj @@ -4,7 +4,6 @@ [clojure.java.io :as io] [taoensso.nippy :as nippy] [taoensso.timbre :as timbre] - [com.owoga.prhyme.frp :as frp] [com.owoga.prhyme.util :as util] [com.owoga.prhyme.core :as prhyme] [com.owoga.prhyme.data.bigrams :as bigrams] @@ -20,16 +19,6 @@ [clojure.zip :as zip] [clojure.walk :as walk])) -(defn weight-fn [word target result] - (let [rimes (frp/consecutive-matching word target :rimes) - nuclei (frp/consecutive-matching word target :nuclei) - onsets (frp/consecutive-matching word target :onsets) - total (apply + (map count [rimes nuclei onsets]))] - total)) - -(defn pred-fn [word target result] - (< 0 (weight-fn word target result))) - (defn weight-popular [word target result] (if (dict/popular (:normalized-word word)) 10 diff --git a/src/com/owoga/corpus/lovecraft.clj b/src/com/owoga/corpus/lovecraft.clj index 23f149e..dbe9bfd 100644 --- a/src/com/owoga/corpus/lovecraft.clj +++ b/src/com/owoga/corpus/lovecraft.clj @@ -6,19 +6,14 @@ [com.owoga.prhyme.util :as util] [com.owoga.prhyme.core :as prhyme] [com.owoga.prhyme.util.nlp :as nlp] - [com.owoga.prhyme.gen :as gen] [com.owoga.prhyme.generation.weighted-selection :as weighted-selection] [taoensso.tufte :as tufte :refer [defnp p profiled profile]] - [com.owoga.prhyme.frp :as frp] [clojure.java.io :as io])) (tufte/add-basic-println-handler! {}) (def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/") -(def words-map - (into {} (map #(vector (string/lower-case (:word %)) %) frp/words))) - (defn fetch-url [url] (html/html-resource (java.net.URL. url))) @@ -63,277 +58,3 @@ (#(str % "\n")) (append-to-file "lovecraft.txt"))) (take 10 (links)))) - -(defn tokens-from-file [file] - (with-open [r (io/reader file)] - (tokens (slurp r)))) - -(defn window [n] - (fn [coll] - (cond - (empty? coll) [] - (< (count coll) n) [] - :else (cons (take n coll) - (lazy-seq ((window n) (drop n coll))))))) - -(defn markov [tokens] - (->> tokens - (map - (fn [token] - (let [k (butlast token) - v (last token)] - [k v]))) - (reduce - (fn [a [k v]] - (update-in a [k v] (fnil inc 0))) - {}))) - -(defn running-total - ([coll] - (running-total coll 0)) - ([coll last-val] - (cond - (empty? coll) nil - :else (cons (+ last-val (first coll)) - (lazy-seq - (running-total - (rest coll) - (+ last-val (first coll)))))))) - -(defn weighted-rand [weights] - (let [running-weights (running-total weights) - rand-val (rand (last running-weights))] - (loop [i 0] - (if (> (nth running-weights i) rand-val) - i - (recur (inc i)))))) - -(def word-set (into #{} (->> prhyme/words - (map first) - (map string/lower-case) - (map #(string/replace % #"\(\d+\)" ""))))) - -(defn normalize-tokens [tokens] - (->> tokens - (map string/lower-case) - (filter word-set))) - -(defn main [] - (->> (tokens-from-file "lovecraft.txt") - (reverse) - (normalize-tokens) - ((window 2)) - (markov) - (into {}))) - -(defn synonym? - "Given a possibility, like [\"foo\" 3] - which says that foo follows a particular key with - a weight of 3, a word is a synonym of that possibility - if the word is a synonym ." - [p synonyms] - (synonyms p)) - -(defn adjust-for-synonyms - "If a word is in a set of synonyms, adjust its weight upwards." - [synonyms] - (fn [possibilities] - (reduce - (fn [p s] - (if (s p) - (update p s #(* 5 %)) - p)) - possibilities - synonyms))) - -(defn adjust-for-rimes - [target-rime dictionary] - (fn [possibilities] - (into - {} - (map - (fn [[p v]] - (let [possibility (get dictionary p) - factor (count - (frp/consecutive-matching - target-rime - possibility - :rimes))] - [p (* v (max 1 (* factor 4)))])) - possibilities)))) - - -(defonce lovecraft-markov (read-string (slurp "lovecraft.edn"))) - -(defn markov-key [key-fn] - (fn [text] - (key-fn text))) - -(defn gen-from [m p initial] - (loop [r (list initial)] - (cond - (p r) (recur (cons (m (list (first r))) r)) - :else r))) - - -(defn rhyming-words - "List of rhyming words sorted by quality of rhyme." - [target] - (let [target-phrase (->> target - (prhyme/phrase->word frp/words) - (#(assoc % :rimes? true)))] - (->> target-phrase - (#(assoc % :rimes? true)) - (frp/prhyme frp/words) - (sort-by - #(- (count - (frp/consecutive-matching - % - target-phrase - :rimes))))))) - -(defn markov-rhymes [markov-data rhyming-words] - (->> (map - (fn [word] - (->> word - :word - string/lower-case - (#(string/replace % #"\(\d+\)" "")) - (#(vector % (get markov-data (list %)))))) - rhyming-words) - (into #{}) - (remove - (fn [[w p]] - (nil? p))))) - -(defn adjust-for-over-syllables - "Adjust weights to prefer not going over the number - of syllables of the target word." - [target] - (fn [words] - (p :adjust-for-syllables - (map - (fn [word] - (if (or (nil? (:syllable-count word)) - (nil? (:syllables target))) - (println word target)) - (cond - (= (:syllable-count word) (count (:syllables target))) - (as-> word word - (assoc word :weight (* 3 (:weight word))) - (assoc word :adjusted-for-syllables-factor 3)) - - (< (:syllable-count word) (count (:syllables target))) - (as-> word word - (assoc word :weight (* 2 (:weight word))) - (assoc word :adjusted-for-syllables-factor 2)) - - :else - (as-> word word - (assoc word :weight (* 1 (:weight word))) - (assoc word :adjusted-for-syllables-factor 1)))) - words)))) - -(defn adjust-for-membership-1 - [set_ percent] - (let [ratio (- 1 percent)] - (fn [words] - (let [[members non-members] - ((juxt filter remove) - #(set_ (:normalized-word %)) - words) - weight-non-members (apply + (map :weight non-members)) - target-weight-members (* ratio weight-non-members) - count-members (count members) - adjustment-members (/ target-weight-members count-members)] - (concat - (map - (fn [member] - (as-> member member - (assoc member :weight (* adjustment-members (:weight member))) - (assoc member :adjustment-for-membership adjustment-members))) - members) - non-members))))) - -(defn adjust-for-membership [set_] - (fn [words] - (map - (fn [word] - (if (set_ (:normalized-word word)) - (as-> word word - (assoc word :weight (* 2 (:weight word))) - (assoc word :adjust-for-membership-factor 2)) - (assoc word :adjust-for-membership-factor 1))) - words))) - -(defn filter-for-membership [set_] - (fn [words] - (map - (fn [word] - (if-not (set_ (:normalized-word word)) - (as-> word word - (assoc word :weight (* 0.01 (:weight word))) - (assoc word :filter-for-membership-factor 0.01)) - word)) - words))) - -(defn adjust-for-markov [markov-options] - (let [markov-set (into #{} (map first (keys markov-options)))] - (fn [words] - (let [result (map - (fn [word] - (if (markov-set (:normalized-word word)) - (as-> word word - (assoc word :weight (* 100 (:weight word))) - (assoc word :adjust-for-markov-factor 100)) - (assoc word :adjust-for-markov-factor 1))) - words)] - result)))) - -(comment - (let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))] - (take 5 (markov-adjuster frp/words)))) - -(defn adjust-for-membership-1 - [set_ percent] - (let [ratio (- 1 percent)] - (fn [words] - (let [[members non-members] - ((juxt filter remove) - #(set_ (:normalized-word %)) - words) - weight-non-members (apply + (map :weight non-members)) - target-weight-members (* ratio weight-non-members) - count-members (count members) - adjustment-members (/ target-weight-members count-members)] - (concat - (map - (fn [member] - (as-> member member - (assoc member :weight (* adjustment-members (:weight member))) - (assoc member :adjustment-for-membership adjustment-members))) - members) - non-members))))) - -(defn adjust-for-markov-1 - [markov-options percent] - (let [ratio (- 1 percent)] - (fn [words] - (if (nil? markov-options) - words - (let [[markovs non-markovs] - ((juxt filter remove) - #(markov-options (:normalized-word %)) - words) - weight-non-markovs (apply + (map :weight non-markovs)) - target-weight-markovs (* ratio weight-non-markovs) - count-markovs (count markovs) - adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))] - (concat - (map - (fn [markov] - (as-> markov markov - (assoc markov :weight (* adjustment-markovs (:weight markov))) - (assoc markov :adjustment-for-markov adjustment-markovs))) - markovs) - non-markovs)))))) diff --git a/src/com/owoga/corpus/markov.clj b/src/com/owoga/corpus/markov.clj index 786f897..4220d62 100644 --- a/src/com/owoga/corpus/markov.clj +++ b/src/com/owoga/corpus/markov.clj @@ -6,6 +6,7 @@ [com.owoga.trie :as trie] [com.owoga.tightly-packed-trie :as tpt] [com.owoga.tightly-packed-trie.encoding :as encoding] + [clojure.set :as set] [clojure.string :as string] [clojure.java.io :as io] [com.owoga.phonetics :as phonetics] @@ -244,22 +245,43 @@ ;; => [([("G" "AO1" "B") "bog"] [("G" "AO1" "F") "fog"]) ;; ([("G" "AO1" "F") "fog"]) ;; ([("G" "AA1" "B") "bog"] [("G" "AO1" "B") "bog"])] + + (->> (rhyme-choices-walking-target-rhyme + rhyme-trie + ["N" "AH1" "F"] + identity) + rand-nth + ((fn [[phones words]] + [[phones] (rand-nth (vec words))]))) + ) (defn rhyme-choices-walking-target-rhyme "All target rhymes need to be in phone form. + + `target-rhyme`: [N UH1 F] If we try to turn string form into phone form, we'd sometimes be forced to deal with multiple pronunciations. By only handling phone form here, the caller can handle multiple pronunciations. - Makes for a cleaner API." - [trie target-rhyme] - (loop [target-rhyme target-rhyme - result []] - (let [choices (rhyme-choices trie target-rhyme)] - (if (or (empty? target-rhyme) (prhyme/last-primary-stress? (reverse target-rhyme))) - (into result choices) - (recur (butlast target-rhyme) - (into result choices)))))) + Makes for a cleaner API. + + `words-fn` gets passed the result of `rhyme-choices` which has this structures + ([(G AO1 B) bog] [(G AO1 F) fog]) + " + ([trie target-rhyme] + (rhyme-choices-walking-target-rhyme + trie + target-rhyme + identity)) + ([trie target-rhyme words-fn] + (loop [target-rhyme target-rhyme + result []] + (let [choices (words-fn (rhyme-choices trie target-rhyme))] + (if (or (empty? target-rhyme) + (prhyme/last-primary-stress? (reverse target-rhyme))) + (into result choices) + (recur (butlast target-rhyme) + (into result choices))))))) (comment (let [words ["bloodclot" "woodrot" "moonshot" "dot" "bog" "pat" "pot" "lot"] @@ -416,38 +438,127 @@ ) -(defn tightly-generate-n-syllable-sentence-rhyming-with +(defn tightly-generate-n-syllable-sentence "It's difficult to mix a tight trie with rhymes. You need - to convert ids using the database." - [database + to convert ids using the database. + + This is going to generate sentences backwards. + + Generates the following structure: + + [[[[S K AY1]] sky] + [[[DH AH0] [DH AH1] [DH IY0]] the] + [[[K R AE1 K S]] cracks] + [[[G R AW1 N D]] ground] + [[[DH AH0] [DH AH1] [DH IY0]] the] + [[[T UW1] [T IH0] [T AH0]] to] + [[[K IH1 NG D AH0 M]] kingdom] + [[[DH AH0] [DH AH1] [DH IY0]] the] + [[[D IH0 S T R OY1]] destroy]] + " + ([database + markov-trie + n-gram-rank + target-sentence-syllable-count] + (tightly-generate-n-syllable-sentence + database + markov-trie + n-gram-rank + target-sentence-syllable-count + (constantly false))) + ([database + markov-trie + n-gram-rank + target-sentence-syllable-count + markov-remove-fn] + (let [eos (database prhyme/EOS) + bos (database prhyme/BOS)] + (loop [phrase []] + (if (<= target-sentence-syllable-count + (prhyme/count-syllables-of-phrase + (string/join " " (map second phrase)))) + phrase + (recur + (conj + phrase + (let [word (database + (get-next-markov + markov-trie + ; Pad sentence with eos markers since we're working backwards + (into (vec (repeat (dec n-gram-rank) eos)) + (mapv (comp database second) phrase)) + ; Remove eos, bos, and forbidden words + (fn [[lookup [word frequency]]] + (or (markov-remove-fn [lookup [word frequency]]) + (#{eos bos} word)))))] + [(phonetics/get-phones word) word])))))))) + +(comment + (tightly-generate-n-syllable-sentence + database markov-trie - rhyme-trie - target-rhyme - n-gram-rank - target-rhyme-syllable-count - target-sentence-syllable-count] - (let [rhyme (->> (rhyme-choices-walking-target-rhyme rhyme-trie target-rhyme) - rand-nth - ((fn [[phones words]] - [[phones] (rand-nth (vec words))])))] - (loop [phrase [rhyme]] - (if (or (= prhyme/BOS (second (peek phrase))) - (<= target-sentence-syllable-count - (prhyme/count-syllables-of-phrase - (string/join " " (map second phrase))))) - phrase - (recur - (conj - phrase - (let [word (database - (get-next-markov - markov-trie - (into (vec (repeat (dec n-gram-rank) (database prhyme/EOS))) - (mapv (comp database second) phrase)) - (fn [[lookup [word frequency]]] - (= (database prhyme/EOS) word))))] - [(phonetics/get-phones word) word]))))))) + 3 + 10) + + ) +(defn tightly-generate-n-syllable-sentence-rhyming-with + "It's difficult to mix a tight trie with rhymes. You need + to convert ids using the database. + + `rhyme-wordset-fn` will take something that looks like + ([(G AO1 B) bog] [(G AO1 F) fog]) + " + ([database + markov-trie + rhyme-trie + target-rhyme + n-gram-rank + target-rhyme-syllable-count + target-sentence-syllable-count] + (tightly-generate-n-syllable-sentence-rhyming-with + database + markov-trie + rhyme-trie + target-rhyme + n-gram-rank + target-rhyme-syllable-count + target-sentence-syllable-count + (constantly false) + identity)) + ([database + markov-trie + rhyme-trie + target-rhyme + n-gram-rank + target-rhyme-syllable-count + target-sentence-syllable-count + markov-remove-fn + rhyme-wordset-fn] + (let [eos (database prhyme/EOS) + bos (database prhyme/BOS) + rhyme (->> (rhyme-choices-walking-target-rhyme + rhyme-trie + target-rhyme + rhyme-wordset-fn) + rand-nth)] + (loop [phrase [rhyme]] + (if (<= target-sentence-syllable-count + (prhyme/count-syllables-of-phrase + (string/join " " (map second phrase)))) + phrase + (recur + (conj + phrase + (let [word (database + (get-next-markov + markov-trie + (into (vec (repeat (dec n-gram-rank) eos)) + (mapv (comp database second) phrase)) + (fn [[lookup [word frequency]]] + (or (markov-remove-fn [lookup [word frequency]]) + (#{eos bos} word)))))] + [(phonetics/get-phones word) word])))))))) ;;;; Demo ;;;; @@ -462,10 +573,17 @@ target-rhyme 3 3 - 7) + 7 + (fn [[lookup [word freq]]] + (= (database "begun") word)) + (fn [rhyming-words] + (->> (map (fn [[phones wordset]] + [phones (set/difference wordset #{"begun"})]) + rhyming-words) + (remove (fn [[phones wordset]] + (empty? wordset)))))) (map second) reverse)) - (map (partial remove #{prhyme/BOS})) (map (partial string/join " ")))) ;; => ("funeral has just begun" ;; "dead illusion overdone" @@ -497,29 +615,108 @@ reverse)) (map (partial remove #{prhyme/BOS})) (map data-transform/untokenize))) + ) - (rhyme-choices-walking-target-rhyme - rhyme-trie - (->> (prhyme/phrase->all-flex-rhyme-tailing-consonants-phones - "filling") - first - first - reverse)) +(defn sentence->phones + "Sentence is of the format + +[[[[F L OW1]] flow] + [[[AH0 N D] [AE1 N D]] and] + [[[S IY1 K]] seek] + [[[F IH1 NG G ER0 Z]] fingers] + [[[Y AO1 R] [Y UH1 R]] your] + [[[TH R UW1]] through] + [[[S T R EH1 NG K TH] [S T R EH1 NG TH]] + strength] + [[[F AY1 N D]] find] + [[[K AE1 N] [K AH0 N]] can]] + + Returns the concatenated list of phones so you can pluck some off and find + rhymes. + + Note that each word in the sentence can have more than one pronunciation. + This function chooses one randomly. + " + [sentence] + (->> sentence + (map #(update % 0 rand-nth)) + (apply map vector) + ((fn [[phones words]] + [(string/join " " (reduce into [] phones)) (string/join " " words)])) + (first))) + +(defn rhyme-from-scheme + "scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]] + + Will include as many syllables as possible in finding rhymes + and will choose randomly with equal chance from all possible rhymes." + [scheme database markov-trie rhyme-trie] + (loop [scheme scheme + line-phones {} + result []] + (cond + (empty? scheme) result + :else + (let [[pattern syllable-count] (first scheme) + banned-words (into #{} (->> result + (map (comp last last)))) + line (if (nil? (get line-phones pattern)) + ; Here, we need to make a choice about which pronunciation + ; we want to use to build line-phones. Choose randomly. + (tightly-generate-n-syllable-sentence + database + markov-trie + 3 + syllable-count) + (tightly-generate-n-syllable-sentence-rhyming-with + database + markov-trie + rhyme-trie + (take 4 (get line-phones pattern)) + 3 + 3 + syllable-count + (constantly false) + ;; words-fn + ;; ([("G" "AO1" "B") "bog"] [("G" "AO1" "F") "fog"]) + (fn [rhyming-words] + (->> (map (fn [[phones wordset]] + [phones (set/difference + wordset + banned-words)]) + rhyming-words) + (remove (fn [[phones wordset]] + (empty? wordset))))))) + rhyme (reverse (sentence->phones line))] + (recur (rest scheme) + (assoc line-phones pattern rhyme) + (conj result (reverse line))))))) +(comment + (tightly-generate-n-syllable-sentence + database + markov-trie + 3 + 10) - (let [target-rhyme ["IY1" "ER"]] - (->> (repeatedly - 10 - #(->> (tightly-generate-n-syllable-sentence-rhyming-with - database - markov-trie - rhyme-trie - target-rhyme - 3 - 3 - 7) - (map second) - reverse)) - (map (partial remove #{prhyme/BOS})) - (map (partial string/join " ")))) + (rhyme-from-scheme + '[[A 9] [A 9] [B 5] [B 5]] + database + markov-tight-trie + rhyme-trie) + + (tightly-generate-n-syllable-sentence-rhyming-with + database + markov-trie + rhyme-trie + (first + (first + (prhyme/phrase->all-flex-rhyme-tailing-consonants-phones + "bother me"))) + 3 + 3 + 9 + (constantly false) + (fn [[phones wordset]] + (set/difference wordset (into #{} [])))) ) diff --git a/src/com/owoga/prhyme/core.clj b/src/com/owoga/prhyme/core.clj index 11dc173..5808773 100644 --- a/src/com/owoga/prhyme/core.clj +++ b/src/com/owoga/prhyme/core.clj @@ -278,7 +278,7 @@ Returns all possible pronunciations. For hog -> haog, haag. - ROBOT -> '(OW1 AA2 T)" + ROBOT => '([(OW1 AA T) robot] [(OW1 AH T) robot])" [phrase] (->> phrase (#(string/split % #" ")) @@ -300,6 +300,7 @@ (comment (phrase->all-flex-rhyme-tailing-consonants-phones "robot") + ;; => ([("OW1" "AA" "T") "robot"] [("OW1" "AH" "T") "robot"]) ) diff --git a/src/com/owoga/prhyme/gen.clj b/src/com/owoga/prhyme/gen.clj deleted file mode 100644 index 8945a68..0000000 --- a/src/com/owoga/prhyme/gen.clj +++ /dev/null @@ -1,312 +0,0 @@ -(ns com.owoga.prhyme.gen - (:require [clojure.string :as string] - [com.owoga.prhyme.util.math :as math] - [com.owoga.prhyme.util :as util] - [com.owoga.prhyme.util.weighted-rand :as weighted-rand] - [com.owoga.prhyme.nlp.core :as nlp] - [com.owoga.prhyme.frp :as frp] - [com.owoga.prhyme.core :as prhyme])) - -(def words-map - (into {} (map #(vector (string/lower-case (:word %)) %) frp/words))) - -(defn merge-phrase-words - "Given multiple `Word`, like the words for 'well off', create a single `Word` - that is syllabified as ('well' 'off') rather than as the combined ('weh' - 'loff'). Useful for finding single-word rhymes of multiple-word targets. - - An example: 'war on crime' -> 'turpentine'. - As opposed to: 'war on crime' -> 'caw fawn lime'." - [phrase phrase-words] - (loop [merged (first phrase-words) - phrase-words (rest phrase-words)] - (cond - (and (empty? phrase-words) (empty? merged)) nil - (empty? phrase-words) (assoc merged :word phrase) - :else (recur (-> merged - (assoc :syllables (concat (:syllables merged) - (:syllables (first phrase-words)))) - (assoc :syllable-count (+ (:syllable-count merged) - (:syllable-count (first phrase-words)))) - (assoc :rimes (concat (:rimes merged) - (:rimes (first phrase-words)))) - (assoc :onsets (concat (:onsets merged) - (:onsets (first phrase-words)))) - (assoc :nuclei (concat (:nuclei merged) - (:nuclei (first phrase-words))))) - (rest phrase-words))))) - -(defn adjust-for-markov - [markov percent] - (let [target-markov-n (count (first (first markov)))] - (fn [[words target result]] - (if (>= (count result) target-markov-n) - (let [markov-options (markov (->> result - (take target-markov-n) - (map :normalized-word))) - markov-option-avg (/ (apply + (vals markov-options)) - (max 1 (count markov-options)))] - (if (nil? markov-options) - [words target result] - (let [[markovs non-markovs] - ((juxt filter remove) - #(markov-options (:normalized-word %)) - words) - weight-non-markovs (apply + (map :weight non-markovs)) - target-weight-markovs (* 100 percent weight-non-markovs) - count-markovs (count markovs) - adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))] - [(concat - (map - (fn [m] - (let [option (markov-options (:normalized-word m))] - (as-> m m - (assoc m :weight (* (/ option markov-option-avg) adjustment-markovs (:weight m))) - (assoc m :adjustment-for-markov (* (/ option markov-option-avg) adjustment-markovs))))) - markovs) - non-markovs) - target - result]))) - [words target result])))) - -(defn adjust-for-markov-with-boundaries - [markov percent] - (let [markov-n (count (first (first markov)))] - (fn [[words target result]] - (let [key (let [k (map :normalized-word (take markov-n result))] - (reverse - (if (> markov-n (count k)) - (concat k (repeat (- markov-n (count k)) nil)) - k))) - markov-options (markov key) - markov-option-avg (/ (apply + (vals markov-options)) - (max 1 (count markov-options)))] - (if (nil? markov-options) - [words target result] - (let [[markovs non-markovs] - ((juxt filter remove) - #(markov-options (:normalized-word %)) - words) - weight-non-markovs (apply + (map :weight non-markovs)) - target-weight-markovs (- (/ weight-non-markovs (- 1 percent)) - weight-non-markovs) - count-markovs (count markovs) - adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))] - [(concat - (map - (fn [m] - (let [option (markov-options (:normalized-word m))] - (as-> m m - (assoc m :weight (* (/ option markov-option-avg) adjustment-markovs (:weight m))) - (assoc m :adjustment-for-markov (* (/ option markov-option-avg) adjustment-markovs))))) - markovs) - non-markovs) - target - result])))))) - -(defn adjust-for-rimes - [dictionary percent] - (fn [[words target result]] - (let [words-with-rime-count - (map - (fn [word] - (assoc word :num-matching (count (frp/consecutive-matching target word :rimes)))) - words) - - [rhyming non-rhyming] - ((juxt filter remove) - #(< 0 (:num-matching %)) - words-with-rime-count) - - weight-non-rhyming (apply + (map :weight non-rhyming)) - target-weight-rhyming (* 100 percent weight-non-rhyming) - count-rhyming (count rhyming) - adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))] - [(concat - (map - (fn [word] - (as-> word word - (assoc word :weight (* adjustment-rhyming (:weight word))) - (assoc word :adjustment-for-rimes adjustment-rhyming))) - rhyming) - non-rhyming) - target - result]))) - -(defn attempt-gen-target-by-syllable-count [adj syllable-count words] - (loop [result '()] - (cond - (<= syllable-count (apply + (cons 0 (map :syllable-count result)))) - result - :else - (let [[weighted-words target result] (adj [words nil result])] - (recur (cons (weighted-rand/weighted-selection :weight weighted-words) result)))))) - -(defn gen-sentence-with-syllable-count [adj syllable-count words] - (->> (repeatedly - (fn [] - (attempt-gen-target-by-syllable-count adj syllable-count words))) - (filter #(= syllable-count (apply + (map :syllable-count %)))) - (map #(map :normalized-word %)) - (map #(string/join " " %)) - (filter nlp/valid-sentence?) - first)) - -(defn remove-selection-from-target [target selection] - (->> target - (#(assoc % :syllables (drop-last - (:syllable-count - selection) - (:syllables - target)))) - (#(assoc % :rimes (prhyme/rimes (:syllables %)))) - (#(assoc % :onsets (prhyme/onset+nucleus (:syllables %)))) - (#(assoc % :nuclei (prhyme/nucleus (:syllables %)))))) - -(defn selection-seq - ([words adjust target] - (selection-seq words adjust target '())) - ([words adjust target result] - (let [[weighted-words _ _] (adjust [words target result]) - selection (math/weighted-selection :weight weighted-words) - new-target (remove-selection-from-target target selection) - new-result (cons selection result)] - (cons selection - (lazy-seq (selection-seq words adjust new-target new-result)))))) - -(defn selection-stream - "Continuously make the first selection." - ([words adjust target] - (selection-stream words adjust target '())) - ([words adjust target result] - (let [[weighted-words _ _] (adjust [words target result])] - (repeatedly #(math/weighted-selection :weight weighted-words))))) - -(defn generate-prhyme [words adjust target stop?] - (loop [result '()])) - -(defn prhyme - "2020-10-21 iteration" - [words weights-adjuster target stop?] - (let [target (assoc target :original-syllables (:syllables target)) - words (map #(assoc % :weight 1) words)] - (loop [target target - result '() - sentinel 0] - (if (or (stop? target result) - (> sentinel 5)) - result - (let [[weighted-words _ _] (weights-adjuster [words target result]) - rng (weighted-rand/from-weights (map :weight weighted-words)) - index (weighted-rand/nextr rng nil) - selection (nth weighted-words index) - new-target (->> target - (#(assoc % :syllables (drop-last - (:syllable-count - selection) - (:syllables - target)))) - (#(assoc % :rimes (prhyme/rimes (:syllables %)))) - (#(assoc % :onsets (prhyme/onset+nucleus (:syllables %)))) - (#(assoc % :nuclei (prhyme/nucleus (:syllables %))))) - result (cons selection result)] - (recur new-target result (inc sentinel))))))) - -(defn attempt-gen-rhyme-with-syllable-count [adj syllable-count words target] - (prhyme - words - adj - target - (fn [target result] - (<= syllable-count (apply + (map :syllable-count result)))))) - -(defn gen-rhyme-with-syllable-count [adj syllable-count words target] - (->> (repeatedly - (fn [] - (attempt-gen-rhyme-with-syllable-count adj syllable-count words target))) - (filter #(= syllable-count (apply + (map :syllable-count %)))) - (map #(map :normalized-word %)) - (map #(string/join " " %)) - (filter nlp/valid-sentence?) - first)) - -(defn prhymer [words weights-adjuster target stop] - (cons (prhyme - words - weights-adjuster - target - stop) - (lazy-seq (prhymer words weights-adjuster target stop)))) - -(defn sentence-stop [target] - (fn [inner-target result] - (let [result-sentence (string/join " " (map :normalized-word result))] - (when-not (empty? result) - (or (nlp/valid-sentence? result-sentence) - (< (:syllable-count target) - (apply + (map :syllable-count result))) - (< 5 (count result))))))) - -(defn gen-prhymes [words adjust poem-lines] - (let [words (map #(assoc % :weight 1) words) - words-map (into {} (map #(vector (:normalized-word %) %) words))] - (map (fn [line] - (let [target (prhyme/phrase->word words line) - stop (sentence-stop target) - r (prhymer words adjust target stop)] - (string/join " " (map #(:normalized-word %) (first r))))) - poem-lines))) - -(defn phrase-syllable-count [phrase] - (->> phrase - (#(string/split % #" ")) - (map (partial prhyme/phrase->word frp/words)) - (map :syllable-count) - (apply +))) - -(defn filter-for-syllable-count [syllable-count coll] - (filter #(= syllable-count (phrase-syllable-count %)) coll)) - -(defn syllable-stop - [target] - (fn [inner-target result] - (<= (:syllable-count target) - (apply + (map :syllable-count result))))) - -(defn generate-rhyme-for-phrase - [words adjust phrase] - (let [words (map #(assoc % :weight 1) words) - words-map (into {} (map #(vector (:normalized-word %) %) words)) - target (prhyme/phrase->word words phrase)] - (prhymer words adjust target (syllable-stop target)))) - -#_(defn generate-prhymes [poem] - (let [r (partial generate-rhyme-for-phrase frp/popular adj)] - (fn [] - (->> poem - (map (fn [phrase] - (let [target (prhyme/phrase->word frp/popular phrase)] - (first - (filter - #(and - (or (< 0.9 (rand)) - (nlp/valid-sentence? (string/join " " (map :normalized-word %)))) - (= (:syllable-count target) - (apply + (map :syllable-count %)))) - (r phrase)))))) - (map (fn [line] (map #(:normalized-word %) line))) - (map #(string/join " " %)))))) - -(defn generate-prhymes-darkov [words adj phrase] - (let [target (prhyme/phrase->word words phrase) - r (generate-rhyme-for-phrase words adj target)] - (first - (filter - #(and - (or (< 0.9 (rand)) - (nlp/valid-sentence? (string/join " " (map :normalized-word %)))) - (= (:syllable-count target) - (apply + (map :syllable-count %)))) - r)) - (map (fn [line] (map #(:normalized-word %) line))) - (map #(string/join " " %)))) diff --git a/src/com/owoga/prhyme/limerick.clj b/src/com/owoga/prhyme/limerick.clj index ac212b1..71ae818 100644 --- a/src/com/owoga/prhyme/limerick.clj +++ b/src/com/owoga/prhyme/limerick.clj @@ -1,6 +1,5 @@ (ns com.owoga.prhyme.limerick - (:require [com.owoga.prhyme.gen :as gen] - [com.owoga.prhyme.generation.weighted-selection :as weighted-selection] + (:require [com.owoga.prhyme.generation.weighted-selection :as weighted-selection] [com.owoga.prhyme.util.math :as math] [com.owoga.prhyme.nlp.core :as nlp] [clojure.string :as string] diff --git a/test/com/owoga/frp/infrastructure-test.clj b/test/com/owoga/frp/infrastructure-test.clj deleted file mode 100644 index bb5e96d..0000000 --- a/test/com/owoga/frp/infrastructure-test.clj +++ /dev/null @@ -1,35 +0,0 @@ -(ns com.owoga.frp.infrastructure-test - (:require [com.owoga.frp.infrastructure :as frp] - [clojure.test :refer [deftest is testing]])) - -(deftest test-project - (testing "projection" - (let [Offer (frp/->BaseRelVar 'Offer (atom #{}) '()) - OfferPrices (frp/project Offer [:price])] - (frp/load! Offer #{{:address "123 Fake St." :price 2e5}}) - (is (= @OfferPrices #{{:price 2e5}}))))) - -(deftest test-insert! - (testing "insert!" - (let [Offer (frp/->BaseRelVar 'Offer (atom #{}) '())] - (frp/insert! Offer {:address "123 Fake St." :price 1.5e5}) - (is (= @Offer #{{:address "123 Fake St." :price 1.5e5}}))))) - -(deftest test-defrelvar - (testing "failed constraint raises" - (let [Offer (frp/defrelvar Offer (fn [offers] (map #(> (:price %) 0) offers)))] - (is (thrown-with-msg? - Exception - #"Constraint Exception" - (frp/insert! Offer {:price -1}))))) - (testing "passed constraint doesn't raise" - (let [Offer (frp/defrelvar Offer (fn [offers] (map #(> (:price %) 0) offers)))] - (frp/insert! Offer {:price 20}) - (is (= @Offer #{{:price 20}}))))) - -(deftest test-extend - (testing "extend-" - (let [Offer (frp/->BaseRelVar 'Offer (atom #{}) '())] - (frp/load! Offer #{{:price 1e6}}) - (frp/extend- Offer [:price-band (fn [e] (if (> (:price e) 1e6) :high :low))]) - (is (= :low (-> @Offer first :price-band))))))