From 1cb959010c524f847c3aaacd2e823c4c02ea8a1f Mon Sep 17 00:00:00 2001 From: Eric Ihli Date: Tue, 27 Oct 2020 20:04:12 -0700 Subject: [PATCH] WIP --- src/com/owoga/prhyme/core.clj | 137 ++--- src/com/owoga/prhyme/frp.clj | 61 +-- src/com/owoga/prhyme/gen.clj | 42 -- src/com/owoga/prhyme/grammar.clj | 38 -- src/com/owoga/prhyme/lymeric.clj | 21 +- src/com/owoga/prhyme/syllabify.clj | 9 +- src/com/owoga/prhyme/util.clj | 73 +-- src/com/owoga/prhyme/util/lovecraft.clj | 639 ------------------------ 8 files changed, 113 insertions(+), 907 deletions(-) delete mode 100644 src/com/owoga/prhyme/grammar.clj delete mode 100644 src/com/owoga/prhyme/util/lovecraft.clj diff --git a/src/com/owoga/prhyme/core.clj b/src/com/owoga/prhyme/core.clj index 9fba6bf..4d47d73 100644 --- a/src/com/owoga/prhyme/core.clj +++ b/src/com/owoga/prhyme/core.clj @@ -1,31 +1,10 @@ (ns com.owoga.prhyme.core - (:require [clojure.java.io :as io] - [clojure.string :as string] + (:require [clojure.string :as string] [clojure.set :as set] [com.owoga.prhyme.util :as u] - [com.owoga.prhyme.syllabify :as s])) - -(def dictionary - (line-seq (io/reader (io/resource "cmudict_SPHINX_40")))) - -(def words (map u/prepare-word dictionary)) - -(def popular - (set (line-seq (io/reader (io/resource "popular.txt"))))) - -(def adverbs - (set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt")))))) - -(def adjectives - (set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt")))))) - -(def verbs - (set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt")))))) - -(def nouns - (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt")))))) - -(defrecord Word [word syllables syllable-count rimes onsets nuclei]) + [com.owoga.prhyme.syllabify :as s] + [com.owoga.prhyme.data.dictionary :as dict] + [com.owoga.prhyme.data.phonetics :as phonetics])) ;;; Typical rhyme model (explanation of following 3 functions) ;; @@ -48,51 +27,74 @@ (defn rimes [syllables] (->> syllables (map reverse) - (map #(first (u/take-through u/vowel %))) + (map #(first (u/take-through phonetics/vowel %))) (map reverse))) (defn onset+nucleus [syllables] (->> syllables - (map #(first (u/take-through u/vowel %))))) + (map #(first (u/take-through phonetics/vowel %))))) (defn nucleus [syllables] - (map #(list (last (first (u/take-through u/vowel %)))) syllables)) - -(defn make-word [word] - (let [syllables (s/syllabify (rest word)) - rimes (rimes syllables) - onsets (onset+nucleus syllables) - nuclei (nucleus syllables)] - (->> (->Word - (first word) - syllables - (count syllables) - rimes - onsets - nuclei) - (#(assoc % :norm-word (string/lower-case - (string/replace - (:word %) - #"\(\d+\)" - ""))))))) - -(defn make-word-1 [word phonemes] + (map #(list (last (first (u/take-through phonetics/vowel %)))) syllables)) + +(defn merge-phrase-words + "Given multiple `Word`, like the words for 'well off', create a single `Word` + that is syllabified as ('well' 'off') rather than as the combined ('weh' + 'loff'). Useful for finding single-word rhymes of multiple-word targets. + + An example: 'war on crime' -> 'turpentine'. + As opposed to: 'war on crime' -> 'caw fawn lime'." + [phrase phrase-words] + (loop [merged (first phrase-words) + phrase-words (rest phrase-words)] + (cond + (and (empty? phrase-words) (empty? merged)) nil + (empty? phrase-words) (assoc merged :word phrase) + :else (recur (-> merged + (assoc :syllables (concat (:syllables merged) + (:syllables (first phrase-words)))) + (assoc :syllable-count (+ (:syllable-count merged) + (:syllable-count (first phrase-words)))) + (assoc :rimes (concat (:rimes merged) + (:rimes (first phrase-words)))) + (assoc :onsets (concat (:onsets merged) + (:onsets (first phrase-words)))) + (assoc :nuclei (concat (:nuclei merged) + (:nuclei (first phrase-words))))) + (rest phrase-words))))) + +(defn cmu->prhyme [[word & phonemes]] (let [syllables (s/syllabify phonemes) - rimes (rimes syllables) - onsets (onset+nucleus syllables) - nuclei (nucleus syllables)] - (->> (->Word - (string/lower-case word) - syllables - (count syllables) - rimes - onsets - nuclei) - ;; CMU dict has multiple pronounciations for some words. - ;; foobar(1), foobar(2), etc... - ;; it's useful to have the normalized word for situations - ;; when you don't care how it's pronounced. - (#(assoc % :normalized-word (string/replace (:word %) #"\(\d+\)" "")))))) + rimes (rimes syllables) + onsets (onset+nucleus syllables) + nuclei (nucleus syllables)] + {:word word + :syllables syllables + :syllable-count (count syllables) + :rimes rimes + :onsets onsets + :nuclei nuclei + :weight 1 + :normalized-word (-> word + string/lower-case + (string/replace #"\(\d+\)" ""))})) + +(defn phrase->word + "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word + that has the correct syllables, rimes, onsets, and nucleus. This way we can + rhyme against phrases that aren't in the dictionary, as long as the words that + make up the phrase are in the dictionary. Returns nil if the word is not in + the dictionary." + [words phrase] + (->> (string/split phrase #"[ -]") + (map (fn [phrase-word] + (let [word (first (filter (fn [word] + (= phrase-word (:norm-word word))) + words))] + (if (nil? word) + (cmu->prhyme (cons phrase-word (u/get-phones phrase-word))) + word)))) + (merge-phrase-words phrase))) (defn words-by-rime* [words] (let [words-with-rime (->> words @@ -101,7 +103,7 @@ (map #(map reverse %)) (map #(map (fn [syllable] - (first (u/take-through u/vowel syllable))) %)) + (first (u/take-through phonetics/vowel syllable))) %)) (map #(map reverse %)) (map reverse) (map #(cons %1 %2) (map first words)))] @@ -118,7 +120,7 @@ (cons val (:words existing))) (rest words))))))) -(def words-by-rime (words-by-rime* words)) +(def words-by-rime (words-by-rime* dict/cmu-dict)) (defn words-by-onset-nucleus* [words] (let [words-with-onset-nucleus (->> words @@ -126,7 +128,7 @@ (map s/syllabify) (map #(map (fn [syllable] - (first (u/take-through u/vowel syllable))) + (first (u/take-through phonetics/vowel syllable))) %)) (map #(cons %1 %2) (map first words)))] (loop [by-onset {} @@ -152,7 +154,7 @@ (fn [syllable] (list (last - (first (u/take-through u/vowel syllable))))) + (first (u/take-through phonetics/vowel syllable))))) %)) (map #(cons %1 %2) (map first words)))] (loop [by-nucleus {} @@ -198,7 +200,6 @@ (defn filter-to-syllable-count [n words] (filter (fn [word] (= n (count (s/syllabify (rest word))))) words)) - (defn rhymes? "What does it mean for something to rhyme?" [a b] diff --git a/src/com/owoga/prhyme/frp.clj b/src/com/owoga/prhyme/frp.clj index 9221181..315e6ea 100644 --- a/src/com/owoga/prhyme/frp.clj +++ b/src/com/owoga/prhyme/frp.clj @@ -2,6 +2,7 @@ (:require [clojure.java.io :as io] [clojure.string :as string] [clojure.set :as set] + [com.owoga.prhyme.data.thesaurus :refer [thesaurus]] [com.owoga.prhyme.core :as p] [com.owoga.prhyme.util :as u] [com.owoga.prhyme.syllabify :as s])) @@ -9,35 +10,11 @@ (def dictionary (line-seq (io/reader (io/resource "cmudict_SPHINX_40")))) -(def thesaurus - (->> (line-seq (io/reader (io/resource "mthesaur.txt"))) - (map #(string/split % #",")) - (map #(vector (first %) (rest %))) - (into {}))) - -(defrecord Word [word syllables syllable-count rimes onsets nuclei]) - -(defn make-word [word] - (let [syllables (s/syllabify (rest word)) - rimes (p/rimes syllables) - onsets (p/onset+nucleus syllables) - nuclei (p/nucleus syllables)] - (->> (->Word - (first word) - syllables - (count syllables) - rimes - onsets - nuclei) - (#(assoc % :norm-word (string/lower-case - (string/replace - (:word %) - #"\(\d+\)" - ""))))))) + (def words (->> dictionary (map u/prepare-word) - (map make-word))) + (map p/cmu->prhyme))) (def popular-dict (set (line-seq (io/reader (io/resource "popular.txt"))))) @@ -69,20 +46,6 @@ (:nuclei (first phrase-words))))) (rest phrase-words))))) -(defn phrase->word - "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word - that has the correct syllables, rimes, onsets, and nucleus. This way we can - rhyme against phrases that aren't in the dictionary, as long as the words that - make up the phrase are in the dictionary. Returns nil if the word is not in - the dictionary." - [words phrase] - (->> (string/split phrase #"[ -]") - (map (fn [phrase-word] - (first (filter (fn [word] - (= phrase-word (string/lower-case (:norm-word word)))) - words)))) - (merge-phrase-words phrase))) - (defn partition-word [word] (->> word (:syllables) @@ -155,20 +118,18 @@ (filter (fn [word] (some #(re-matches (re-pattern (str "(?i)" %)) (:word word)) synonyms)) words))) -(defn phrymo [dictionary phrase] - (phrase->word dictionary phrase)) - (comment - (->> (make-word ["foobar" "F" "UW" "B" "AA" "R"]) + (->> (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"]) (#(assoc % :rimes? true)) (prhyme words) (filter #(= (:syllable-count %) 2)) - (sort-by #(consecutive-matching - % - (make-word ["foobar" "F" "UW" "B" "AA" "R"]) - :rimes))) + (sort-by #(count + (consecutive-matching + % + (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"]) + :rimes)))) - (as-> (make-word ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word + (as-> (p/cmu->prhyme ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word (into word {:rimes? true}) (prhyme popular word) (mapcat #(matching-synonyms thesaurus % word) @@ -177,7 +138,7 @@ "distress" "corpse" "necrotic" "zombie" "coma" "monster"])) - (as-> (make-word ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word + (as-> (p/cmu->prhyme ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word (into word {:rimes? true}) (prhyme popular word) (mapcat #(matching-synonyms thesaurus % word) diff --git a/src/com/owoga/prhyme/gen.clj b/src/com/owoga/prhyme/gen.clj index 3a99f49..4be0bce 100644 --- a/src/com/owoga/prhyme/gen.clj +++ b/src/com/owoga/prhyme/gen.clj @@ -36,23 +36,6 @@ (:nuclei (first phrase-words))))) (rest phrase-words))))) -(defn phrase->word - "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word - that has the correct syllables, rimes, onsets, and nucleus. This way we can - rhyme against phrases that aren't in the dictionary, as long as the words that - make up the phrase are in the dictionary. Returns nil if the word is not in - the dictionary." - [words phrase] - (->> (string/split phrase #"[ -]") - (map (fn [phrase-word] - (let [word (first (filter (fn [word] - (= phrase-word (:norm-word word))) - words))] - (if (nil? word) - (frp/make-word (cons phrase-word (util/get-phones phrase-word))) - word)))) - (merge-phrase-words phrase))) - (defn adjust-for-markov [markov percent] (let [target-markov-n (count (first (first markov)))] @@ -121,31 +104,6 @@ target result])))))) -(comment - (let [markov-1-example - {'("dream") {"a" 1} - '("a") {"me" 1}} - markov-2-example - {'(nil nil) {"dream" 1} - '(nil "dream") {"a" 1} - '("dream" "a") {"me" 1} - '("a" "me") {"give" 1} - '("give" nil) {nil 1}} - result-a '() - result-b '({:norm-word "dream", - :weight 9.000000000000002, - :adjustment-for-markov 9.000000000000002}) - words [{:norm-word "dream" :weight 1} - {:norm-word "foo" :weight 1} - {:norm-word "a" :weight 1} - {:norm-word "me" :weight 1} - {:norm-word "give" :weight 1}] - adj (adjust-for-markov-with-boundaries markov-2-example 0.9)] - (adj [words 'target result-b])) - - ((adjust-for-markov-with-boundaries {'("foo" "bar") {}} 0.5) - ['() '() '("hi" "bye" "there")])) - (defn adjust-for-rimes [dictionary percent] (fn [[words target result]] diff --git a/src/com/owoga/prhyme/grammar.clj b/src/com/owoga/prhyme/grammar.clj deleted file mode 100644 index be87408..0000000 --- a/src/com/owoga/prhyme/grammar.clj +++ /dev/null @@ -1,38 +0,0 @@ -(ns com.owoga.prhyme.grammar) - -(def root-states - [{::tk/name :failed - ::tk/transitions [{::tk/on tk/_ ::tk/to :failed}]} - {::tk/name :object - ::tk/transitions [{::tk/on :adjectives ::tk/to :obj-adj} - {::tk/on :nouns ::tk/to :obj-noun} - {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]} - {::tk/name :obj-adj - ::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun} - {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]} - {::tk/name :obj-noun - ::tk/transitions [{::tk/on :verbs ::tk/to :verbs} - {::tk/on :adverbs ::tk/to :adverbs} - {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]} - {::tk/name :verbs - ::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun} - {::tk/on :adjectives ::tk/to :subj-adj} - {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]} - {::tk/name :adverbs - ::tk/transitions [{::tk/on :verbs ::tk/to :verbs} - {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]} - {::tk/name :subj-noun - ::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun} - {::tk/on :adjectives ::tk/to :obj-adj} - {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]} - {::tk/name :subj-adj - ::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun} - {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}]) - -(def root-fsm - {::tk/states root-states - ::tk/action! (fn [{::tk/keys [signal action] :as fsm}] - (case signal - :failed (println "Failed! " signal " " action)) - fsm) - ::tk/state :object}) diff --git a/src/com/owoga/prhyme/lymeric.clj b/src/com/owoga/prhyme/lymeric.clj index bab3f99..458962b 100644 --- a/src/com/owoga/prhyme/lymeric.clj +++ b/src/com/owoga/prhyme/lymeric.clj @@ -2,7 +2,9 @@ (:require [com.owoga.prhyme.gen :as gen] [com.owoga.prhyme.generation.weighted-selection :as weighted-selection] [clojure.string :as string] + [com.owoga.prhyme.core :as prhyme] [com.owoga.prhyme.frp :as frp] + [com.owoga.prhyme.util :as util] [taoensso.nippy :as nippy] [clojure.java.io :as io])) @@ -21,15 +23,12 @@ banned-words (into #{} (->> result (map #(string/split % #" ")) (map #(last %)))) - adj (apply - comp - (remove - nil? - [(weighted-selection/adjust-for-markov - markov - 0.99) - (when (rhymes pattern) - (weighted-selection/adjust-for-tail-rhyme 0.99))])) + adj (util/comp-rnil + (weighted-selection/adjust-for-markov + markov + 0.99) + (when (rhymes pattern) + (weighted-selection/adjust-for-tail-rhyme 0.99))) rhyme (if (nil? (get rhymes pattern)) (gen/gen-sentence-with-syllable-count adj @@ -40,7 +39,7 @@ syllable-count (remove #(banned-words (:norm-word %)) base-words) - (frp/phrase->word frp/words (get rhymes pattern))))] + (prhyme/phrase->word frp/words (get rhymes pattern))))] (recur (rest scheme) (assoc rhymes pattern rhyme) (conj result rhyme))))))) @@ -55,7 +54,7 @@ (->> (gen/selection-seq (map #(assoc % :weight 1) frp/words) (weighted-selection/adjust-for-rhymes 0.99) - (frp/phrase->word frp/words "hi there my boy")) + (prhyme/phrase->word frp/words "hi there my boy")) (take 3)) ["bishop larch smitten us dwell" diff --git a/src/com/owoga/prhyme/syllabify.clj b/src/com/owoga/prhyme/syllabify.clj index e286ff4..9f8868f 100644 --- a/src/com/owoga/prhyme/syllabify.clj +++ b/src/com/owoga/prhyme/syllabify.clj @@ -1,5 +1,6 @@ (ns com.owoga.prhyme.syllabify - (:require [com.owoga.prhyme.util :as p])) + (:require [com.owoga.prhyme.data.phonetics :as phonetics] + [com.owoga.prhyme.util :as util])) ;; ER is not yet handled properly. ;; PARENTHESES is syllabified as ("P" "ER" "IH" "N") ("TH" "UH") ("S" "IY" "S") ;; Glides are also broken. "R OY AH L" gets syllabified as a single syllable. @@ -35,16 +36,16 @@ (def lax-vowels #{"EH" "IH" "AE" "AH" "UH"}) (defn sonority [phone] - (.indexOf sonority-hierarchy (p/phonemap phone))) + (.indexOf sonority-hierarchy (phonetics/phonemap phone))) (defn vowel? [phone] - (p/vowel phone)) + (phonetics/vowel phone)) (defn >sonorous [a b] (> (sonority a) (sonority b))) (defn slurp-rime [phones] - (let [splits (p/take-through vowel? phones)] + (let [splits (util/take-through vowel? phones)] [(first splits) (flatten (rest splits))])) (defn slurp-onset [phones] diff --git a/src/com/owoga/prhyme/util.clj b/src/com/owoga/prhyme/util.clj index 0abda38..0b0068f 100644 --- a/src/com/owoga/prhyme/util.clj +++ b/src/com/owoga/prhyme/util.clj @@ -1,41 +1,14 @@ (ns com.owoga.prhyme.util (:require [clojure.java.io :as io] [clojure.string :as string] - [clojure.set :as set] - [clojure.zip :as z]) - (:import (com.sun.speech.freetts.lexicon LetterToSoundImpl) - (com.sun.speech.freetts.en.us CMULexicon) - (java.io File))) + [clojure.set :as set]) + (:import (com.sun.speech.freetts.en.us CMULexicon))) (defn prepare-word "Splits whitespace-separated fields into a sequence." [line] (string/split line #"[\t ]")) -(def dictionary - (line-seq (io/reader (io/resource "cmudict_SPHINX_40")))) - -(def words (map prepare-word dictionary)) - -(def words-map - (into {} (map #(vector (string/lower-case (first %)) {:phonemes (rest %)}) words))) - -(def popular - (set (line-seq (io/reader (io/resource "popular.txt"))))) - -(def adverbs - (set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt")))))) - -(def adjectives - (set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt")))))) - -(def verbs - (set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt")))))) - -(def nouns - (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt")))))) - - (CMULexicon. "cmulex" true) (def cmu-lexicon (CMULexicon/getInstance true)) @@ -54,24 +27,6 @@ (map convert-to-sphinx) (map string/upper-case))) -(def phonemap - (->> (io/reader (io/resource "cmudict-0.7b.phones")) - (line-seq) - (map #(string/split % #"\t")) - (into {}))) - -(def long-vowel #{"EY" "IY" "AY" "OW" "UW"}) - -(def short-vowel #{"AA" "AE" "AH" "AO" "AW" "EH" "ER" "IH" "OY" "UH"}) - -(def vowel (set/union long-vowel short-vowel)) - -(def consonant (set/difference (into #{} (keys phonemap)) vowel)) - -(def syllable-end (set/union consonant long-vowel)) - -(def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"}) - (defn window [n] (fn [coll] (cond @@ -80,6 +35,22 @@ :else (cons (take n coll) (lazy-seq ((window n) (rest coll))))))) +(defn comp-rnil + "Compose functions, ignoring nil values." + [& functions] + (apply comp (remove nil? functions))) + +(defn reduce-while + [pred f val coll] + (loop [val val + coll coll] + (cond + (empty? coll) val + (pred val) + (let [new-val (f val (first coll))] + (recur new-val (rest coll))) + :else val))) + (defn extend-coll [coll val n] (concat (repeat n val) coll @@ -96,12 +67,6 @@ (defn clean-text [text] (string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" ""))) -(defn english? [text] - (let [words (string/split text #"\s+") - english-words - (->> words (filter #(words-map (string/lower-case %))))] - (< 0.7 (/ (count english-words) (max 1 (count words)))))) - (defn padr [val n coll] (concat coll (repeat n val))) @@ -160,8 +125,6 @@ (defn count-pred [pred coll] (count (filter pred coll))) -(def count-vowels (partial count-pred vowel)) - (defn single? [coll] (= 1 (count coll))) (defn partitions diff --git a/src/com/owoga/prhyme/util/lovecraft.clj b/src/com/owoga/prhyme/util/lovecraft.clj deleted file mode 100644 index f6953d9..0000000 --- a/src/com/owoga/prhyme/util/lovecraft.clj +++ /dev/null @@ -1,639 +0,0 @@ -(ns com.owoga.prhyme.util.lovecraft - (:require [net.cgrand.enlive-html :as html] - [clojure.string :as string] - [com.owoga.prhyme.util.weighted-rand :as wr] - [com.owoga.prhyme.core :as prhyme] - [com.owoga.prhyme.util.nlp :as nlp] - [taoensso.tufte :as tufte :refer [defnp p profiled profile]] - [com.owoga.prhyme.frp :as frp] - [clojure.java.io :as io] - [clojure.set :as set])) - -(tufte/add-basic-println-handler! {}) - -(def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/") - -(def words-map - (into {} (map #(vector (string/lower-case (:word %)) %) frp/words))) - -(defn fetch-url [url] - (html/html-resource (java.net.URL. url))) - -(comment - (fetch-url *base-url*)) - -(defn links [] - (map - #(str *base-url* (first (html/attr-values % :href))) - (html/select - (fetch-url *base-url*) - [:li :> [:a (html/attr? :href)]]))) - -(defn contentful-sections [nodes] - (->> nodes - (map html/text) - (filter #(> (count %) 100)))) - -(defn text-from-link [link] - (->> (html/select - (fetch-url link) - [:body]) - (first) - (html/text) - ((fn [s] (string/replace s #"[\s\u00A0]+" " "))))) - -(defn cleanup [content] - (-> content - (string/replace #"Return to.*$" "") - (string/replace #"Home.*?This Site" "") - (string/replace #"[^a-zA-Z -]+" ""))) - -(defn tokens [content] - (string/split content #"\s+")) - -(defn append-to-file [filepath text] - (with-open [w (io/writer filepath :append true)] - (.write w text))) - -(defn scrape [] - (run! - (fn [link] - (->> (text-from-link link) - (cleanup) - (#(str % "\n")) - (append-to-file "lovecraft.txt"))) - (take 10 (links)))) - -(defn tokens-from-file [file] - (with-open [r (io/reader file)] - (tokens (slurp r)))) - -(defn window [n] - (fn [coll] - (cond - (empty? coll) [] - (< (count coll) n) [] - :else (cons (take n coll) - (lazy-seq ((window n) (drop n coll))))))) - -(defnp markov [tokens] - (->> tokens - (map - (fn [token] - (let [k (butlast token) - v (last token)] - [k v]))) - (reduce - (fn [a [k v]] - (update-in a [k v] (fnil inc 0))) - {}))) - -(defnp running-total - ([coll] - (running-total coll 0)) - ([coll last-val] - (cond - (empty? coll) nil - :else (cons (+ last-val (first coll)) - (lazy-seq - (running-total - (rest coll) - (+ last-val (first coll)))))))) - -(defnp weighted-rand [weights] - (let [running-weights (running-total weights) - rand-val (rand (last running-weights))] - (loop [i 0] - (if (> (nth running-weights i) rand-val) - i - (recur (inc i)))))) - -(defnp choose-from-markov-possibilities [possibilities] - (if (empty? possibilities) - nil - (let [weights (vals possibilities) - rng (wr/from-weights weights) - index (wr/nextr rng nil)] - (nth (keys possibilities) index)))) - -(def word-set (into #{} (->> prhyme/words - (map first) - (map string/lower-case) - (map #(string/replace % #"\(\d+\)" ""))))) - -(defn normalize-tokens [tokens] - (->> tokens - (map string/lower-case) - (filter word-set))) - -(defn main [] - (->> (tokens-from-file "lovecraft.txt") - (reverse) - (normalize-tokens) - ((window 2)) - (markov) - (into {}))) - -(defn make-markov-picker [markov-data] - (fn [k] - (choose-from-markov-possibilities - (get markov-data k {})))) - -(defn synonym? - "Given a possibility, like [\"foo\" 3] - which says that foo follows a particular key with - a weight of 3, a word is a synonym of that possibility - if the word is a synonym ." - [p synonyms] - (synonyms p)) - -(defnp adjust-for-synonyms - "If a word is in a set of synonyms, adjust its weight upwards." - [synonyms] - (fn [possibilities] - (reduce - (fn [p s] - (if (s p) - (update p s #(* 5 %)) - p)) - possibilities - synonyms))) - -(defnp adjust-for-rimes - [target-rime dictionary] - (fn [possibilities] - (into - {} - (map - (fn [[p v]] - (let [possibility (get dictionary p) - factor (count - (frp/consecutive-matching - target-rime - possibility - :rimes))] - [p (* v (max 1 (* factor 4)))])) - possibilities)))) - -(comment - ((adjust-for-synonyms #{"war" "famine"}) - {"war" 1 - "disease" 3}) - ;; => {"war" 5, "disease" 3} - ((adjust-for-rimes - (frp/make-word ["magic" "M" "AE" "JH" "IH" "K"]) - words-map) - {"tragic" 3 - "trick" 2 - "foo" 1}) - ;; => {"tragic" 24, "trick" 8, "foo" 1} - ) - -(defonce lovecraft-markov (read-string (slurp "lovecraft.edn"))) -(defonce markover (make-markov-picker lovecraft-markov)) - -(defn markov-key [key-fn] - (fn [text] - (key-fn text))) - -(defn gen-from [m p initial] - (loop [r (list initial)] - (cond - (p r) (recur (cons (m (list (first r))) r)) - :else r))) - - -(defn rhyming-words - "List of rhyming words sorted by quality of rhyme." - [target] - (let [target-phrase (->> target - (frp/phrase->word frp/words) - (#(assoc % :rimes? true)))] - (->> target-phrase - (#(assoc % :rimes? true)) - (frp/prhyme frp/words) - (sort-by - #(- (count - (frp/consecutive-matching - % - target-phrase - :rimes))))))) - -(defn markov-rhymes [markov-data rhyming-words] - (->> (map - (fn [word] - (->> word - :word - string/lower-case - (#(string/replace % #"\(\d+\)" "")) - (#(vector % (get markov-data (list %)))))) - rhyming-words) - (into #{}) - (remove - (fn [[w p]] - (nil? p))))) - -(defn markov-gen [markov-data initial] - (let [m (make-markov-picker markov-data)] - (loop [r initial] - (if (> (count r) 5) - r - (recur (cons (m (list (first r))) - r)))))) - -(defn make-rhymes [markov-data target] - (let [target-word (frp/phrase->word frp/words target) - rhyming-words (rhyming-words target) - markov--rhymes (markov-rhymes markov-data rhyming-words) - rime-adjuster (adjust-for-rimes target-word words-map) - modified-markov-data - (merge - markov-data - (into {} - (map (fn [[word weights]] - [word (rime-adjuster weights)]) - markov--rhymes)))] - (->> rhyming-words - (markov-rhymes modified-markov-data) - (map - (fn [[k v]] - (markov-gen modified-markov-data (list k)))) - (map #(remove nil? %))))) - -(defn adjust-for-over-syllables - "Adjust weights to prefer not going over the number - of syllables of the target word." - [target] - (fn [words] - (p :adjust-for-syllables - (map - (fn [word] - (if (or (nil? (:syllable-count word)) - (nil? (:syllables target))) - (println word target)) - (cond - (= (:syllable-count word) (count (:syllables target))) - (as-> word word - (assoc word :weight (* 3 (:weight word))) - (assoc word :adjusted-for-syllables-factor 3)) - - (< (:syllable-count word) (count (:syllables target))) - (as-> word word - (assoc word :weight (* 2 (:weight word))) - (assoc word :adjusted-for-syllables-factor 2)) - - :else - (as-> word word - (assoc word :weight (* 1 (:weight word))) - (assoc word :adjusted-for-syllables-factor 1)))) - words)))) - -(comment - (let [words (->> ["distort" "kiss" "sport"] - (map #(frp/phrase->word frp/words %)) - (map #(assoc % :weight 1))) - target (->> "report" - (frp/phrase->word frp/words) - (#(assoc % :syllables (:syllables %)))) - adjuster (adjust-for-over-syllables target)] - (adjuster words))) - -(defn adjust-for-rhymes - "Adjust weights to prefer words that rhyme" - [target] - (fn [words] - (p :adjust-for-rhymes - (map - (fn [word] - (let [factor (max 0.001 (count (frp/consecutive-matching word target :rimes)))] - (as-> word word - (assoc word :weight (* factor (:weight word))) - (assoc word :adjust-for-rhyme-factor factor)))) - words)))) - -(defn adjust-for-rhymes-1 - "Adjust weights to prefer words that rhyme" - [target percent] - (fn [words] - (let [ratio (/ percent (- 1 percent)) - [rhymes non-rhymes] - ((juxt filter remove) - (fn [word] - (< 0 (count (frp/consecutive-matching word target :rimes)))) - words) - weight-non-rhymes (apply + (map :weight non-rhymes)) - target-weight-rhymes (* ratio weight-non-rhymes) - count-rhymes (max 1 (count rhymes)) - adjustment-rhyme (/ target-weight-rhymes count-rhymes)] - (concat - non-rhymes - (map - (fn [rhyme] - (as-> rhyme rhyme - (assoc rhyme :weight (* adjustment-rhyme (:weight rhyme))) - (assoc rhyme :adjust-for-rhyme-factor adjustment-rhyme))) - rhymes))))) - -(defn adjust-for-membership-1 - [set_ percent] - (let [ratio (- 1 percent)] - (fn [words] - (let [[members non-members] - ((juxt filter remove) - #(set_ (:norm-word %)) - words) - weight-non-members (apply + (map :weight non-members)) - target-weight-members (* ratio weight-non-members) - count-members (count members) - adjustment-members (/ target-weight-members count-members)] - (concat - (map - (fn [member] - (as-> member member - (assoc member :weight (* adjustment-members (:weight member))) - (assoc member :adjustment-for-membership adjustment-members))) - members) - non-members))))) - -(comment - (let [words (->> ["distort" "kiss" "sport"] - (map #(frp/phrase->word frp/words %)) - (map #(assoc % :weight 1))) - target (->> "report" - (frp/phrase->word frp/words) - (#(assoc % :remaining-syllables (:syllables %)))) - rhyme-adjuster (adjust-for-rhymes target) - syllable-count-adjuster (adjust-for-over-syllables target)] - (syllable-count-adjuster (rhyme-adjuster words)))) - -(defn adjust-for-membership [set_] - (fn [words] - (map - (fn [word] - (if (set_ (:norm-word word)) - (as-> word word - (assoc word :weight (* 2 (:weight word))) - (assoc word :adjust-for-membership-factor 2)) - (assoc word :adjust-for-membership-factor 1))) - words))) - -(defn filter-for-membership [set_] - (fn [words] - (map - (fn [word] - (if-not (set_ (:norm-word word)) - (as-> word word - (assoc word :weight (* 0.01 (:weight word))) - (assoc word :filter-for-membership-factor 0.01)) - word)) - words))) - -(defn adjust-for-markov [markov-options] - (let [markov-set (into #{} (map first (keys markov-options)))] - (fn [words] - (let [result (map - (fn [word] - (if (markov-set (:norm-word word)) - (as-> word word - (assoc word :weight (* 100 (:weight word))) - (assoc word :adjust-for-markov-factor 100)) - (assoc word :adjust-for-markov-factor 1))) - words)] - result)))) - -(comment - (let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))] - (take 5 (markov-adjuster frp/words)))) -(defn adjust-for-membership-1 - [set_ percent] - (let [ratio (- 1 percent)] - (fn [words] - (let [[members non-members] - ((juxt filter remove) - #(set_ (:norm-word %)) - words) - weight-non-members (apply + (map :weight non-members)) - target-weight-members (* ratio weight-non-members) - count-members (count members) - adjustment-members (/ target-weight-members count-members)] - (concat - (map - (fn [member] - (as-> member member - (assoc member :weight (* adjustment-members (:weight member))) - (assoc member :adjustment-for-membership adjustment-members))) - members) - non-members))))) - -(defn adjust-for-markov-1 - [markov-options percent] - (let [ratio (- 1 percent)] - (fn [words] - (if (nil? markov-options) - words - (let [[markovs non-markovs] - ((juxt filter remove) - #(markov-options (:norm-word %)) - words) - weight-non-markovs (apply + (map :weight non-markovs)) - target-weight-markovs (* ratio weight-non-markovs) - count-markovs (count markovs) - adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))] - (concat - (map - (fn [markov] - (as-> markov markov - (assoc markov :weight (* adjustment-markovs (:weight markov))) - (assoc markov :adjustment-for-markov adjustment-markovs))) - markovs) - non-markovs)))))) - -(defn e-prhyme - "2020-10-21 iteration" - [words markov target stop?] - (let [target (assoc target :original-syllables (:syllables target)) - words (map #(assoc % :weight 1) words)] - (loop [target target - result '() - sentinel 0] - (if (or (stop? target result) - (> sentinel 5)) - result - (let [markov-options (markov (list (:norm-word (first result)))) - markov-adjuster (adjust-for-markov-1 markov-options 0.9) - syllable-count-adjuster (adjust-for-over-syllables target) - rhyme-adjuster (adjust-for-rhymes-1 target 0.9) - lovecraft-set (into #{} (map (comp first first) lovecraft-markov)) - lovecraft-filter (adjust-for-membership-1 lovecraft-set 0.9) - adjust (comp rhyme-adjuster - syllable-count-adjuster - markov-adjuster - lovecraft-filter) - weighted-words (p :adjust - (->> (adjust words) - (remove #(= 0 (:weight %))))) - rng (p :from-weights (wr/from-weights (map :weight weighted-words))) - index (p :nextr (wr/nextr rng nil)) - selection (nth weighted-words index) - new-target (->> target - (#(assoc % :syllables (drop-last - (:syllable-count - selection) - (:syllables - target)))) - (#(assoc % :rimes (prhyme/rimes (:syllables %)))) - (#(assoc % :onsets (prhyme/onset+nucleus (:syllables %)))) - (#(assoc % :nuclei (prhyme/nucleus (:syllables %))))) - result (cons selection result)] - (recur new-target result (inc sentinel))))))) - -(def words (map #(assoc % :weight 1) frp/words)) - -(defn main [poem-lines] - (map - (fn [line] - (let [orig-target (frp/phrase->word frp/words line)] - (e-prhyme - frp/popular - lovecraft-markov - orig-target - (fn [target result] - (<= (count (:syllables orig-target)) - (apply + (map :syllable-count result))))))) - poem-lines)) - -(defn rhymer [words markov target stop] - (cons (e-prhyme - words - markov - target - stop) - (lazy-seq (rhymer words markov target stop)))) - -(defn stop [target] - (fn [inner-target result] - (<= (count (:syllables target)) - (apply + (map :syllable-count result))))) - -(defn sentence-stop [target] - (fn [inner-target result] - (let [result-sentence (string/join " " (map :norm-word result))] - (when-not (empty? result) - (or (nlp/valid-sentence? result-sentence) - (< (:syllable-count target) - (apply + (map :syllable-count result))) - (< 5 (count result))))))) - -(comment - (let [phrase (frp/phrase->word frp/words "i solemnly swear i am up to no good") - r (rhymer - frp/popular - lovecraft-markov - phrase - (sentence-stop phrase))] - (take 5 (map #(string/join " " (map :norm-word %)) - (filter #(nlp/valid-sentence? (string/join " " (map :norm-word %))) r)))) - - (let [poem-lines ["mister sandman" - "give me a dream" - "make him the cutest" - "that i've ever seen" - "give him two lips" - "like roses in clover" - "please tell me that" - "these lonesome nights are over"]] - (map (fn [line] (map :norm-word line)) (main poem-lines))) - - (let [orig-target (frp/phrase->word frp/words "mister sandman give me a dream")] - (repeatedly - 10 - (fn [] - (e-prhyme - frp/popular - lovecraft-markov - orig-target - (fn [target result] - (<= (count (:syllables orig-target)) - (apply + (map :syllable-count result))))))))) - -(comment - (frp/phrase->word frp/words "distort bad man") - (repeatedly 10 #(make-rhymes lovecraft-markov "bad man")) - - (rhyming-words "magic beam") - ((make-markov-picker lovecraft-markov) '("no")) - (markov-gen lovecraft-markov '("world")) - (interleave - (->> "your eyes" - (make-rhymes lovecraft-markov) - (map - (fn [[k v]] - (markov-gen lovecraft-markov (list k))))) - (->> "pretty" - (make-rhymes lovecraft-markov) - (map - (fn [[k v]] - (markov-gen lovecraft-markov (list k)))) - (remove nil?))) - - (frp/phrase->word frp/words "well-off") - (frp/prhyme frp/words (assoc (words-map "well") :rimes? true)) - ) -(defn ghost - "Rhyme a phrase with markov" - [words word] - (let [rhymes (frp/prhyme words word) - norm-rhyme-words (->> rhymes - (map :word) - (map string/lower-case) - (map #(string/replace % #"\(\d+\)" "")) - (into #{}) - (filter #(get lovecraft-markov (list %)))) - keyer (markov-key #(list (first (string/split % #"\s"))))] - (->> norm-rhyme-words - (map (fn [w] - (gen-from markover #(< (count %) 5) w)))))) - -(comment - (take 10 lovecraft-markov) - (ghost frp/words (assoc (frp/make-word ["dream" "D" "R" "IY" "M"]) - :rimes? - true))) - -(comment - (->> (frp/make-word ["dream" "D" "R" "IY" "M"]) - (#(assoc % :rimes? true)) - (frp/prhyme frp/words) - (take 10)) - - - (->> (main) - (#(spit "lovecraft.edn" (pr-str %)))) - - (let [t (read-string (slurp "lovecraft.edn"))] - (take 20 t)) - ) - -(comment - (->> (tokens-from-file "lovecraft.txt") - (reverse) - (normalize-tokens) - ((window 2)) - (markov) - (take 10) - (into {}) - (#(get % '("away"))) - (choose-from-markov-possibilities)) - - (markov [["boy" "good"] ["the" "over"] - ["ran" "he"] ["walked" "he"] - ["walked" "he"] ["walked" "she"]]) - (tokens-from-file "lovecraft.txt") - (scrape) - (def test-links (take 3 (links))) - (->> (text-from-link (first test-links)) - (cleanup)) - (->> (text-from-link (first test-links)) - (append-to-file "test.txt" "hi")) - (take 3 (html/select (fetch-url (first test-links)) [:body])) - )