Eric Ihli 4 years ago
parent 0998df73b6
commit 1cb959010c

@ -1,31 +1,10 @@
(ns com.owoga.prhyme.core (ns com.owoga.prhyme.core
(:require [clojure.java.io :as io] (:require [clojure.string :as string]
[clojure.string :as string]
[clojure.set :as set] [clojure.set :as set]
[com.owoga.prhyme.util :as u] [com.owoga.prhyme.util :as u]
[com.owoga.prhyme.syllabify :as s])) [com.owoga.prhyme.syllabify :as s]
[com.owoga.prhyme.data.dictionary :as dict]
(def dictionary [com.owoga.prhyme.data.phonetics :as phonetics]))
(line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
(def words (map u/prepare-word dictionary))
(def popular
(set (line-seq (io/reader (io/resource "popular.txt")))))
(def adverbs
(set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt"))))))
(def adjectives
(set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt"))))))
(def verbs
(set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt"))))))
(def nouns
(set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
(defrecord Word [word syllables syllable-count rimes onsets nuclei])
;;; Typical rhyme model (explanation of following 3 functions) ;;; Typical rhyme model (explanation of following 3 functions)
;; ;;
@ -48,51 +27,74 @@
(defn rimes [syllables] (defn rimes [syllables]
(->> syllables (->> syllables
(map reverse) (map reverse)
(map #(first (u/take-through u/vowel %))) (map #(first (u/take-through phonetics/vowel %)))
(map reverse))) (map reverse)))
(defn onset+nucleus [syllables] (defn onset+nucleus [syllables]
(->> syllables (->> syllables
(map #(first (u/take-through u/vowel %))))) (map #(first (u/take-through phonetics/vowel %)))))
(defn nucleus [syllables] (defn nucleus [syllables]
(map #(list (last (first (u/take-through u/vowel %)))) syllables)) (map #(list (last (first (u/take-through phonetics/vowel %)))) syllables))
(defn make-word [word] (defn merge-phrase-words
(let [syllables (s/syllabify (rest word)) "Given multiple `Word`, like the words for 'well off', create a single `Word`
rimes (rimes syllables) that is syllabified as ('well' 'off') rather than as the combined ('weh'
onsets (onset+nucleus syllables) 'loff'). Useful for finding single-word rhymes of multiple-word targets.
nuclei (nucleus syllables)]
(->> (->Word An example: 'war on crime' -> 'turpentine'.
(first word) As opposed to: 'war on crime' -> 'caw fawn lime'."
syllables [phrase phrase-words]
(count syllables) (loop [merged (first phrase-words)
rimes phrase-words (rest phrase-words)]
onsets (cond
nuclei) (and (empty? phrase-words) (empty? merged)) nil
(#(assoc % :norm-word (string/lower-case (empty? phrase-words) (assoc merged :word phrase)
(string/replace :else (recur (-> merged
(:word %) (assoc :syllables (concat (:syllables merged)
#"\(\d+\)" (:syllables (first phrase-words))))
""))))))) (assoc :syllable-count (+ (:syllable-count merged)
(:syllable-count (first phrase-words))))
(defn make-word-1 [word phonemes] (assoc :rimes (concat (:rimes merged)
(:rimes (first phrase-words))))
(assoc :onsets (concat (:onsets merged)
(:onsets (first phrase-words))))
(assoc :nuclei (concat (:nuclei merged)
(:nuclei (first phrase-words)))))
(rest phrase-words)))))
(defn cmu->prhyme [[word & phonemes]]
(let [syllables (s/syllabify phonemes) (let [syllables (s/syllabify phonemes)
rimes (rimes syllables) rimes (rimes syllables)
onsets (onset+nucleus syllables) onsets (onset+nucleus syllables)
nuclei (nucleus syllables)] nuclei (nucleus syllables)]
(->> (->Word {:word word
(string/lower-case word) :syllables syllables
syllables :syllable-count (count syllables)
(count syllables) :rimes rimes
rimes :onsets onsets
onsets :nuclei nuclei
nuclei) :weight 1
;; CMU dict has multiple pronounciations for some words. :normalized-word (-> word
;; foobar(1), foobar(2), etc... string/lower-case
;; it's useful to have the normalized word for situations (string/replace #"\(\d+\)" ""))}))
;; when you don't care how it's pronounced.
(#(assoc % :normalized-word (string/replace (:word %) #"\(\d+\)" "")))))) (defn phrase->word
"Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
that has the correct syllables, rimes, onsets, and nucleus. This way we can
rhyme against phrases that aren't in the dictionary, as long as the words that
make up the phrase are in the dictionary. Returns nil if the word is not in
the dictionary."
[words phrase]
(->> (string/split phrase #"[ -]")
(map (fn [phrase-word]
(let [word (first (filter (fn [word]
(= phrase-word (:norm-word word)))
words))]
(if (nil? word)
(cmu->prhyme (cons phrase-word (u/get-phones phrase-word)))
word))))
(merge-phrase-words phrase)))
(defn words-by-rime* [words] (defn words-by-rime* [words]
(let [words-with-rime (->> words (let [words-with-rime (->> words
@ -101,7 +103,7 @@
(map #(map reverse %)) (map #(map reverse %))
(map #(map (map #(map
(fn [syllable] (fn [syllable]
(first (u/take-through u/vowel syllable))) %)) (first (u/take-through phonetics/vowel syllable))) %))
(map #(map reverse %)) (map #(map reverse %))
(map reverse) (map reverse)
(map #(cons %1 %2) (map first words)))] (map #(cons %1 %2) (map first words)))]
@ -118,7 +120,7 @@
(cons val (:words existing))) (cons val (:words existing)))
(rest words))))))) (rest words)))))))
(def words-by-rime (words-by-rime* words)) (def words-by-rime (words-by-rime* dict/cmu-dict))
(defn words-by-onset-nucleus* [words] (defn words-by-onset-nucleus* [words]
(let [words-with-onset-nucleus (->> words (let [words-with-onset-nucleus (->> words
@ -126,7 +128,7 @@
(map s/syllabify) (map s/syllabify)
(map #(map (map #(map
(fn [syllable] (fn [syllable]
(first (u/take-through u/vowel syllable))) (first (u/take-through phonetics/vowel syllable)))
%)) %))
(map #(cons %1 %2) (map first words)))] (map #(cons %1 %2) (map first words)))]
(loop [by-onset {} (loop [by-onset {}
@ -152,7 +154,7 @@
(fn [syllable] (fn [syllable]
(list (list
(last (last
(first (u/take-through u/vowel syllable))))) (first (u/take-through phonetics/vowel syllable)))))
%)) %))
(map #(cons %1 %2) (map first words)))] (map #(cons %1 %2) (map first words)))]
(loop [by-nucleus {} (loop [by-nucleus {}
@ -198,7 +200,6 @@
(defn filter-to-syllable-count [n words] (defn filter-to-syllable-count [n words]
(filter (fn [word] (= n (count (s/syllabify (rest word))))) words)) (filter (fn [word] (= n (count (s/syllabify (rest word))))) words))
(defn rhymes? (defn rhymes?
"What does it mean for something to rhyme?" "What does it mean for something to rhyme?"
[a b] [a b]

@ -2,6 +2,7 @@
(:require [clojure.java.io :as io] (:require [clojure.java.io :as io]
[clojure.string :as string] [clojure.string :as string]
[clojure.set :as set] [clojure.set :as set]
[com.owoga.prhyme.data.thesaurus :refer [thesaurus]]
[com.owoga.prhyme.core :as p] [com.owoga.prhyme.core :as p]
[com.owoga.prhyme.util :as u] [com.owoga.prhyme.util :as u]
[com.owoga.prhyme.syllabify :as s])) [com.owoga.prhyme.syllabify :as s]))
@ -9,35 +10,11 @@
(def dictionary (def dictionary
(line-seq (io/reader (io/resource "cmudict_SPHINX_40")))) (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
(def thesaurus
(->> (line-seq (io/reader (io/resource "mthesaur.txt")))
(map #(string/split % #","))
(map #(vector (first %) (rest %)))
(into {})))
(defrecord Word [word syllables syllable-count rimes onsets nuclei])
(defn make-word [word]
(let [syllables (s/syllabify (rest word))
rimes (p/rimes syllables)
onsets (p/onset+nucleus syllables)
nuclei (p/nucleus syllables)]
(->> (->Word
(first word)
syllables
(count syllables)
rimes
onsets
nuclei)
(#(assoc % :norm-word (string/lower-case
(string/replace
(:word %)
#"\(\d+\)"
"")))))))
(def words (->> dictionary (def words (->> dictionary
(map u/prepare-word) (map u/prepare-word)
(map make-word))) (map p/cmu->prhyme)))
(def popular-dict (def popular-dict
(set (line-seq (io/reader (io/resource "popular.txt"))))) (set (line-seq (io/reader (io/resource "popular.txt")))))
@ -69,20 +46,6 @@
(:nuclei (first phrase-words))))) (:nuclei (first phrase-words)))))
(rest phrase-words))))) (rest phrase-words)))))
(defn phrase->word
"Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
that has the correct syllables, rimes, onsets, and nucleus. This way we can
rhyme against phrases that aren't in the dictionary, as long as the words that
make up the phrase are in the dictionary. Returns nil if the word is not in
the dictionary."
[words phrase]
(->> (string/split phrase #"[ -]")
(map (fn [phrase-word]
(first (filter (fn [word]
(= phrase-word (string/lower-case (:norm-word word))))
words))))
(merge-phrase-words phrase)))
(defn partition-word [word] (defn partition-word [word]
(->> word (->> word
(:syllables) (:syllables)
@ -155,20 +118,18 @@
(filter (fn [word] (some #(re-matches (re-pattern (str "(?i)" %)) (:word word)) synonyms)) (filter (fn [word] (some #(re-matches (re-pattern (str "(?i)" %)) (:word word)) synonyms))
words))) words)))
(defn phrymo [dictionary phrase]
(phrase->word dictionary phrase))
(comment (comment
(->> (make-word ["foobar" "F" "UW" "B" "AA" "R"]) (->> (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"])
(#(assoc % :rimes? true)) (#(assoc % :rimes? true))
(prhyme words) (prhyme words)
(filter #(= (:syllable-count %) 2)) (filter #(= (:syllable-count %) 2))
(sort-by #(consecutive-matching (sort-by #(count
% (consecutive-matching
(make-word ["foobar" "F" "UW" "B" "AA" "R"]) %
:rimes))) (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"])
:rimes))))
(as-> (make-word ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word (as-> (p/cmu->prhyme ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word
(into word {:rimes? true}) (into word {:rimes? true})
(prhyme popular word) (prhyme popular word)
(mapcat #(matching-synonyms thesaurus % word) (mapcat #(matching-synonyms thesaurus % word)
@ -177,7 +138,7 @@
"distress" "corpse" "necrotic" "zombie" "distress" "corpse" "necrotic" "zombie"
"coma" "monster"])) "coma" "monster"]))
(as-> (make-word ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word (as-> (p/cmu->prhyme ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word
(into word {:rimes? true}) (into word {:rimes? true})
(prhyme popular word) (prhyme popular word)
(mapcat #(matching-synonyms thesaurus % word) (mapcat #(matching-synonyms thesaurus % word)

@ -36,23 +36,6 @@
(:nuclei (first phrase-words))))) (:nuclei (first phrase-words)))))
(rest phrase-words))))) (rest phrase-words)))))
(defn phrase->word
"Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
that has the correct syllables, rimes, onsets, and nucleus. This way we can
rhyme against phrases that aren't in the dictionary, as long as the words that
make up the phrase are in the dictionary. Returns nil if the word is not in
the dictionary."
[words phrase]
(->> (string/split phrase #"[ -]")
(map (fn [phrase-word]
(let [word (first (filter (fn [word]
(= phrase-word (:norm-word word)))
words))]
(if (nil? word)
(frp/make-word (cons phrase-word (util/get-phones phrase-word)))
word))))
(merge-phrase-words phrase)))
(defn adjust-for-markov (defn adjust-for-markov
[markov percent] [markov percent]
(let [target-markov-n (count (first (first markov)))] (let [target-markov-n (count (first (first markov)))]
@ -121,31 +104,6 @@
target target
result])))))) result]))))))
(comment
(let [markov-1-example
{'("dream") {"a" 1}
'("a") {"me" 1}}
markov-2-example
{'(nil nil) {"dream" 1}
'(nil "dream") {"a" 1}
'("dream" "a") {"me" 1}
'("a" "me") {"give" 1}
'("give" nil) {nil 1}}
result-a '()
result-b '({:norm-word "dream",
:weight 9.000000000000002,
:adjustment-for-markov 9.000000000000002})
words [{:norm-word "dream" :weight 1}
{:norm-word "foo" :weight 1}
{:norm-word "a" :weight 1}
{:norm-word "me" :weight 1}
{:norm-word "give" :weight 1}]
adj (adjust-for-markov-with-boundaries markov-2-example 0.9)]
(adj [words 'target result-b]))
((adjust-for-markov-with-boundaries {'("foo" "bar") {}} 0.5)
['() '() '("hi" "bye" "there")]))
(defn adjust-for-rimes (defn adjust-for-rimes
[dictionary percent] [dictionary percent]
(fn [[words target result]] (fn [[words target result]]

@ -1,38 +0,0 @@
(ns com.owoga.prhyme.grammar)
(def root-states
[{::tk/name :failed
::tk/transitions [{::tk/on tk/_ ::tk/to :failed}]}
{::tk/name :object
::tk/transitions [{::tk/on :adjectives ::tk/to :obj-adj}
{::tk/on :nouns ::tk/to :obj-noun}
{::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
{::tk/name :obj-adj
::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun}
{::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
{::tk/name :obj-noun
::tk/transitions [{::tk/on :verbs ::tk/to :verbs}
{::tk/on :adverbs ::tk/to :adverbs}
{::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
{::tk/name :verbs
::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun}
{::tk/on :adjectives ::tk/to :subj-adj}
{::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
{::tk/name :adverbs
::tk/transitions [{::tk/on :verbs ::tk/to :verbs}
{::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
{::tk/name :subj-noun
::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun}
{::tk/on :adjectives ::tk/to :obj-adj}
{::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
{::tk/name :subj-adj
::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun}
{::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}])
(def root-fsm
{::tk/states root-states
::tk/action! (fn [{::tk/keys [signal action] :as fsm}]
(case signal
:failed (println "Failed! " signal " " action))
fsm)
::tk/state :object})

@ -2,7 +2,9 @@
(:require [com.owoga.prhyme.gen :as gen] (:require [com.owoga.prhyme.gen :as gen]
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection] [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
[clojure.string :as string] [clojure.string :as string]
[com.owoga.prhyme.core :as prhyme]
[com.owoga.prhyme.frp :as frp] [com.owoga.prhyme.frp :as frp]
[com.owoga.prhyme.util :as util]
[taoensso.nippy :as nippy] [taoensso.nippy :as nippy]
[clojure.java.io :as io])) [clojure.java.io :as io]))
@ -21,15 +23,12 @@
banned-words (into #{} (->> result banned-words (into #{} (->> result
(map #(string/split % #" ")) (map #(string/split % #" "))
(map #(last %)))) (map #(last %))))
adj (apply adj (util/comp-rnil
comp (weighted-selection/adjust-for-markov
(remove markov
nil? 0.99)
[(weighted-selection/adjust-for-markov (when (rhymes pattern)
markov (weighted-selection/adjust-for-tail-rhyme 0.99)))
0.99)
(when (rhymes pattern)
(weighted-selection/adjust-for-tail-rhyme 0.99))]))
rhyme (if (nil? (get rhymes pattern)) rhyme (if (nil? (get rhymes pattern))
(gen/gen-sentence-with-syllable-count (gen/gen-sentence-with-syllable-count
adj adj
@ -40,7 +39,7 @@
syllable-count syllable-count
(remove #(banned-words (:norm-word %)) (remove #(banned-words (:norm-word %))
base-words) base-words)
(frp/phrase->word frp/words (get rhymes pattern))))] (prhyme/phrase->word frp/words (get rhymes pattern))))]
(recur (rest scheme) (recur (rest scheme)
(assoc rhymes pattern rhyme) (assoc rhymes pattern rhyme)
(conj result rhyme))))))) (conj result rhyme)))))))
@ -55,7 +54,7 @@
(->> (gen/selection-seq (->> (gen/selection-seq
(map #(assoc % :weight 1) frp/words) (map #(assoc % :weight 1) frp/words)
(weighted-selection/adjust-for-rhymes 0.99) (weighted-selection/adjust-for-rhymes 0.99)
(frp/phrase->word frp/words "hi there my boy")) (prhyme/phrase->word frp/words "hi there my boy"))
(take 3)) (take 3))
["bishop larch smitten us dwell" ["bishop larch smitten us dwell"

@ -1,5 +1,6 @@
(ns com.owoga.prhyme.syllabify (ns com.owoga.prhyme.syllabify
(:require [com.owoga.prhyme.util :as p])) (:require [com.owoga.prhyme.data.phonetics :as phonetics]
[com.owoga.prhyme.util :as util]))
;; ER is not yet handled properly. ;; ER is not yet handled properly.
;; PARENTHESES is syllabified as ("P" "ER" "IH" "N") ("TH" "UH") ("S" "IY" "S") ;; PARENTHESES is syllabified as ("P" "ER" "IH" "N") ("TH" "UH") ("S" "IY" "S")
;; Glides are also broken. "R OY AH L" gets syllabified as a single syllable. ;; Glides are also broken. "R OY AH L" gets syllabified as a single syllable.
@ -35,16 +36,16 @@
(def lax-vowels #{"EH" "IH" "AE" "AH" "UH"}) (def lax-vowels #{"EH" "IH" "AE" "AH" "UH"})
(defn sonority [phone] (defn sonority [phone]
(.indexOf sonority-hierarchy (p/phonemap phone))) (.indexOf sonority-hierarchy (phonetics/phonemap phone)))
(defn vowel? [phone] (defn vowel? [phone]
(p/vowel phone)) (phonetics/vowel phone))
(defn >sonorous [a b] (defn >sonorous [a b]
(> (sonority a) (sonority b))) (> (sonority a) (sonority b)))
(defn slurp-rime [phones] (defn slurp-rime [phones]
(let [splits (p/take-through vowel? phones)] (let [splits (util/take-through vowel? phones)]
[(first splits) (flatten (rest splits))])) [(first splits) (flatten (rest splits))]))
(defn slurp-onset [phones] (defn slurp-onset [phones]

@ -1,41 +1,14 @@
(ns com.owoga.prhyme.util (ns com.owoga.prhyme.util
(:require [clojure.java.io :as io] (:require [clojure.java.io :as io]
[clojure.string :as string] [clojure.string :as string]
[clojure.set :as set] [clojure.set :as set])
[clojure.zip :as z]) (:import (com.sun.speech.freetts.en.us CMULexicon)))
(:import (com.sun.speech.freetts.lexicon LetterToSoundImpl)
(com.sun.speech.freetts.en.us CMULexicon)
(java.io File)))
(defn prepare-word (defn prepare-word
"Splits whitespace-separated fields into a sequence." "Splits whitespace-separated fields into a sequence."
[line] [line]
(string/split line #"[\t ]")) (string/split line #"[\t ]"))
(def dictionary
(line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
(def words (map prepare-word dictionary))
(def words-map
(into {} (map #(vector (string/lower-case (first %)) {:phonemes (rest %)}) words)))
(def popular
(set (line-seq (io/reader (io/resource "popular.txt")))))
(def adverbs
(set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt"))))))
(def adjectives
(set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt"))))))
(def verbs
(set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt"))))))
(def nouns
(set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
(CMULexicon. "cmulex" true) (CMULexicon. "cmulex" true)
(def cmu-lexicon (CMULexicon/getInstance true)) (def cmu-lexicon (CMULexicon/getInstance true))
@ -54,24 +27,6 @@
(map convert-to-sphinx) (map convert-to-sphinx)
(map string/upper-case))) (map string/upper-case)))
(def phonemap
(->> (io/reader (io/resource "cmudict-0.7b.phones"))
(line-seq)
(map #(string/split % #"\t"))
(into {})))
(def long-vowel #{"EY" "IY" "AY" "OW" "UW"})
(def short-vowel #{"AA" "AE" "AH" "AO" "AW" "EH" "ER" "IH" "OY" "UH"})
(def vowel (set/union long-vowel short-vowel))
(def consonant (set/difference (into #{} (keys phonemap)) vowel))
(def syllable-end (set/union consonant long-vowel))
(def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"})
(defn window [n] (defn window [n]
(fn [coll] (fn [coll]
(cond (cond
@ -80,6 +35,22 @@
:else (cons (take n coll) :else (cons (take n coll)
(lazy-seq ((window n) (rest coll))))))) (lazy-seq ((window n) (rest coll)))))))
(defn comp-rnil
"Compose functions, ignoring nil values."
[& functions]
(apply comp (remove nil? functions)))
(defn reduce-while
[pred f val coll]
(loop [val val
coll coll]
(cond
(empty? coll) val
(pred val)
(let [new-val (f val (first coll))]
(recur new-val (rest coll)))
:else val)))
(defn extend-coll [coll val n] (defn extend-coll [coll val n]
(concat (repeat n val) (concat (repeat n val)
coll coll
@ -96,12 +67,6 @@
(defn clean-text [text] (defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" ""))) (string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
(defn english? [text]
(let [words (string/split text #"\s+")
english-words
(->> words (filter #(words-map (string/lower-case %))))]
(< 0.7 (/ (count english-words) (max 1 (count words))))))
(defn padr [val n coll] (defn padr [val n coll]
(concat coll (repeat n val))) (concat coll (repeat n val)))
@ -160,8 +125,6 @@
(defn count-pred [pred coll] (defn count-pred [pred coll]
(count (filter pred coll))) (count (filter pred coll)))
(def count-vowels (partial count-pred vowel))
(defn single? [coll] (= 1 (count coll))) (defn single? [coll] (= 1 (count coll)))
(defn partitions (defn partitions

@ -1,639 +0,0 @@
(ns com.owoga.prhyme.util.lovecraft
(:require [net.cgrand.enlive-html :as html]
[clojure.string :as string]
[com.owoga.prhyme.util.weighted-rand :as wr]
[com.owoga.prhyme.core :as prhyme]
[com.owoga.prhyme.util.nlp :as nlp]
[taoensso.tufte :as tufte :refer [defnp p profiled profile]]
[com.owoga.prhyme.frp :as frp]
[clojure.java.io :as io]
[clojure.set :as set]))
(tufte/add-basic-println-handler! {})
(def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/")
(def words-map
(into {} (map #(vector (string/lower-case (:word %)) %) frp/words)))
(defn fetch-url [url]
(html/html-resource (java.net.URL. url)))
(comment
(fetch-url *base-url*))
(defn links []
(map
#(str *base-url* (first (html/attr-values % :href)))
(html/select
(fetch-url *base-url*)
[:li :> [:a (html/attr? :href)]])))
(defn contentful-sections [nodes]
(->> nodes
(map html/text)
(filter #(> (count %) 100))))
(defn text-from-link [link]
(->> (html/select
(fetch-url link)
[:body])
(first)
(html/text)
((fn [s] (string/replace s #"[\s\u00A0]+" " ")))))
(defn cleanup [content]
(-> content
(string/replace #"Return to.*$" "")
(string/replace #"Home.*?This Site" "")
(string/replace #"[^a-zA-Z -]+" "")))
(defn tokens [content]
(string/split content #"\s+"))
(defn append-to-file [filepath text]
(with-open [w (io/writer filepath :append true)]
(.write w text)))
(defn scrape []
(run!
(fn [link]
(->> (text-from-link link)
(cleanup)
(#(str % "\n"))
(append-to-file "lovecraft.txt")))
(take 10 (links))))
(defn tokens-from-file [file]
(with-open [r (io/reader file)]
(tokens (slurp r))))
(defn window [n]
(fn [coll]
(cond
(empty? coll) []
(< (count coll) n) []
:else (cons (take n coll)
(lazy-seq ((window n) (drop n coll)))))))
(defnp markov [tokens]
(->> tokens
(map
(fn [token]
(let [k (butlast token)
v (last token)]
[k v])))
(reduce
(fn [a [k v]]
(update-in a [k v] (fnil inc 0)))
{})))
(defnp running-total
([coll]
(running-total coll 0))
([coll last-val]
(cond
(empty? coll) nil
:else (cons (+ last-val (first coll))
(lazy-seq
(running-total
(rest coll)
(+ last-val (first coll))))))))
(defnp weighted-rand [weights]
(let [running-weights (running-total weights)
rand-val (rand (last running-weights))]
(loop [i 0]
(if (> (nth running-weights i) rand-val)
i
(recur (inc i))))))
(defnp choose-from-markov-possibilities [possibilities]
(if (empty? possibilities)
nil
(let [weights (vals possibilities)
rng (wr/from-weights weights)
index (wr/nextr rng nil)]
(nth (keys possibilities) index))))
(def word-set (into #{} (->> prhyme/words
(map first)
(map string/lower-case)
(map #(string/replace % #"\(\d+\)" "")))))
(defn normalize-tokens [tokens]
(->> tokens
(map string/lower-case)
(filter word-set)))
(defn main []
(->> (tokens-from-file "lovecraft.txt")
(reverse)
(normalize-tokens)
((window 2))
(markov)
(into {})))
(defn make-markov-picker [markov-data]
(fn [k]
(choose-from-markov-possibilities
(get markov-data k {}))))
(defn synonym?
"Given a possibility, like [\"foo\" 3]
which says that foo follows a particular key with
a weight of 3, a word is a synonym of that possibility
if the word is a synonym ."
[p synonyms]
(synonyms p))
(defnp adjust-for-synonyms
"If a word is in a set of synonyms, adjust its weight upwards."
[synonyms]
(fn [possibilities]
(reduce
(fn [p s]
(if (s p)
(update p s #(* 5 %))
p))
possibilities
synonyms)))
(defnp adjust-for-rimes
[target-rime dictionary]
(fn [possibilities]
(into
{}
(map
(fn [[p v]]
(let [possibility (get dictionary p)
factor (count
(frp/consecutive-matching
target-rime
possibility
:rimes))]
[p (* v (max 1 (* factor 4)))]))
possibilities))))
(comment
((adjust-for-synonyms #{"war" "famine"})
{"war" 1
"disease" 3})
;; => {"war" 5, "disease" 3}
((adjust-for-rimes
(frp/make-word ["magic" "M" "AE" "JH" "IH" "K"])
words-map)
{"tragic" 3
"trick" 2
"foo" 1})
;; => {"tragic" 24, "trick" 8, "foo" 1}
)
(defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
(defonce markover (make-markov-picker lovecraft-markov))
(defn markov-key [key-fn]
(fn [text]
(key-fn text)))
(defn gen-from [m p initial]
(loop [r (list initial)]
(cond
(p r) (recur (cons (m (list (first r))) r))
:else r)))
(defn rhyming-words
"List of rhyming words sorted by quality of rhyme."
[target]
(let [target-phrase (->> target
(frp/phrase->word frp/words)
(#(assoc % :rimes? true)))]
(->> target-phrase
(#(assoc % :rimes? true))
(frp/prhyme frp/words)
(sort-by
#(- (count
(frp/consecutive-matching
%
target-phrase
:rimes)))))))
(defn markov-rhymes [markov-data rhyming-words]
(->> (map
(fn [word]
(->> word
:word
string/lower-case
(#(string/replace % #"\(\d+\)" ""))
(#(vector % (get markov-data (list %))))))
rhyming-words)
(into #{})
(remove
(fn [[w p]]
(nil? p)))))
(defn markov-gen [markov-data initial]
(let [m (make-markov-picker markov-data)]
(loop [r initial]
(if (> (count r) 5)
r
(recur (cons (m (list (first r)))
r))))))
(defn make-rhymes [markov-data target]
(let [target-word (frp/phrase->word frp/words target)
rhyming-words (rhyming-words target)
markov--rhymes (markov-rhymes markov-data rhyming-words)
rime-adjuster (adjust-for-rimes target-word words-map)
modified-markov-data
(merge
markov-data
(into {}
(map (fn [[word weights]]
[word (rime-adjuster weights)])
markov--rhymes)))]
(->> rhyming-words
(markov-rhymes modified-markov-data)
(map
(fn [[k v]]
(markov-gen modified-markov-data (list k))))
(map #(remove nil? %)))))
(defn adjust-for-over-syllables
"Adjust weights to prefer not going over the number
of syllables of the target word."
[target]
(fn [words]
(p :adjust-for-syllables
(map
(fn [word]
(if (or (nil? (:syllable-count word))
(nil? (:syllables target)))
(println word target))
(cond
(= (:syllable-count word) (count (:syllables target)))
(as-> word word
(assoc word :weight (* 3 (:weight word)))
(assoc word :adjusted-for-syllables-factor 3))
(< (:syllable-count word) (count (:syllables target)))
(as-> word word
(assoc word :weight (* 2 (:weight word)))
(assoc word :adjusted-for-syllables-factor 2))
:else
(as-> word word
(assoc word :weight (* 1 (:weight word)))
(assoc word :adjusted-for-syllables-factor 1))))
words))))
(comment
(let [words (->> ["distort" "kiss" "sport"]
(map #(frp/phrase->word frp/words %))
(map #(assoc % :weight 1)))
target (->> "report"
(frp/phrase->word frp/words)
(#(assoc % :syllables (:syllables %))))
adjuster (adjust-for-over-syllables target)]
(adjuster words)))
(defn adjust-for-rhymes
"Adjust weights to prefer words that rhyme"
[target]
(fn [words]
(p :adjust-for-rhymes
(map
(fn [word]
(let [factor (max 0.001 (count (frp/consecutive-matching word target :rimes)))]
(as-> word word
(assoc word :weight (* factor (:weight word)))
(assoc word :adjust-for-rhyme-factor factor))))
words))))
(defn adjust-for-rhymes-1
"Adjust weights to prefer words that rhyme"
[target percent]
(fn [words]
(let [ratio (/ percent (- 1 percent))
[rhymes non-rhymes]
((juxt filter remove)
(fn [word]
(< 0 (count (frp/consecutive-matching word target :rimes))))
words)
weight-non-rhymes (apply + (map :weight non-rhymes))
target-weight-rhymes (* ratio weight-non-rhymes)
count-rhymes (max 1 (count rhymes))
adjustment-rhyme (/ target-weight-rhymes count-rhymes)]
(concat
non-rhymes
(map
(fn [rhyme]
(as-> rhyme rhyme
(assoc rhyme :weight (* adjustment-rhyme (:weight rhyme)))
(assoc rhyme :adjust-for-rhyme-factor adjustment-rhyme)))
rhymes)))))
(defn adjust-for-membership-1
[set_ percent]
(let [ratio (- 1 percent)]
(fn [words]
(let [[members non-members]
((juxt filter remove)
#(set_ (:norm-word %))
words)
weight-non-members (apply + (map :weight non-members))
target-weight-members (* ratio weight-non-members)
count-members (count members)
adjustment-members (/ target-weight-members count-members)]
(concat
(map
(fn [member]
(as-> member member
(assoc member :weight (* adjustment-members (:weight member)))
(assoc member :adjustment-for-membership adjustment-members)))
members)
non-members)))))
(comment
(let [words (->> ["distort" "kiss" "sport"]
(map #(frp/phrase->word frp/words %))
(map #(assoc % :weight 1)))
target (->> "report"
(frp/phrase->word frp/words)
(#(assoc % :remaining-syllables (:syllables %))))
rhyme-adjuster (adjust-for-rhymes target)
syllable-count-adjuster (adjust-for-over-syllables target)]
(syllable-count-adjuster (rhyme-adjuster words))))
(defn adjust-for-membership [set_]
(fn [words]
(map
(fn [word]
(if (set_ (:norm-word word))
(as-> word word
(assoc word :weight (* 2 (:weight word)))
(assoc word :adjust-for-membership-factor 2))
(assoc word :adjust-for-membership-factor 1)))
words)))
(defn filter-for-membership [set_]
(fn [words]
(map
(fn [word]
(if-not (set_ (:norm-word word))
(as-> word word
(assoc word :weight (* 0.01 (:weight word)))
(assoc word :filter-for-membership-factor 0.01))
word))
words)))
(defn adjust-for-markov [markov-options]
(let [markov-set (into #{} (map first (keys markov-options)))]
(fn [words]
(let [result (map
(fn [word]
(if (markov-set (:norm-word word))
(as-> word word
(assoc word :weight (* 100 (:weight word)))
(assoc word :adjust-for-markov-factor 100))
(assoc word :adjust-for-markov-factor 1)))
words)]
result))))
(comment
(let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))]
(take 5 (markov-adjuster frp/words))))
(defn adjust-for-membership-1
[set_ percent]
(let [ratio (- 1 percent)]
(fn [words]
(let [[members non-members]
((juxt filter remove)
#(set_ (:norm-word %))
words)
weight-non-members (apply + (map :weight non-members))
target-weight-members (* ratio weight-non-members)
count-members (count members)
adjustment-members (/ target-weight-members count-members)]
(concat
(map
(fn [member]
(as-> member member
(assoc member :weight (* adjustment-members (:weight member)))
(assoc member :adjustment-for-membership adjustment-members)))
members)
non-members)))))
(defn adjust-for-markov-1
[markov-options percent]
(let [ratio (- 1 percent)]
(fn [words]
(if (nil? markov-options)
words
(let [[markovs non-markovs]
((juxt filter remove)
#(markov-options (:norm-word %))
words)
weight-non-markovs (apply + (map :weight non-markovs))
target-weight-markovs (* ratio weight-non-markovs)
count-markovs (count markovs)
adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
(concat
(map
(fn [markov]
(as-> markov markov
(assoc markov :weight (* adjustment-markovs (:weight markov)))
(assoc markov :adjustment-for-markov adjustment-markovs)))
markovs)
non-markovs))))))
(defn e-prhyme
"2020-10-21 iteration"
[words markov target stop?]
(let [target (assoc target :original-syllables (:syllables target))
words (map #(assoc % :weight 1) words)]
(loop [target target
result '()
sentinel 0]
(if (or (stop? target result)
(> sentinel 5))
result
(let [markov-options (markov (list (:norm-word (first result))))
markov-adjuster (adjust-for-markov-1 markov-options 0.9)
syllable-count-adjuster (adjust-for-over-syllables target)
rhyme-adjuster (adjust-for-rhymes-1 target 0.9)
lovecraft-set (into #{} (map (comp first first) lovecraft-markov))
lovecraft-filter (adjust-for-membership-1 lovecraft-set 0.9)
adjust (comp rhyme-adjuster
syllable-count-adjuster
markov-adjuster
lovecraft-filter)
weighted-words (p :adjust
(->> (adjust words)
(remove #(= 0 (:weight %)))))
rng (p :from-weights (wr/from-weights (map :weight weighted-words)))
index (p :nextr (wr/nextr rng nil))
selection (nth weighted-words index)
new-target (->> target
(#(assoc % :syllables (drop-last
(:syllable-count
selection)
(:syllables
target))))
(#(assoc % :rimes (prhyme/rimes (:syllables %))))
(#(assoc % :onsets (prhyme/onset+nucleus (:syllables %))))
(#(assoc % :nuclei (prhyme/nucleus (:syllables %)))))
result (cons selection result)]
(recur new-target result (inc sentinel)))))))
(def words (map #(assoc % :weight 1) frp/words))
(defn main [poem-lines]
(map
(fn [line]
(let [orig-target (frp/phrase->word frp/words line)]
(e-prhyme
frp/popular
lovecraft-markov
orig-target
(fn [target result]
(<= (count (:syllables orig-target))
(apply + (map :syllable-count result)))))))
poem-lines))
(defn rhymer [words markov target stop]
(cons (e-prhyme
words
markov
target
stop)
(lazy-seq (rhymer words markov target stop))))
(defn stop [target]
(fn [inner-target result]
(<= (count (:syllables target))
(apply + (map :syllable-count result)))))
(defn sentence-stop [target]
(fn [inner-target result]
(let [result-sentence (string/join " " (map :norm-word result))]
(when-not (empty? result)
(or (nlp/valid-sentence? result-sentence)
(< (:syllable-count target)
(apply + (map :syllable-count result)))
(< 5 (count result)))))))
(comment
(let [phrase (frp/phrase->word frp/words "i solemnly swear i am up to no good")
r (rhymer
frp/popular
lovecraft-markov
phrase
(sentence-stop phrase))]
(take 5 (map #(string/join " " (map :norm-word %))
(filter #(nlp/valid-sentence? (string/join " " (map :norm-word %))) r))))
(let [poem-lines ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"
"give him two lips"
"like roses in clover"
"please tell me that"
"these lonesome nights are over"]]
(map (fn [line] (map :norm-word line)) (main poem-lines)))
(let [orig-target (frp/phrase->word frp/words "mister sandman give me a dream")]
(repeatedly
10
(fn []
(e-prhyme
frp/popular
lovecraft-markov
orig-target
(fn [target result]
(<= (count (:syllables orig-target))
(apply + (map :syllable-count result)))))))))
(comment
(frp/phrase->word frp/words "distort bad man")
(repeatedly 10 #(make-rhymes lovecraft-markov "bad man"))
(rhyming-words "magic beam")
((make-markov-picker lovecraft-markov) '("no"))
(markov-gen lovecraft-markov '("world"))
(interleave
(->> "your eyes"
(make-rhymes lovecraft-markov)
(map
(fn [[k v]]
(markov-gen lovecraft-markov (list k)))))
(->> "pretty"
(make-rhymes lovecraft-markov)
(map
(fn [[k v]]
(markov-gen lovecraft-markov (list k))))
(remove nil?)))
(frp/phrase->word frp/words "well-off")
(frp/prhyme frp/words (assoc (words-map "well") :rimes? true))
)
(defn ghost
"Rhyme a phrase with markov"
[words word]
(let [rhymes (frp/prhyme words word)
norm-rhyme-words (->> rhymes
(map :word)
(map string/lower-case)
(map #(string/replace % #"\(\d+\)" ""))
(into #{})
(filter #(get lovecraft-markov (list %))))
keyer (markov-key #(list (first (string/split % #"\s"))))]
(->> norm-rhyme-words
(map (fn [w]
(gen-from markover #(< (count %) 5) w))))))
(comment
(take 10 lovecraft-markov)
(ghost frp/words (assoc (frp/make-word ["dream" "D" "R" "IY" "M"])
:rimes?
true)))
(comment
(->> (frp/make-word ["dream" "D" "R" "IY" "M"])
(#(assoc % :rimes? true))
(frp/prhyme frp/words)
(take 10))
(->> (main)
(#(spit "lovecraft.edn" (pr-str %))))
(let [t (read-string (slurp "lovecraft.edn"))]
(take 20 t))
)
(comment
(->> (tokens-from-file "lovecraft.txt")
(reverse)
(normalize-tokens)
((window 2))
(markov)
(take 10)
(into {})
(#(get % '("away")))
(choose-from-markov-possibilities))
(markov [["boy" "good"] ["the" "over"]
["ran" "he"] ["walked" "he"]
["walked" "he"] ["walked" "she"]])
(tokens-from-file "lovecraft.txt")
(scrape)
(def test-links (take 3 (links)))
(->> (text-from-link (first test-links))
(cleanup))
(->> (text-from-link (first test-links))
(append-to-file "test.txt" "hi"))
(take 3 (html/select (fetch-url (first test-links)) [:body]))
)
Loading…
Cancel
Save