WIP

6 years ago · 1cb959010c
parent 0998df73b6
commit 1cb959010c
8 changed files with 113 additions and 907 deletions
--- a/src/com/owoga/prhyme/core.clj
+++ b/src/com/owoga/prhyme/core.clj
@ -1,31 +1,10 @@
 (ns com.owoga.prhyme.core
-  (:require [clojure.java.io :as io]
-            [clojure.string :as string]
+  (:require [clojure.string :as string]
            [clojure.set :as set]
            [com.owoga.prhyme.util :as u]
-            [com.owoga.prhyme.syllabify :as s]))
-
-(def dictionary
-  (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
-
-(def words (map u/prepare-word dictionary))
-
-(def popular
-  (set (line-seq (io/reader (io/resource "popular.txt")))))
-
-(def adverbs
-  (set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt"))))))
-
-(def adjectives
-  (set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt"))))))
-
-(def verbs
-  (set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt"))))))
-
-(def nouns
-  (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
-
-(defrecord Word [word syllables syllable-count rimes onsets nuclei])
+            [com.owoga.prhyme.syllabify :as s]
+            [com.owoga.prhyme.data.dictionary :as dict]
+            [com.owoga.prhyme.data.phonetics :as phonetics]))

 ;;; Typical rhyme model (explanation of following 3 functions)
 ;;
@ -48,51 +27,74 @@
 (defn rimes [syllables]
  (->> syllables
       (map reverse)
-       (map #(first (u/take-through u/vowel %)))
+       (map #(first (u/take-through phonetics/vowel %)))
       (map reverse)))

 (defn onset+nucleus [syllables]
  (->> syllables
-       (map #(first (u/take-through u/vowel %)))))
+       (map #(first (u/take-through phonetics/vowel %)))))

 (defn nucleus [syllables]
-  (map #(list (last (first (u/take-through u/vowel %)))) syllables))
-
-(defn make-word [word]
-  (let [syllables (s/syllabify (rest word))
-        rimes     (rimes syllables)
-        onsets    (onset+nucleus syllables)
-        nuclei    (nucleus syllables)]
-    (->> (->Word
-          (first word)
-          syllables
-          (count syllables)
-          rimes
-          onsets
-          nuclei)
-         (#(assoc % :norm-word (string/lower-case
-                                (string/replace
-                                 (:word %)
-                                 #"\(\d+\)"
-                                 "")))))))
-
-(defn make-word-1 [word phonemes]
+  (map #(list (last (first (u/take-through phonetics/vowel %)))) syllables))
+
+(defn merge-phrase-words
+  "Given multiple `Word`, like the words for 'well off', create a single `Word`
+  that is syllabified as ('well' 'off') rather than as the combined ('weh'
+  'loff'). Useful for finding single-word rhymes of multiple-word targets.
+
+  An example: 'war on crime' -> 'turpentine'.
+  As opposed to: 'war on crime' -> 'caw fawn lime'."
+  [phrase phrase-words]
+  (loop [merged (first phrase-words)
+         phrase-words (rest phrase-words)]
+    (cond
+      (and (empty? phrase-words) (empty? merged)) nil
+      (empty? phrase-words) (assoc merged :word phrase)
+      :else (recur (-> merged
+                       (assoc :syllables (concat (:syllables merged)
+                                                 (:syllables (first phrase-words))))
+                       (assoc :syllable-count (+ (:syllable-count merged)
+                                                 (:syllable-count (first phrase-words))))
+                       (assoc :rimes (concat (:rimes merged)
+                                             (:rimes (first phrase-words))))
+                       (assoc :onsets (concat (:onsets merged)
+                                              (:onsets (first phrase-words))))
+                       (assoc :nuclei (concat (:nuclei merged)
+                                              (:nuclei (first phrase-words)))))
+                   (rest phrase-words)))))
+
+(defn cmu->prhyme [[word & phonemes]]
  (let [syllables (s/syllabify phonemes)
-        rimes     (rimes syllables)
-        onsets    (onset+nucleus syllables)
-        nuclei    (nucleus syllables)]
-    (->> (->Word
-          (string/lower-case word)
-          syllables
-          (count syllables)
-          rimes
-          onsets
-          nuclei)
-         ;; CMU dict has multiple pronounciations for some words.
-         ;; foobar(1), foobar(2), etc...
-         ;; it's useful to have the normalized word for situations
-         ;; when you don't care how it's pronounced.
-         (#(assoc % :normalized-word (string/replace (:word %) #"\(\d+\)" ""))))))
+        rimes (rimes syllables)
+        onsets (onset+nucleus syllables)
+        nuclei (nucleus syllables)]
+    {:word word
+     :syllables syllables
+     :syllable-count (count syllables)
+     :rimes rimes
+     :onsets onsets
+     :nuclei nuclei
+     :weight 1
+     :normalized-word (-> word
+                          string/lower-case
+                          (string/replace #"\(\d+\)" ""))}))
+
+(defn phrase->word
+  "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
+  that has the correct syllables, rimes, onsets, and nucleus. This way we can
+  rhyme against phrases that aren't in the dictionary, as long as the words that
+  make up the phrase are in the dictionary. Returns nil if the word is not in
+  the dictionary."
+  [words phrase]
+  (->> (string/split phrase #"[ -]")
+       (map (fn [phrase-word]
+              (let [word (first (filter (fn [word]
+                                          (= phrase-word (:norm-word word)))
+                                        words))]
+                (if (nil? word)
+                  (cmu->prhyme (cons phrase-word (u/get-phones phrase-word)))
+                  word))))
+       (merge-phrase-words phrase)))

 (defn words-by-rime* [words]
  (let [words-with-rime (->> words
@ -101,7 +103,7 @@
                             (map #(map reverse %))
                             (map #(map
                                    (fn [syllable]
-                                      (first (u/take-through u/vowel syllable))) %))
+                                      (first (u/take-through phonetics/vowel syllable))) %))
                             (map #(map reverse %))
                             (map reverse)
                             (map #(cons %1 %2) (map first words)))]
@ -118,7 +120,7 @@
                                 (cons val (:words existing)))
                       (rest words)))))))

-(def words-by-rime (words-by-rime* words))
+(def words-by-rime (words-by-rime* dict/cmu-dict))

 (defn words-by-onset-nucleus* [words]
  (let [words-with-onset-nucleus (->> words
@ -126,7 +128,7 @@
                                      (map s/syllabify)
                                      (map #(map
                                             (fn [syllable]
-                                               (first (u/take-through u/vowel syllable)))
+                                               (first (u/take-through phonetics/vowel syllable)))
                                             %))
                                      (map #(cons %1 %2) (map first words)))]
    (loop [by-onset {}
@ -152,7 +154,7 @@
                                       (fn [syllable]
                                         (list
                                          (last
-                                           (first (u/take-through u/vowel syllable)))))
+                                           (first (u/take-through phonetics/vowel syllable)))))
                                       %))
                                (map #(cons %1 %2) (map first words)))]
    (loop [by-nucleus {}
@ -198,7 +200,6 @@
 (defn filter-to-syllable-count [n words]
  (filter (fn [word] (= n (count (s/syllabify (rest word))))) words))

-
 (defn rhymes?
  "What does it mean for something to rhyme?"
  [a b]
--- a/src/com/owoga/prhyme/frp.clj
+++ b/src/com/owoga/prhyme/frp.clj
@ -2,6 +2,7 @@
  (:require [clojure.java.io :as io]
            [clojure.string :as string]
            [clojure.set :as set]
+            [com.owoga.prhyme.data.thesaurus :refer [thesaurus]]
            [com.owoga.prhyme.core :as p]
            [com.owoga.prhyme.util :as u]
            [com.owoga.prhyme.syllabify :as s]))
@ -9,35 +10,11 @@
 (def dictionary
  (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))

-(def thesaurus
-  (->> (line-seq (io/reader (io/resource "mthesaur.txt")))
-       (map #(string/split % #","))
-       (map #(vector (first %) (rest %)))
-       (into {})))
-
-(defrecord Word [word syllables syllable-count rimes onsets nuclei])
-
-(defn make-word [word]
-  (let [syllables (s/syllabify (rest word))
-        rimes     (p/rimes syllables)
-        onsets    (p/onset+nucleus syllables)
-        nuclei    (p/nucleus syllables)]
-    (->> (->Word
-          (first word)
-          syllables
-          (count syllables)
-          rimes
-          onsets
-          nuclei)
-         (#(assoc % :norm-word (string/lower-case
-                                (string/replace
-                                 (:word %)
-                                 #"\(\d+\)"
-                                 "")))))))
+

 (def words (->> dictionary
                (map u/prepare-word)
-                (map make-word)))
+                (map p/cmu->prhyme)))

 (def popular-dict
  (set (line-seq (io/reader (io/resource "popular.txt")))))
@ -69,20 +46,6 @@
                                              (:nuclei (first phrase-words)))))
                   (rest phrase-words)))))

-(defn phrase->word
-  "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
-  that has the correct syllables, rimes, onsets, and nucleus. This way we can
-  rhyme against phrases that aren't in the dictionary, as long as the words that
-  make up the phrase are in the dictionary. Returns nil if the word is not in
-  the dictionary."
-  [words phrase]
-  (->> (string/split phrase #"[ -]")
-       (map (fn [phrase-word]
-              (first (filter (fn [word]
-                               (= phrase-word (string/lower-case (:norm-word word))))
-                             words))))
-       (merge-phrase-words phrase)))
-
 (defn partition-word [word]
  (->> word
       (:syllables)
@ -155,20 +118,18 @@
    (filter (fn [word] (some #(re-matches (re-pattern (str "(?i)" %)) (:word word)) synonyms))
            words)))

-(defn phrymo [dictionary phrase]
-  (phrase->word dictionary phrase))
-
 (comment
-  (->> (make-word ["foobar" "F" "UW" "B" "AA" "R"])
+  (->> (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"])
       (#(assoc % :rimes? true))
       (prhyme words)
       (filter #(= (:syllable-count %) 2))
-       (sort-by #(consecutive-matching
-                  %
-                  (make-word ["foobar" "F" "UW" "B" "AA" "R"])
-                  :rimes)))
+       (sort-by #(count
+                  (consecutive-matching
+                   %
+                   (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"])
+                   :rimes))))

-  (as-> (make-word ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word
+  (as-> (p/cmu->prhyme ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word
    (into word {:rimes? true})
    (prhyme popular word)
    (mapcat #(matching-synonyms thesaurus % word)
@ -177,7 +138,7 @@
             "distress" "corpse" "necrotic" "zombie"
             "coma" "monster"]))

-  (as-> (make-word ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word
+  (as-> (p/cmu->prhyme ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word
    (into word {:rimes? true})
    (prhyme popular word)
    (mapcat #(matching-synonyms thesaurus % word)
--- a/src/com/owoga/prhyme/gen.clj
+++ b/src/com/owoga/prhyme/gen.clj
@ -36,23 +36,6 @@
                                              (:nuclei (first phrase-words)))))
                   (rest phrase-words)))))

-(defn phrase->word
-  "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
-  that has the correct syllables, rimes, onsets, and nucleus. This way we can
-  rhyme against phrases that aren't in the dictionary, as long as the words that
-  make up the phrase are in the dictionary. Returns nil if the word is not in
-  the dictionary."
-  [words phrase]
-  (->> (string/split phrase #"[ -]")
-       (map (fn [phrase-word]
-              (let [word (first (filter (fn [word]
-                                          (= phrase-word (:norm-word word)))
-                                        words))]
-                (if (nil? word)
-                  (frp/make-word (cons phrase-word (util/get-phones phrase-word)))
-                  word))))
-       (merge-phrase-words phrase)))
-
 (defn adjust-for-markov
  [markov percent]
  (let [target-markov-n (count (first (first markov)))]
@ -121,31 +104,6 @@
             target
             result]))))))

-(comment
-  (let [markov-1-example
-        {'("dream") {"a" 1}
-         '("a") {"me" 1}}
-        markov-2-example
-        {'(nil nil) {"dream" 1}
-         '(nil "dream") {"a" 1}
-         '("dream" "a") {"me" 1}
-         '("a" "me") {"give" 1}
-         '("give" nil) {nil 1}}
-        result-a '()
-        result-b '({:norm-word "dream",
-                    :weight 9.000000000000002,
-                    :adjustment-for-markov 9.000000000000002})
-        words [{:norm-word "dream" :weight 1}
-               {:norm-word "foo" :weight 1}
-               {:norm-word "a" :weight 1}
-               {:norm-word "me" :weight 1}
-               {:norm-word "give" :weight 1}]
-        adj (adjust-for-markov-with-boundaries markov-2-example 0.9)]
-    (adj [words 'target result-b]))
-
-  ((adjust-for-markov-with-boundaries {'("foo" "bar") {}} 0.5)
-   ['() '() '("hi" "bye" "there")]))
-
 (defn adjust-for-rimes
  [dictionary percent]
  (fn [[words target result]]
--- a/src/com/owoga/prhyme/grammar.clj
+++ b/src/com/owoga/prhyme/grammar.clj
@ -1,38 +0,0 @@
-(ns com.owoga.prhyme.grammar)
-
-(def root-states
-  [{::tk/name :failed
-    ::tk/transitions [{::tk/on tk/_ ::tk/to :failed}]}
-   {::tk/name :object
-    ::tk/transitions [{::tk/on :adjectives ::tk/to :obj-adj}
-                      {::tk/on :nouns ::tk/to :obj-noun}
-                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
-   {::tk/name :obj-adj
-    ::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun}
-                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
-   {::tk/name :obj-noun
-    ::tk/transitions [{::tk/on :verbs ::tk/to :verbs}
-                      {::tk/on :adverbs ::tk/to :adverbs}
-                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
-   {::tk/name :verbs
-    ::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun}
-                      {::tk/on :adjectives ::tk/to :subj-adj}
-                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
-   {::tk/name :adverbs
-    ::tk/transitions [{::tk/on :verbs ::tk/to :verbs}
-                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
-   {::tk/name :subj-noun
-    ::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun}
-                      {::tk/on :adjectives ::tk/to :obj-adj}
-                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
-   {::tk/name :subj-adj
-    ::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun}
-                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}])
-
-(def root-fsm
-  {::tk/states root-states
-   ::tk/action! (fn [{::tk/keys [signal action] :as fsm}]
-                  (case signal
-                    :failed (println "Failed! " signal " " action))
-                  fsm)
-   ::tk/state :object})
--- a/src/com/owoga/prhyme/lymeric.clj
+++ b/src/com/owoga/prhyme/lymeric.clj
@ -2,7 +2,9 @@
  (:require [com.owoga.prhyme.gen :as gen]
            [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
            [clojure.string :as string]
+            [com.owoga.prhyme.core :as prhyme]
            [com.owoga.prhyme.frp :as frp]
+            [com.owoga.prhyme.util :as util]
            [taoensso.nippy :as nippy]
            [clojure.java.io :as io]))

@ -21,15 +23,12 @@
              banned-words (into #{} (->> result
                                          (map #(string/split % #" "))
                                          (map #(last %))))
-              adj (apply
-                   comp
-                   (remove
-                    nil?
-                    [(weighted-selection/adjust-for-markov
-                      markov
-                      0.99)
-                     (when (rhymes pattern)
-                       (weighted-selection/adjust-for-tail-rhyme 0.99))]))
+              adj (util/comp-rnil
+                   (weighted-selection/adjust-for-markov
+                    markov
+                    0.99)
+                   (when (rhymes pattern)
+                     (weighted-selection/adjust-for-tail-rhyme 0.99)))
              rhyme (if (nil? (get rhymes pattern))
                      (gen/gen-sentence-with-syllable-count
                       adj
@ -40,7 +39,7 @@
                       syllable-count
                       (remove #(banned-words (:norm-word %))
                               base-words)
-                       (frp/phrase->word frp/words (get rhymes pattern))))]
+                       (prhyme/phrase->word frp/words (get rhymes pattern))))]
          (recur (rest scheme)
                 (assoc rhymes pattern rhyme)
                 (conj result rhyme)))))))
@ -55,7 +54,7 @@
  (->> (gen/selection-seq
        (map #(assoc % :weight 1) frp/words)
        (weighted-selection/adjust-for-rhymes 0.99)
-        (frp/phrase->word frp/words "hi there my boy"))
+        (prhyme/phrase->word frp/words "hi there my boy"))
       (take 3))

  ["bishop larch smitten us dwell"
--- a/src/com/owoga/prhyme/syllabify.clj
+++ b/src/com/owoga/prhyme/syllabify.clj
@ -1,5 +1,6 @@
 (ns com.owoga.prhyme.syllabify
-  (:require [com.owoga.prhyme.util :as p]))
+  (:require [com.owoga.prhyme.data.phonetics :as phonetics]
+            [com.owoga.prhyme.util :as util]))
 ;; ER is not yet handled properly.
 ;; PARENTHESES is syllabified as ("P" "ER" "IH" "N") ("TH" "UH") ("S" "IY" "S")
 ;; Glides are also broken. "R OY AH L" gets syllabified as a single syllable.
@ -35,16 +36,16 @@
 (def lax-vowels #{"EH" "IH" "AE" "AH" "UH"})

 (defn sonority [phone]
-  (.indexOf sonority-hierarchy (p/phonemap phone)))
+  (.indexOf sonority-hierarchy (phonetics/phonemap phone)))

 (defn vowel? [phone]
-  (p/vowel phone))
+  (phonetics/vowel phone))

 (defn >sonorous [a b]
  (> (sonority a) (sonority b)))

 (defn slurp-rime [phones]
-  (let [splits (p/take-through vowel? phones)]
+  (let [splits (util/take-through vowel? phones)]
    [(first splits) (flatten (rest splits))]))

 (defn slurp-onset [phones]
--- a/src/com/owoga/prhyme/util.clj
+++ b/src/com/owoga/prhyme/util.clj
@ -1,41 +1,14 @@
 (ns com.owoga.prhyme.util
  (:require [clojure.java.io :as io]
            [clojure.string :as string]
-            [clojure.set :as set]
-            [clojure.zip :as z])
-  (:import (com.sun.speech.freetts.lexicon LetterToSoundImpl)
-           (com.sun.speech.freetts.en.us CMULexicon)
-           (java.io File)))
+            [clojure.set :as set])
+  (:import (com.sun.speech.freetts.en.us CMULexicon)))

 (defn prepare-word
  "Splits whitespace-separated fields into a sequence."
  [line]
  (string/split line #"[\t ]"))

-(def dictionary
-  (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
-
-(def words (map prepare-word dictionary))
-
-(def words-map
-  (into {} (map #(vector (string/lower-case (first %)) {:phonemes (rest %)}) words)))
-
-(def popular
-  (set (line-seq (io/reader (io/resource "popular.txt")))))
-
-(def adverbs
-  (set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt"))))))
-
-(def adjectives
-  (set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt"))))))
-
-(def verbs
-  (set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt"))))))
-
-(def nouns
-  (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
-
-
 (CMULexicon. "cmulex" true)

 (def cmu-lexicon (CMULexicon/getInstance true))
@ -54,24 +27,6 @@
       (map convert-to-sphinx)
       (map string/upper-case)))

-(def phonemap
-  (->> (io/reader (io/resource "cmudict-0.7b.phones"))
-       (line-seq)
-       (map #(string/split % #"\t"))
-       (into {})))
-
-(def long-vowel #{"EY" "IY" "AY" "OW" "UW"})
-
-(def short-vowel #{"AA" "AE" "AH" "AO" "AW" "EH" "ER" "IH" "OY" "UH"})
-
-(def vowel (set/union long-vowel short-vowel))
-
-(def consonant (set/difference (into #{} (keys phonemap)) vowel))
-
-(def syllable-end (set/union consonant long-vowel))
-
-(def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"})
-
 (defn window [n]
  (fn [coll]
    (cond
@ -80,6 +35,22 @@
      :else (cons (take n coll)
                  (lazy-seq ((window n) (rest coll)))))))

+(defn comp-rnil
+  "Compose functions, ignoring nil values."
+  [& functions]
+  (apply comp (remove nil? functions)))
+
+(defn reduce-while
+  [pred f val coll]
+  (loop [val val
+         coll coll]
+    (cond
+      (empty? coll) val
+      (pred val)
+      (let [new-val (f val (first coll))]
+        (recur new-val (rest coll)))
+      :else val)))
+
 (defn extend-coll [coll val n]
  (concat (repeat n val)
          coll
@ -96,12 +67,6 @@
 (defn clean-text [text]
  (string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))

-(defn english? [text]
-  (let [words (string/split text #"\s+")
-        english-words
-        (->> words (filter #(words-map (string/lower-case %))))]
-    (< 0.7 (/ (count english-words) (max 1 (count words))))))
-
 (defn padr [val n coll]
  (concat coll (repeat n val)))

@ -160,8 +125,6 @@
 (defn count-pred [pred coll]
  (count (filter pred coll)))

-(def count-vowels (partial count-pred vowel))
-
 (defn single? [coll] (= 1 (count coll)))

 (defn partitions
--- a/src/com/owoga/prhyme/util/lovecraft.clj
+++ b/src/com/owoga/prhyme/util/lovecraft.clj
@ -1,639 +0,0 @@
-(ns com.owoga.prhyme.util.lovecraft
-  (:require [net.cgrand.enlive-html :as html]
-            [clojure.string :as string]
-            [com.owoga.prhyme.util.weighted-rand :as wr]
-            [com.owoga.prhyme.core :as prhyme]
-            [com.owoga.prhyme.util.nlp :as nlp]
-            [taoensso.tufte :as tufte :refer [defnp p profiled profile]]
-            [com.owoga.prhyme.frp :as frp]
-            [clojure.java.io :as io]
-            [clojure.set :as set]))
-
-(tufte/add-basic-println-handler! {})
-
-(def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/")
-
-(def words-map
-  (into {} (map #(vector (string/lower-case (:word %)) %) frp/words)))
-
-(defn fetch-url [url]
-  (html/html-resource (java.net.URL. url)))
-
-(comment
-  (fetch-url *base-url*))
-
-(defn links []
-  (map
-   #(str *base-url* (first (html/attr-values % :href)))
-   (html/select
-    (fetch-url *base-url*)
-    [:li :> [:a (html/attr? :href)]])))
-
-(defn contentful-sections [nodes]
-  (->> nodes
-       (map html/text)
-       (filter #(> (count %) 100))))
-
-(defn text-from-link [link]
-  (->> (html/select
-        (fetch-url link)
-        [:body])
-       (first)
-       (html/text)
-       ((fn [s] (string/replace s #"[\s\u00A0]+" " ")))))
-
-(defn cleanup [content]
-  (-> content
-      (string/replace #"Return to.*$" "")
-      (string/replace #"Home.*?This Site" "")
-      (string/replace #"[^a-zA-Z -]+" "")))
-
-(defn tokens [content]
-  (string/split content #"\s+"))
-
-(defn append-to-file [filepath text]
-  (with-open [w (io/writer filepath :append true)]
-    (.write w text)))
-
-(defn scrape []
-  (run!
-   (fn [link]
-     (->> (text-from-link link)
-          (cleanup)
-          (#(str % "\n"))
-          (append-to-file "lovecraft.txt")))
-   (take 10 (links))))
-
-(defn tokens-from-file [file]
-  (with-open [r (io/reader file)]
-    (tokens (slurp r))))
-
-(defn window [n]
-  (fn [coll]
-    (cond
-      (empty? coll) []
-      (< (count coll) n) []
-      :else (cons (take n coll)
-                  (lazy-seq ((window n) (drop n coll)))))))
-
-(defnp markov [tokens]
-  (->> tokens
-       (map
-        (fn [token]
-          (let [k (butlast token)
-                v (last token)]
-            [k v])))
-       (reduce
-        (fn [a [k v]]
-          (update-in a [k v] (fnil inc 0)))
-        {})))
-
-(defnp running-total
-  ([coll]
-   (running-total coll 0))
-  ([coll last-val]
-   (cond
-     (empty? coll) nil
-     :else (cons (+ last-val (first coll))
-                 (lazy-seq
-                  (running-total
-                   (rest coll)
-                   (+ last-val (first coll))))))))
-
-(defnp weighted-rand [weights]
-  (let [running-weights (running-total weights)
-        rand-val (rand (last running-weights))]
-    (loop [i 0]
-      (if (> (nth running-weights i) rand-val)
-        i
-        (recur (inc i))))))
-
-(defnp choose-from-markov-possibilities [possibilities]
-  (if (empty? possibilities)
-    nil
-    (let [weights (vals possibilities)
-          rng (wr/from-weights weights)
-          index (wr/nextr rng nil)]
-      (nth (keys possibilities) index))))
-
-(def word-set (into #{} (->> prhyme/words
-                             (map first)
-                             (map string/lower-case)
-                             (map #(string/replace % #"\(\d+\)" "")))))
-
-(defn normalize-tokens [tokens]
-  (->> tokens
-       (map string/lower-case)
-       (filter word-set)))
-
-(defn main []
-  (->> (tokens-from-file "lovecraft.txt")
-       (reverse)
-       (normalize-tokens)
-       ((window 2))
-       (markov)
-       (into {})))
-
-(defn make-markov-picker [markov-data]
-  (fn [k]
-    (choose-from-markov-possibilities
-     (get markov-data k {}))))
-
-(defn synonym?
-  "Given a possibility, like [\"foo\" 3]
-  which says that foo follows a particular key with
-  a weight of 3, a word is a synonym of that possibility
-  if the word is a synonym ."
-  [p synonyms]
-  (synonyms p))
-
-(defnp adjust-for-synonyms
-  "If a word is in a set of synonyms, adjust its weight upwards."
-  [synonyms]
-  (fn [possibilities]
-    (reduce
-     (fn [p s]
-       (if (s p)
-         (update p s #(* 5 %))
-         p))
-     possibilities
-     synonyms)))
-
-(defnp adjust-for-rimes
-  [target-rime dictionary]
-  (fn [possibilities]
-    (into
-     {}
-     (map
-      (fn [[p v]]
-        (let [possibility (get dictionary p)
-              factor (count
-                      (frp/consecutive-matching
-                       target-rime
-                       possibility
-                       :rimes))]
-          [p (* v (max 1 (* factor 4)))]))
-      possibilities))))
-
-(comment
-  ((adjust-for-synonyms #{"war" "famine"})
-   {"war" 1
-    "disease" 3})
-  ;; => {"war" 5, "disease" 3}
-  ((adjust-for-rimes
-    (frp/make-word ["magic" "M" "AE" "JH" "IH" "K"])
-    words-map)
-   {"tragic" 3
-    "trick" 2
-    "foo" 1})
-  ;; => {"tragic" 24, "trick" 8, "foo" 1} 
-  )
-
-(defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
-(defonce markover (make-markov-picker lovecraft-markov))
-
-(defn markov-key [key-fn]
-  (fn [text]
-    (key-fn text)))
-
-(defn gen-from [m p initial]
-  (loop [r (list initial)]
-    (cond
-      (p r) (recur (cons (m (list (first r))) r))
-      :else r)))
-
-
-(defn rhyming-words
-  "List of rhyming words sorted by quality of rhyme."
-  [target]
-  (let [target-phrase (->> target
-                           (frp/phrase->word frp/words)
-                           (#(assoc % :rimes? true)))]
-    (->> target-phrase
-         (#(assoc % :rimes? true))
-         (frp/prhyme frp/words)
-         (sort-by
-          #(- (count
-               (frp/consecutive-matching
-                %
-                target-phrase
-                :rimes)))))))
-
-(defn markov-rhymes [markov-data rhyming-words]
-  (->> (map
-        (fn [word]
-          (->> word
-               :word
-               string/lower-case
-               (#(string/replace % #"\(\d+\)" ""))
-               (#(vector % (get markov-data (list %))))))
-        rhyming-words)
-       (into #{})
-       (remove
-        (fn [[w p]]
-          (nil? p)))))
-
-(defn markov-gen [markov-data initial]
-  (let [m (make-markov-picker markov-data)]
-    (loop [r initial]
-      (if (> (count r) 5)
-        r
-        (recur (cons (m (list (first r)))
-                     r))))))
-
-(defn make-rhymes [markov-data target]
-  (let [target-word (frp/phrase->word frp/words target)
-        rhyming-words (rhyming-words target)
-        markov--rhymes (markov-rhymes markov-data rhyming-words)
-        rime-adjuster (adjust-for-rimes target-word words-map)
-        modified-markov-data
-        (merge
-         markov-data
-         (into {}
-               (map (fn [[word weights]]
-                      [word (rime-adjuster weights)])
-                    markov--rhymes)))]
-    (->> rhyming-words
-         (markov-rhymes modified-markov-data)
-         (map
-          (fn [[k v]]
-            (markov-gen modified-markov-data (list k))))
-         (map #(remove nil? %)))))
-
-(defn adjust-for-over-syllables
-  "Adjust weights to prefer not going over the number
-  of syllables of the target word."
-  [target]
-  (fn [words]
-    (p :adjust-for-syllables
-       (map
-        (fn [word]
-          (if (or (nil? (:syllable-count word))
-                  (nil? (:syllables target)))
-            (println word target))
-          (cond
-            (= (:syllable-count word) (count (:syllables target)))
-            (as-> word word
-              (assoc word :weight (* 3 (:weight word)))
-              (assoc word :adjusted-for-syllables-factor 3))
-
-            (< (:syllable-count word) (count (:syllables target)))
-            (as-> word word
-              (assoc word :weight (* 2 (:weight word)))
-              (assoc word :adjusted-for-syllables-factor 2))
-
-            :else
-            (as-> word word
-              (assoc word :weight (* 1 (:weight word)))
-              (assoc word :adjusted-for-syllables-factor 1))))
-        words))))
-
-(comment
-  (let [words (->> ["distort" "kiss" "sport"]
-                   (map #(frp/phrase->word frp/words %))
-                   (map #(assoc % :weight 1)))
-        target (->> "report"
-                    (frp/phrase->word frp/words)
-                    (#(assoc % :syllables (:syllables %))))
-        adjuster (adjust-for-over-syllables target)]
-    (adjuster words)))
-
-(defn adjust-for-rhymes
-  "Adjust weights to prefer words that rhyme"
-  [target]
-  (fn [words]
-    (p :adjust-for-rhymes
-       (map
-        (fn [word]
-          (let [factor (max 0.001 (count (frp/consecutive-matching word target :rimes)))]
-            (as-> word word
-              (assoc word :weight (* factor (:weight word)))
-              (assoc word :adjust-for-rhyme-factor factor))))
-        words))))
-
-(defn adjust-for-rhymes-1
-  "Adjust weights to prefer words that rhyme"
-  [target percent]
-  (fn [words]
-    (let [ratio (/ percent (- 1 percent))
-          [rhymes non-rhymes]
-          ((juxt filter remove)
-           (fn [word]
-             (< 0 (count (frp/consecutive-matching word target :rimes))))
-           words)
-          weight-non-rhymes (apply + (map :weight non-rhymes))
-          target-weight-rhymes (* ratio weight-non-rhymes)
-          count-rhymes (max 1 (count rhymes))
-          adjustment-rhyme (/ target-weight-rhymes count-rhymes)]
-      (concat
-       non-rhymes
-       (map
-        (fn [rhyme]
-          (as-> rhyme rhyme
-            (assoc rhyme :weight (* adjustment-rhyme (:weight rhyme)))
-            (assoc rhyme :adjust-for-rhyme-factor adjustment-rhyme)))
-        rhymes)))))
-
-(defn adjust-for-membership-1
-  [set_ percent]
-  (let [ratio (- 1 percent)]
-    (fn [words]
-      (let [[members non-members]
-            ((juxt filter remove)
-             #(set_ (:norm-word %))
-             words)
-            weight-non-members (apply + (map :weight non-members))
-            target-weight-members (* ratio weight-non-members)
-            count-members (count members)
-            adjustment-members (/ target-weight-members count-members)]
-        (concat
-         (map
-          (fn [member]
-            (as-> member member
-              (assoc member :weight (* adjustment-members (:weight member)))
-              (assoc member :adjustment-for-membership adjustment-members)))
-          members)
-         non-members)))))
-
-(comment
-  (let [words (->> ["distort" "kiss" "sport"]
-                   (map #(frp/phrase->word frp/words %))
-                   (map #(assoc % :weight 1)))
-        target (->> "report"
-                    (frp/phrase->word frp/words)
-                    (#(assoc % :remaining-syllables (:syllables %))))
-        rhyme-adjuster (adjust-for-rhymes target)
-        syllable-count-adjuster (adjust-for-over-syllables target)]
-    (syllable-count-adjuster (rhyme-adjuster words))))
-
-(defn adjust-for-membership [set_]
-  (fn [words]
-    (map
-     (fn [word]
-       (if (set_ (:norm-word word))
-         (as-> word word
-           (assoc word :weight (* 2 (:weight word)))
-           (assoc word :adjust-for-membership-factor 2))
-         (assoc word :adjust-for-membership-factor 1)))
-     words)))
-
-(defn filter-for-membership [set_]
-  (fn [words]
-    (map
-     (fn [word]
-       (if-not (set_ (:norm-word word))
-         (as-> word word
-           (assoc word :weight (* 0.01 (:weight word)))
-           (assoc word :filter-for-membership-factor 0.01))
-         word))
-     words)))
-
-(defn adjust-for-markov [markov-options]
-  (let [markov-set (into #{} (map first (keys markov-options)))]
-    (fn [words]
-      (let [result (map
-                    (fn [word]
-                      (if (markov-set (:norm-word word))
-                        (as-> word word
-                          (assoc word :weight (* 100 (:weight word)))
-                          (assoc word :adjust-for-markov-factor 100))
-                        (assoc word :adjust-for-markov-factor 1)))
-                    words)]
-        result))))
-
-(comment
-  (let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))]
-    (take 5 (markov-adjuster frp/words))))
-(defn adjust-for-membership-1
-  [set_ percent]
-  (let [ratio (- 1 percent)]
-    (fn [words]
-      (let [[members non-members]
-            ((juxt filter remove)
-             #(set_ (:norm-word %))
-             words)
-            weight-non-members (apply + (map :weight non-members))
-            target-weight-members (* ratio weight-non-members)
-            count-members (count members)
-            adjustment-members (/ target-weight-members count-members)]
-        (concat
-         (map
-          (fn [member]
-            (as-> member member
-              (assoc member :weight (* adjustment-members (:weight member)))
-              (assoc member :adjustment-for-membership adjustment-members)))
-          members)
-         non-members)))))
-
-(defn adjust-for-markov-1
-  [markov-options percent]
-  (let [ratio (- 1 percent)]
-    (fn [words]
-      (if (nil? markov-options)
-        words
-        (let [[markovs non-markovs]
-              ((juxt filter remove)
-               #(markov-options (:norm-word %))
-               words)
-              weight-non-markovs (apply + (map :weight non-markovs))
-              target-weight-markovs (* ratio weight-non-markovs)
-              count-markovs (count markovs)
-              adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
-          (concat
-           (map
-            (fn [markov]
-              (as-> markov markov
-                (assoc markov :weight (* adjustment-markovs (:weight markov)))
-                (assoc markov :adjustment-for-markov adjustment-markovs)))
-            markovs)
-           non-markovs))))))
-
-(defn e-prhyme
-  "2020-10-21 iteration"
-  [words markov target stop?]
-  (let [target (assoc target :original-syllables (:syllables target))
-        words (map #(assoc % :weight 1) words)]
-    (loop [target target
-           result '()
-           sentinel 0]
-      (if (or (stop? target result)
-              (> sentinel 5))
-        result
-        (let [markov-options (markov (list (:norm-word (first result))))
-              markov-adjuster (adjust-for-markov-1 markov-options 0.9)
-              syllable-count-adjuster (adjust-for-over-syllables target)
-              rhyme-adjuster (adjust-for-rhymes-1 target 0.9)
-              lovecraft-set (into #{} (map (comp first first) lovecraft-markov))
-              lovecraft-filter (adjust-for-membership-1 lovecraft-set 0.9)
-              adjust (comp rhyme-adjuster
-                           syllable-count-adjuster
-                           markov-adjuster
-                           lovecraft-filter)
-              weighted-words (p :adjust
-                                (->> (adjust words)
-                                     (remove #(= 0 (:weight %)))))
-              rng (p :from-weights (wr/from-weights (map :weight weighted-words)))
-              index (p :nextr (wr/nextr rng nil))
-              selection (nth weighted-words index)
-              new-target (->> target
-                              (#(assoc % :syllables (drop-last
-                                                     (:syllable-count
-                                                      selection)
-                                                     (:syllables
-                                                      target))))
-                              (#(assoc % :rimes (prhyme/rimes (:syllables %))))
-                              (#(assoc % :onsets (prhyme/onset+nucleus (:syllables %))))
-                              (#(assoc % :nuclei (prhyme/nucleus (:syllables %)))))
-              result (cons selection result)]
-          (recur new-target result (inc sentinel)))))))
-
-(def words (map #(assoc % :weight 1) frp/words))
-
-(defn main [poem-lines]
-  (map
-   (fn [line]
-     (let [orig-target (frp/phrase->word frp/words line)]
-       (e-prhyme
-        frp/popular
-        lovecraft-markov
-        orig-target
-        (fn [target result]
-          (<= (count (:syllables orig-target))
-              (apply + (map :syllable-count result)))))))
-   poem-lines))
-
-(defn rhymer [words markov target stop]
-  (cons (e-prhyme
-         words
-         markov
-         target
-         stop)
-        (lazy-seq (rhymer words markov target stop))))
-
-(defn stop [target]
-  (fn [inner-target result]
-    (<= (count (:syllables target))
-        (apply + (map :syllable-count result)))))
-
-(defn sentence-stop [target]
-  (fn [inner-target result]
-    (let [result-sentence (string/join " " (map :norm-word result))]
-      (when-not (empty? result)
-        (or (nlp/valid-sentence? result-sentence)
-            (< (:syllable-count target)
-               (apply + (map :syllable-count result)))
-            (< 5 (count result)))))))
-
-(comment
-  (let [phrase (frp/phrase->word frp/words "i solemnly swear i am up to no good")
-        r (rhymer
-           frp/popular
-           lovecraft-markov
-           phrase
-           (sentence-stop phrase))]
-    (take 5 (map #(string/join " " (map :norm-word %))
-                 (filter #(nlp/valid-sentence? (string/join " " (map :norm-word %))) r))))
-
-  (let [poem-lines ["mister sandman"
-                    "give me a dream"
-                    "make him the cutest"
-                    "that i've ever seen"
-                    "give him two lips"
-                    "like roses in clover"
-                    "please tell me that"
-                    "these lonesome nights are over"]]
-    (map (fn [line] (map :norm-word line)) (main poem-lines)))
-
-  (let [orig-target (frp/phrase->word frp/words "mister sandman give me a dream")]
-    (repeatedly
-     10
-     (fn []
-       (e-prhyme
-        frp/popular
-        lovecraft-markov
-        orig-target
-        (fn [target result]
-          (<= (count (:syllables orig-target))
-              (apply + (map :syllable-count result)))))))))
-
-(comment
-  (frp/phrase->word frp/words "distort bad man")
-  (repeatedly 10 #(make-rhymes lovecraft-markov "bad man"))
-
-  (rhyming-words "magic beam")
-  ((make-markov-picker lovecraft-markov) '("no"))
-  (markov-gen lovecraft-markov '("world"))
-  (interleave
-   (->> "your eyes"
-        (make-rhymes lovecraft-markov)
-        (map
-         (fn [[k v]]
-           (markov-gen lovecraft-markov (list k)))))
-   (->> "pretty"
-        (make-rhymes lovecraft-markov)
-        (map
-         (fn [[k v]]
-           (markov-gen lovecraft-markov (list k))))
-        (remove nil?)))
-
-  (frp/phrase->word frp/words "well-off")
-  (frp/prhyme frp/words (assoc (words-map "well") :rimes? true))
-  )
-(defn ghost
-  "Rhyme a phrase with markov"
-  [words word]
-  (let [rhymes (frp/prhyme words word)
-        norm-rhyme-words (->> rhymes
-                              (map :word)
-                              (map string/lower-case)
-                              (map #(string/replace % #"\(\d+\)" ""))
-                              (into #{})
-                              (filter #(get lovecraft-markov (list %))))
-        keyer (markov-key #(list (first (string/split % #"\s"))))]
-    (->> norm-rhyme-words
-         (map (fn [w]
-                (gen-from markover #(< (count %) 5) w))))))
-
-(comment
-  (take 10 lovecraft-markov)
-  (ghost frp/words (assoc (frp/make-word ["dream" "D" "R" "IY" "M"])
-                          :rimes?
-                          true)))
-
-(comment
-  (->> (frp/make-word ["dream" "D" "R" "IY" "M"])
-       (#(assoc % :rimes? true))
-       (frp/prhyme frp/words)
-       (take 10))
-
-
-  (->> (main)
-       (#(spit "lovecraft.edn" (pr-str %))))
-
-  (let [t (read-string (slurp "lovecraft.edn"))]
-    (take 20 t))
-  )
-
-(comment
-  (->> (tokens-from-file "lovecraft.txt")
-       (reverse)
-       (normalize-tokens)
-       ((window 2))
-       (markov)
-       (take 10)
-       (into {})
-       (#(get % '("away")))
-       (choose-from-markov-possibilities))
- 
-  (markov [["boy" "good"] ["the" "over"]
-           ["ran" "he"] ["walked" "he"]
-           ["walked" "he"] ["walked" "she"]])
-  (tokens-from-file "lovecraft.txt")
-  (scrape)
-  (def test-links (take 3 (links)))
-  (->> (text-from-link (first test-links))
-       (cleanup))
-  (->> (text-from-link (first test-links))
-       (append-to-file "test.txt" "hi"))
-  (take 3 (html/select (fetch-url (first test-links)) [:body]))
-  )