WIP

6 years ago · 1cb959010c
parent 0998df73b6
commit 1cb959010c
8 changed files with 113 additions and 907 deletions
--- a/src/com/owoga/prhyme/core.clj
+++ b/src/com/owoga/prhyme/core.clj
@ -1,31 +1,10 @@
 (ns com.owoga.prhyme.core
-  (:require [clojure.java.io :as io]
+  (:require [clojure.string :as string]
            [clojure.string :as string]
            [clojure.set :as set]
            [com.owoga.prhyme.util :as u]
-            [com.owoga.prhyme.syllabify :as s]))
+            [com.owoga.prhyme.syllabify :as s]
-
+            [com.owoga.prhyme.data.dictionary :as dict]
-(def dictionary
+            [com.owoga.prhyme.data.phonetics :as phonetics]))
  (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
 (def words (map u/prepare-word dictionary))
 (def popular
  (set (line-seq (io/reader (io/resource "popular.txt")))))
 (def adverbs
  (set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt"))))))
 (def adjectives
  (set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt"))))))
 (def verbs
  (set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt"))))))
 (def nouns
  (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
 (defrecord Word [word syllables syllable-count rimes onsets nuclei])
 ;;; Typical rhyme model (explanation of following 3 functions)
 ;;
@ -48,51 +27,74 @@
 (defn rimes [syllables]
  (->> syllables
       (map reverse)
-       (map #(first (u/take-through u/vowel %)))
+       (map #(first (u/take-through phonetics/vowel %)))
       (map reverse)))
 (defn onset+nucleus [syllables]
  (->> syllables
-       (map #(first (u/take-through u/vowel %)))))
+       (map #(first (u/take-through phonetics/vowel %)))))
 (defn nucleus [syllables]
-  (map #(list (last (first (u/take-through u/vowel %)))) syllables))
+  (map #(list (last (first (u/take-through phonetics/vowel %)))) syllables))
-
+
-(defn make-word [word]
+(defn merge-phrase-words
-  (let [syllables (s/syllabify (rest word))
+  "Given multiple `Word`, like the words for 'well off', create a single `Word`
-        rimes     (rimes syllables)
+  that is syllabified as ('well' 'off') rather than as the combined ('weh'
-        onsets    (onset+nucleus syllables)
+  'loff'). Useful for finding single-word rhymes of multiple-word targets.
-        nuclei    (nucleus syllables)]
+
-    (->> (->Word
+  An example: 'war on crime' -> 'turpentine'.
-          (first word)
+  As opposed to: 'war on crime' -> 'caw fawn lime'."
-          syllables
+  [phrase phrase-words]
-          (count syllables)
+  (loop [merged (first phrase-words)
-          rimes
+         phrase-words (rest phrase-words)]
-          onsets
+    (cond
-          nuclei)
+      (and (empty? phrase-words) (empty? merged)) nil
-         (#(assoc % :norm-word (string/lower-case
+      (empty? phrase-words) (assoc merged :word phrase)
-                                (string/replace
+      :else (recur (-> merged
-                                 (:word %)
+                       (assoc :syllables (concat (:syllables merged)
-                                 #"\(\d+\)"
+                                                 (:syllables (first phrase-words))))
-                                 "")))))))
+                       (assoc :syllable-count (+ (:syllable-count merged)
-
+                                                 (:syllable-count (first phrase-words))))
-(defn make-word-1 [word phonemes]
+                       (assoc :rimes (concat (:rimes merged)
                                             (:rimes (first phrase-words))))
                       (assoc :onsets (concat (:onsets merged)
                                              (:onsets (first phrase-words))))
                       (assoc :nuclei (concat (:nuclei merged)
                                              (:nuclei (first phrase-words)))))
                   (rest phrase-words)))))
 (defn cmu->prhyme [[word & phonemes]]
  (let [syllables (s/syllabify phonemes)
-        rimes     (rimes syllables)
+        rimes (rimes syllables)
-        onsets    (onset+nucleus syllables)
+        onsets (onset+nucleus syllables)
-        nuclei    (nucleus syllables)]
+        nuclei (nucleus syllables)]
-    (->> (->Word
+    {:word word
-          (string/lower-case word)
+     :syllables syllables
-          syllables
+     :syllable-count (count syllables)
-          (count syllables)
+     :rimes rimes
-          rimes
+     :onsets onsets
-          onsets
+     :nuclei nuclei
-          nuclei)
+     :weight 1
-         ;; CMU dict has multiple pronounciations for some words.
+     :normalized-word (-> word
-         ;; foobar(1), foobar(2), etc...
+                          string/lower-case
-         ;; it's useful to have the normalized word for situations
+                          (string/replace #"\(\d+\)" ""))}))
-         ;; when you don't care how it's pronounced.
+
-         (#(assoc % :normalized-word (string/replace (:word %) #"\(\d+\)" ""))))))
+(defn phrase->word
  "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
  that has the correct syllables, rimes, onsets, and nucleus. This way we can
  rhyme against phrases that aren't in the dictionary, as long as the words that
  make up the phrase are in the dictionary. Returns nil if the word is not in
  the dictionary."
  [words phrase]
  (->> (string/split phrase #"[ -]")
       (map (fn [phrase-word]
              (let [word (first (filter (fn [word]
                                          (= phrase-word (:norm-word word)))
                                        words))]
                (if (nil? word)
                  (cmu->prhyme (cons phrase-word (u/get-phones phrase-word)))
                  word))))
       (merge-phrase-words phrase)))
 (defn words-by-rime* [words]
  (let [words-with-rime (->> words
@ -101,7 +103,7 @@
                             (map #(map reverse %))
                             (map #(map
                                    (fn [syllable]
-                                      (first (u/take-through u/vowel syllable))) %))
+                                      (first (u/take-through phonetics/vowel syllable))) %))
                             (map #(map reverse %))
                             (map reverse)
                             (map #(cons %1 %2) (map first words)))]
@ -118,7 +120,7 @@
                                 (cons val (:words existing)))
                       (rest words)))))))
-(def words-by-rime (words-by-rime* words))
+(def words-by-rime (words-by-rime* dict/cmu-dict))
 (defn words-by-onset-nucleus* [words]
  (let [words-with-onset-nucleus (->> words
@ -126,7 +128,7 @@
                                      (map s/syllabify)
                                      (map #(map
                                             (fn [syllable]
-                                               (first (u/take-through u/vowel syllable)))
+                                               (first (u/take-through phonetics/vowel syllable)))
                                             %))
                                      (map #(cons %1 %2) (map first words)))]
    (loop [by-onset {}
@ -152,7 +154,7 @@
                                       (fn [syllable]
                                         (list
                                          (last
-                                           (first (u/take-through u/vowel syllable)))))
+                                           (first (u/take-through phonetics/vowel syllable)))))
                                       %))
                                (map #(cons %1 %2) (map first words)))]
    (loop [by-nucleus {}
@ -198,7 +200,6 @@
 (defn filter-to-syllable-count [n words]
  (filter (fn [word] (= n (count (s/syllabify (rest word))))) words))
 (defn rhymes?
  "What does it mean for something to rhyme?"
  [a b]
--- a/src/com/owoga/prhyme/frp.clj
+++ b/src/com/owoga/prhyme/frp.clj
@ -2,6 +2,7 @@
  (:require [clojure.java.io :as io]
            [clojure.string :as string]
            [clojure.set :as set]
            [com.owoga.prhyme.data.thesaurus :refer [thesaurus]]
            [com.owoga.prhyme.core :as p]
            [com.owoga.prhyme.util :as u]
            [com.owoga.prhyme.syllabify :as s]))
@ -9,35 +10,11 @@
 (def dictionary
  (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
-(def thesaurus
+
  (->> (line-seq (io/reader (io/resource "mthesaur.txt")))
       (map #(string/split % #","))
       (map #(vector (first %) (rest %)))
       (into {})))
 (defrecord Word [word syllables syllable-count rimes onsets nuclei])
 (defn make-word [word]
  (let [syllables (s/syllabify (rest word))
        rimes     (p/rimes syllables)
        onsets    (p/onset+nucleus syllables)
        nuclei    (p/nucleus syllables)]
    (->> (->Word
          (first word)
          syllables
          (count syllables)
          rimes
          onsets
          nuclei)
         (#(assoc % :norm-word (string/lower-case
                                (string/replace
                                 (:word %)
                                 #"\(\d+\)"
                                 "")))))))
 (def words (->> dictionary
                (map u/prepare-word)
-                (map make-word)))
+                (map p/cmu->prhyme)))
 (def popular-dict
  (set (line-seq (io/reader (io/resource "popular.txt")))))
@ -69,20 +46,6 @@
                                              (:nuclei (first phrase-words)))))
                   (rest phrase-words)))))
 (defn phrase->word
  "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
  that has the correct syllables, rimes, onsets, and nucleus. This way we can
  rhyme against phrases that aren't in the dictionary, as long as the words that
  make up the phrase are in the dictionary. Returns nil if the word is not in
  the dictionary."
  [words phrase]
  (->> (string/split phrase #"[ -]")
       (map (fn [phrase-word]
              (first (filter (fn [word]
                               (= phrase-word (string/lower-case (:norm-word word))))
                             words))))
       (merge-phrase-words phrase)))
 (defn partition-word [word]
  (->> word
       (:syllables)
@ -155,20 +118,18 @@
    (filter (fn [word] (some #(re-matches (re-pattern (str "(?i)" %)) (:word word)) synonyms))
            words)))
 (defn phrymo [dictionary phrase]
  (phrase->word dictionary phrase))
 (comment
-  (->> (make-word ["foobar" "F" "UW" "B" "AA" "R"])
+  (->> (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"])
       (#(assoc % :rimes? true))
       (prhyme words)
       (filter #(= (:syllable-count %) 2))
-       (sort-by #(consecutive-matching
+       (sort-by #(count
-                  %
+                  (consecutive-matching
-                  (make-word ["foobar" "F" "UW" "B" "AA" "R"])
+                   %
-                  :rimes)))
+                   (p/cmu->prhyme ["foobar" "F" "UW" "B" "AA" "R"])
                   :rimes))))
-  (as-> (make-word ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word
+  (as-> (p/cmu->prhyme ["magic beam" "M" "AE" "J" "IH" "K" "B" "IY" "M"]) word
    (into word {:rimes? true})
    (prhyme popular word)
    (mapcat #(matching-synonyms thesaurus % word)
@ -177,7 +138,7 @@
             "distress" "corpse" "necrotic" "zombie"
             "coma" "monster"]))
-  (as-> (make-word ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word
+  (as-> (p/cmu->prhyme ["please turn" "P" "L" "IH" "Z" "T" "ER" "N"]) word
    (into word {:rimes? true})
    (prhyme popular word)
    (mapcat #(matching-synonyms thesaurus % word)
--- a/src/com/owoga/prhyme/gen.clj
+++ b/src/com/owoga/prhyme/gen.clj
@ -36,23 +36,6 @@
                                              (:nuclei (first phrase-words)))))
                   (rest phrase-words)))))
 (defn phrase->word
  "Given a word like 'well-off' or a phrase like 'war on poverty', return a Word
  that has the correct syllables, rimes, onsets, and nucleus. This way we can
  rhyme against phrases that aren't in the dictionary, as long as the words that
  make up the phrase are in the dictionary. Returns nil if the word is not in
  the dictionary."
  [words phrase]
  (->> (string/split phrase #"[ -]")
       (map (fn [phrase-word]
              (let [word (first (filter (fn [word]
                                          (= phrase-word (:norm-word word)))
                                        words))]
                (if (nil? word)
                  (frp/make-word (cons phrase-word (util/get-phones phrase-word)))
                  word))))
       (merge-phrase-words phrase)))
 (defn adjust-for-markov
  [markov percent]
  (let [target-markov-n (count (first (first markov)))]
@ -121,31 +104,6 @@
             target
             result]))))))
 (comment
  (let [markov-1-example
        {'("dream") {"a" 1}
         '("a") {"me" 1}}
        markov-2-example
        {'(nil nil) {"dream" 1}
         '(nil "dream") {"a" 1}
         '("dream" "a") {"me" 1}
         '("a" "me") {"give" 1}
         '("give" nil) {nil 1}}
        result-a '()
        result-b '({:norm-word "dream",
                    :weight 9.000000000000002,
                    :adjustment-for-markov 9.000000000000002})
        words [{:norm-word "dream" :weight 1}
               {:norm-word "foo" :weight 1}
               {:norm-word "a" :weight 1}
               {:norm-word "me" :weight 1}
               {:norm-word "give" :weight 1}]
        adj (adjust-for-markov-with-boundaries markov-2-example 0.9)]
    (adj [words 'target result-b]))
  ((adjust-for-markov-with-boundaries {'("foo" "bar") {}} 0.5)
   ['() '() '("hi" "bye" "there")]))
 (defn adjust-for-rimes
  [dictionary percent]
  (fn [[words target result]]
--- a/src/com/owoga/prhyme/grammar.clj
+++ b/src/com/owoga/prhyme/grammar.clj
@ -1,38 +0,0 @@
 (ns com.owoga.prhyme.grammar)
 (def root-states
  [{::tk/name :failed
    ::tk/transitions [{::tk/on tk/_ ::tk/to :failed}]}
   {::tk/name :object
    ::tk/transitions [{::tk/on :adjectives ::tk/to :obj-adj}
                      {::tk/on :nouns ::tk/to :obj-noun}
                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
   {::tk/name :obj-adj
    ::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun}
                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
   {::tk/name :obj-noun
    ::tk/transitions [{::tk/on :verbs ::tk/to :verbs}
                      {::tk/on :adverbs ::tk/to :adverbs}
                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
   {::tk/name :verbs
    ::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun}
                      {::tk/on :adjectives ::tk/to :subj-adj}
                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
   {::tk/name :adverbs
    ::tk/transitions [{::tk/on :verbs ::tk/to :verbs}
                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
   {::tk/name :subj-noun
    ::tk/transitions [{::tk/on :nouns ::tk/to :obj-noun}
                      {::tk/on :adjectives ::tk/to :obj-adj}
                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}
   {::tk/name :subj-adj
    ::tk/transitions [{::tk/on :nouns ::tk/to :subj-noun}
                      {::tk/on tk/_ ::tk/to :object ::tk/actions [:failed]}]}])
 (def root-fsm
  {::tk/states root-states
   ::tk/action! (fn [{::tk/keys [signal action] :as fsm}]
                  (case signal
                    :failed (println "Failed! " signal " " action))
                  fsm)
   ::tk/state :object})
--- a/src/com/owoga/prhyme/lymeric.clj
+++ b/src/com/owoga/prhyme/lymeric.clj
@ -2,7 +2,9 @@
  (:require [com.owoga.prhyme.gen :as gen]
            [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
            [clojure.string :as string]
            [com.owoga.prhyme.core :as prhyme]
            [com.owoga.prhyme.frp :as frp]
            [com.owoga.prhyme.util :as util]
            [taoensso.nippy :as nippy]
            [clojure.java.io :as io]))
@ -21,15 +23,12 @@
              banned-words (into #{} (->> result
                                          (map #(string/split % #" "))
                                          (map #(last %))))
-              adj (apply
+              adj (util/comp-rnil
-                   comp
+                   (weighted-selection/adjust-for-markov
-                   (remove
+                    markov
-                    nil?
+                    0.99)
-                    [(weighted-selection/adjust-for-markov
+                   (when (rhymes pattern)
-                      markov
+                     (weighted-selection/adjust-for-tail-rhyme 0.99)))
                      0.99)
                     (when (rhymes pattern)
                       (weighted-selection/adjust-for-tail-rhyme 0.99))]))
              rhyme (if (nil? (get rhymes pattern))
                      (gen/gen-sentence-with-syllable-count
                       adj
@ -40,7 +39,7 @@
                       syllable-count
                       (remove #(banned-words (:norm-word %))
                               base-words)
-                       (frp/phrase->word frp/words (get rhymes pattern))))]
+                       (prhyme/phrase->word frp/words (get rhymes pattern))))]
          (recur (rest scheme)
                 (assoc rhymes pattern rhyme)
                 (conj result rhyme)))))))
@ -55,7 +54,7 @@
  (->> (gen/selection-seq
        (map #(assoc % :weight 1) frp/words)
        (weighted-selection/adjust-for-rhymes 0.99)
-        (frp/phrase->word frp/words "hi there my boy"))
+        (prhyme/phrase->word frp/words "hi there my boy"))
       (take 3))
  ["bishop larch smitten us dwell"
--- a/src/com/owoga/prhyme/syllabify.clj
+++ b/src/com/owoga/prhyme/syllabify.clj
@ -1,5 +1,6 @@
 (ns com.owoga.prhyme.syllabify
-  (:require [com.owoga.prhyme.util :as p]))
+  (:require [com.owoga.prhyme.data.phonetics :as phonetics]
            [com.owoga.prhyme.util :as util]))
 ;; ER is not yet handled properly.
 ;; PARENTHESES is syllabified as ("P" "ER" "IH" "N") ("TH" "UH") ("S" "IY" "S")
 ;; Glides are also broken. "R OY AH L" gets syllabified as a single syllable.
@ -35,16 +36,16 @@
 (def lax-vowels #{"EH" "IH" "AE" "AH" "UH"})
 (defn sonority [phone]
-  (.indexOf sonority-hierarchy (p/phonemap phone)))
+  (.indexOf sonority-hierarchy (phonetics/phonemap phone)))
 (defn vowel? [phone]
-  (p/vowel phone))
+  (phonetics/vowel phone))
 (defn >sonorous [a b]
  (> (sonority a) (sonority b)))
 (defn slurp-rime [phones]
-  (let [splits (p/take-through vowel? phones)]
+  (let [splits (util/take-through vowel? phones)]
    [(first splits) (flatten (rest splits))]))
 (defn slurp-onset [phones]
--- a/src/com/owoga/prhyme/util.clj
+++ b/src/com/owoga/prhyme/util.clj
@ -1,41 +1,14 @@
 (ns com.owoga.prhyme.util
  (:require [clojure.java.io :as io]
            [clojure.string :as string]
-            [clojure.set :as set]
+            [clojure.set :as set])
-            [clojure.zip :as z])
+  (:import (com.sun.speech.freetts.en.us CMULexicon)))
  (:import (com.sun.speech.freetts.lexicon LetterToSoundImpl)
           (com.sun.speech.freetts.en.us CMULexicon)
           (java.io File)))
 (defn prepare-word
  "Splits whitespace-separated fields into a sequence."
  [line]
  (string/split line #"[\t ]"))
 (def dictionary
  (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
 (def words (map prepare-word dictionary))
 (def words-map
  (into {} (map #(vector (string/lower-case (first %)) {:phonemes (rest %)}) words)))
 (def popular
  (set (line-seq (io/reader (io/resource "popular.txt")))))
 (def adverbs
  (set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt"))))))
 (def adjectives
  (set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt"))))))
 (def verbs
  (set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt"))))))
 (def nouns
  (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
 (CMULexicon. "cmulex" true)
 (def cmu-lexicon (CMULexicon/getInstance true))
@ -54,24 +27,6 @@
       (map convert-to-sphinx)
       (map string/upper-case)))
 (def phonemap
  (->> (io/reader (io/resource "cmudict-0.7b.phones"))
       (line-seq)
       (map #(string/split % #"\t"))
       (into {})))
 (def long-vowel #{"EY" "IY" "AY" "OW" "UW"})
 (def short-vowel #{"AA" "AE" "AH" "AO" "AW" "EH" "ER" "IH" "OY" "UH"})
 (def vowel (set/union long-vowel short-vowel))
 (def consonant (set/difference (into #{} (keys phonemap)) vowel))
 (def syllable-end (set/union consonant long-vowel))
 (def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"})
 (defn window [n]
  (fn [coll]
    (cond
@ -80,6 +35,22 @@
      :else (cons (take n coll)
                  (lazy-seq ((window n) (rest coll)))))))
 (defn comp-rnil
  "Compose functions, ignoring nil values."
  [& functions]
  (apply comp (remove nil? functions)))
 (defn reduce-while
  [pred f val coll]
  (loop [val val
         coll coll]
    (cond
      (empty? coll) val
      (pred val)
      (let [new-val (f val (first coll))]
        (recur new-val (rest coll)))
      :else val)))
 (defn extend-coll [coll val n]
  (concat (repeat n val)
          coll
@ -96,12 +67,6 @@
 (defn clean-text [text]
  (string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
 (defn english? [text]
  (let [words (string/split text #"\s+")
        english-words
        (->> words (filter #(words-map (string/lower-case %))))]
    (< 0.7 (/ (count english-words) (max 1 (count words))))))
 (defn padr [val n coll]
  (concat coll (repeat n val)))
@ -160,8 +125,6 @@
 (defn count-pred [pred coll]
  (count (filter pred coll)))
 (def count-vowels (partial count-pred vowel))
 (defn single? [coll] (= 1 (count coll)))
 (defn partitions
--- a/src/com/owoga/prhyme/util/lovecraft.clj
+++ b/src/com/owoga/prhyme/util/lovecraft.clj
@ -1,639 +0,0 @@
 (ns com.owoga.prhyme.util.lovecraft
  (:require [net.cgrand.enlive-html :as html]
            [clojure.string :as string]
            [com.owoga.prhyme.util.weighted-rand :as wr]
            [com.owoga.prhyme.core :as prhyme]
            [com.owoga.prhyme.util.nlp :as nlp]
            [taoensso.tufte :as tufte :refer [defnp p profiled profile]]
            [com.owoga.prhyme.frp :as frp]
            [clojure.java.io :as io]
            [clojure.set :as set]))
 (tufte/add-basic-println-handler! {})
 (def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/")
 (def words-map
  (into {} (map #(vector (string/lower-case (:word %)) %) frp/words)))
 (defn fetch-url [url]
  (html/html-resource (java.net.URL. url)))
 (comment
  (fetch-url *base-url*))
 (defn links []
  (map
   #(str *base-url* (first (html/attr-values % :href)))
   (html/select
    (fetch-url *base-url*)
    [:li :> [:a (html/attr? :href)]])))
 (defn contentful-sections [nodes]
  (->> nodes
       (map html/text)
       (filter #(> (count %) 100))))
 (defn text-from-link [link]
  (->> (html/select
        (fetch-url link)
        [:body])
       (first)
       (html/text)
       ((fn [s] (string/replace s #"[\s\u00A0]+" " ")))))
 (defn cleanup [content]
  (-> content
      (string/replace #"Return to.*$" "")
      (string/replace #"Home.*?This Site" "")
      (string/replace #"[^a-zA-Z -]+" "")))
 (defn tokens [content]
  (string/split content #"\s+"))
 (defn append-to-file [filepath text]
  (with-open [w (io/writer filepath :append true)]
    (.write w text)))
 (defn scrape []
  (run!
   (fn [link]
     (->> (text-from-link link)
          (cleanup)
          (#(str % "\n"))
          (append-to-file "lovecraft.txt")))
   (take 10 (links))))
 (defn tokens-from-file [file]
  (with-open [r (io/reader file)]
    (tokens (slurp r))))
 (defn window [n]
  (fn [coll]
    (cond
      (empty? coll) []
      (< (count coll) n) []
      :else (cons (take n coll)
                  (lazy-seq ((window n) (drop n coll)))))))
 (defnp markov [tokens]
  (->> tokens
       (map
        (fn [token]
          (let [k (butlast token)
                v (last token)]
            [k v])))
       (reduce
        (fn [a [k v]]
          (update-in a [k v] (fnil inc 0)))
        {})))
 (defnp running-total
  ([coll]
   (running-total coll 0))
  ([coll last-val]
   (cond
     (empty? coll) nil
     :else (cons (+ last-val (first coll))
                 (lazy-seq
                  (running-total
                   (rest coll)
                   (+ last-val (first coll))))))))
 (defnp weighted-rand [weights]
  (let [running-weights (running-total weights)
        rand-val (rand (last running-weights))]
    (loop [i 0]
      (if (> (nth running-weights i) rand-val)
        i
        (recur (inc i))))))
 (defnp choose-from-markov-possibilities [possibilities]
  (if (empty? possibilities)
    nil
    (let [weights (vals possibilities)
          rng (wr/from-weights weights)
          index (wr/nextr rng nil)]
      (nth (keys possibilities) index))))
 (def word-set (into #{} (->> prhyme/words
                             (map first)
                             (map string/lower-case)
                             (map #(string/replace % #"\(\d+\)" "")))))
 (defn normalize-tokens [tokens]
  (->> tokens
       (map string/lower-case)
       (filter word-set)))
 (defn main []
  (->> (tokens-from-file "lovecraft.txt")
       (reverse)
       (normalize-tokens)
       ((window 2))
       (markov)
       (into {})))
 (defn make-markov-picker [markov-data]
  (fn [k]
    (choose-from-markov-possibilities
     (get markov-data k {}))))
 (defn synonym?
  "Given a possibility, like [\"foo\" 3]
  which says that foo follows a particular key with
  a weight of 3, a word is a synonym of that possibility
  if the word is a synonym ."
  [p synonyms]
  (synonyms p))
 (defnp adjust-for-synonyms
  "If a word is in a set of synonyms, adjust its weight upwards."
  [synonyms]
  (fn [possibilities]
    (reduce
     (fn [p s]
       (if (s p)
         (update p s #(* 5 %))
         p))
     possibilities
     synonyms)))
 (defnp adjust-for-rimes
  [target-rime dictionary]
  (fn [possibilities]
    (into
     {}
     (map
      (fn [[p v]]
        (let [possibility (get dictionary p)
              factor (count
                      (frp/consecutive-matching
                       target-rime
                       possibility
                       :rimes))]
          [p (* v (max 1 (* factor 4)))]))
      possibilities))))
 (comment
  ((adjust-for-synonyms #{"war" "famine"})
   {"war" 1
    "disease" 3})
  ;; => {"war" 5, "disease" 3}
  ((adjust-for-rimes
    (frp/make-word ["magic" "M" "AE" "JH" "IH" "K"])
    words-map)
   {"tragic" 3
    "trick" 2
    "foo" 1})
  ;; => {"tragic" 24, "trick" 8, "foo" 1} 
  )
 (defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
 (defonce markover (make-markov-picker lovecraft-markov))
 (defn markov-key [key-fn]
  (fn [text]
    (key-fn text)))
 (defn gen-from [m p initial]
  (loop [r (list initial)]
    (cond
      (p r) (recur (cons (m (list (first r))) r))
      :else r)))
 (defn rhyming-words
  "List of rhyming words sorted by quality of rhyme."
  [target]
  (let [target-phrase (->> target
                           (frp/phrase->word frp/words)
                           (#(assoc % :rimes? true)))]
    (->> target-phrase
         (#(assoc % :rimes? true))
         (frp/prhyme frp/words)
         (sort-by
          #(- (count
               (frp/consecutive-matching
                %
                target-phrase
                :rimes)))))))
 (defn markov-rhymes [markov-data rhyming-words]
  (->> (map
        (fn [word]
          (->> word
               :word
               string/lower-case
               (#(string/replace % #"\(\d+\)" ""))
               (#(vector % (get markov-data (list %))))))
        rhyming-words)
       (into #{})
       (remove
        (fn [[w p]]
          (nil? p)))))
 (defn markov-gen [markov-data initial]
  (let [m (make-markov-picker markov-data)]
    (loop [r initial]
      (if (> (count r) 5)
        r
        (recur (cons (m (list (first r)))
                     r))))))
 (defn make-rhymes [markov-data target]
  (let [target-word (frp/phrase->word frp/words target)
        rhyming-words (rhyming-words target)
        markov--rhymes (markov-rhymes markov-data rhyming-words)
        rime-adjuster (adjust-for-rimes target-word words-map)
        modified-markov-data
        (merge
         markov-data
         (into {}
               (map (fn [[word weights]]
                      [word (rime-adjuster weights)])
                    markov--rhymes)))]
    (->> rhyming-words
         (markov-rhymes modified-markov-data)
         (map
          (fn [[k v]]
            (markov-gen modified-markov-data (list k))))
         (map #(remove nil? %)))))
 (defn adjust-for-over-syllables
  "Adjust weights to prefer not going over the number
  of syllables of the target word."
  [target]
  (fn [words]
    (p :adjust-for-syllables
       (map
        (fn [word]
          (if (or (nil? (:syllable-count word))
                  (nil? (:syllables target)))
            (println word target))
          (cond
            (= (:syllable-count word) (count (:syllables target)))
            (as-> word word
              (assoc word :weight (* 3 (:weight word)))
              (assoc word :adjusted-for-syllables-factor 3))
            (< (:syllable-count word) (count (:syllables target)))
            (as-> word word
              (assoc word :weight (* 2 (:weight word)))
              (assoc word :adjusted-for-syllables-factor 2))
            :else
            (as-> word word
              (assoc word :weight (* 1 (:weight word)))
              (assoc word :adjusted-for-syllables-factor 1))))
        words))))
 (comment
  (let [words (->> ["distort" "kiss" "sport"]
                   (map #(frp/phrase->word frp/words %))
                   (map #(assoc % :weight 1)))
        target (->> "report"
                    (frp/phrase->word frp/words)
                    (#(assoc % :syllables (:syllables %))))
        adjuster (adjust-for-over-syllables target)]
    (adjuster words)))
 (defn adjust-for-rhymes
  "Adjust weights to prefer words that rhyme"
  [target]
  (fn [words]
    (p :adjust-for-rhymes
       (map
        (fn [word]
          (let [factor (max 0.001 (count (frp/consecutive-matching word target :rimes)))]
            (as-> word word
              (assoc word :weight (* factor (:weight word)))
              (assoc word :adjust-for-rhyme-factor factor))))
        words))))
 (defn adjust-for-rhymes-1
  "Adjust weights to prefer words that rhyme"
  [target percent]
  (fn [words]
    (let [ratio (/ percent (- 1 percent))
          [rhymes non-rhymes]
          ((juxt filter remove)
           (fn [word]
             (< 0 (count (frp/consecutive-matching word target :rimes))))
           words)
          weight-non-rhymes (apply + (map :weight non-rhymes))
          target-weight-rhymes (* ratio weight-non-rhymes)
          count-rhymes (max 1 (count rhymes))
          adjustment-rhyme (/ target-weight-rhymes count-rhymes)]
      (concat
       non-rhymes
       (map
        (fn [rhyme]
          (as-> rhyme rhyme
            (assoc rhyme :weight (* adjustment-rhyme (:weight rhyme)))
            (assoc rhyme :adjust-for-rhyme-factor adjustment-rhyme)))
        rhymes)))))
 (defn adjust-for-membership-1
  [set_ percent]
  (let [ratio (- 1 percent)]
    (fn [words]
      (let [[members non-members]
            ((juxt filter remove)
             #(set_ (:norm-word %))
             words)
            weight-non-members (apply + (map :weight non-members))
            target-weight-members (* ratio weight-non-members)
            count-members (count members)
            adjustment-members (/ target-weight-members count-members)]
        (concat
         (map
          (fn [member]
            (as-> member member
              (assoc member :weight (* adjustment-members (:weight member)))
              (assoc member :adjustment-for-membership adjustment-members)))
          members)
         non-members)))))
 (comment
  (let [words (->> ["distort" "kiss" "sport"]
                   (map #(frp/phrase->word frp/words %))
                   (map #(assoc % :weight 1)))
        target (->> "report"
                    (frp/phrase->word frp/words)
                    (#(assoc % :remaining-syllables (:syllables %))))
        rhyme-adjuster (adjust-for-rhymes target)
        syllable-count-adjuster (adjust-for-over-syllables target)]
    (syllable-count-adjuster (rhyme-adjuster words))))
 (defn adjust-for-membership [set_]
  (fn [words]
    (map
     (fn [word]
       (if (set_ (:norm-word word))
         (as-> word word
           (assoc word :weight (* 2 (:weight word)))
           (assoc word :adjust-for-membership-factor 2))
         (assoc word :adjust-for-membership-factor 1)))
     words)))
 (defn filter-for-membership [set_]
  (fn [words]
    (map
     (fn [word]
       (if-not (set_ (:norm-word word))
         (as-> word word
           (assoc word :weight (* 0.01 (:weight word)))
           (assoc word :filter-for-membership-factor 0.01))
         word))
     words)))
 (defn adjust-for-markov [markov-options]
  (let [markov-set (into #{} (map first (keys markov-options)))]
    (fn [words]
      (let [result (map
                    (fn [word]
                      (if (markov-set (:norm-word word))
                        (as-> word word
                          (assoc word :weight (* 100 (:weight word)))
                          (assoc word :adjust-for-markov-factor 100))
                        (assoc word :adjust-for-markov-factor 1)))
                    words)]
        result))))
 (comment
  (let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))]
    (take 5 (markov-adjuster frp/words))))
 (defn adjust-for-membership-1
  [set_ percent]
  (let [ratio (- 1 percent)]
    (fn [words]
      (let [[members non-members]
            ((juxt filter remove)
             #(set_ (:norm-word %))
             words)
            weight-non-members (apply + (map :weight non-members))
            target-weight-members (* ratio weight-non-members)
            count-members (count members)
            adjustment-members (/ target-weight-members count-members)]
        (concat
         (map
          (fn [member]
            (as-> member member
              (assoc member :weight (* adjustment-members (:weight member)))
              (assoc member :adjustment-for-membership adjustment-members)))
          members)
         non-members)))))
 (defn adjust-for-markov-1
  [markov-options percent]
  (let [ratio (- 1 percent)]
    (fn [words]
      (if (nil? markov-options)
        words
        (let [[markovs non-markovs]
              ((juxt filter remove)
               #(markov-options (:norm-word %))
               words)
              weight-non-markovs (apply + (map :weight non-markovs))
              target-weight-markovs (* ratio weight-non-markovs)
              count-markovs (count markovs)
              adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
          (concat
           (map
            (fn [markov]
              (as-> markov markov
                (assoc markov :weight (* adjustment-markovs (:weight markov)))
                (assoc markov :adjustment-for-markov adjustment-markovs)))
            markovs)
           non-markovs))))))
 (defn e-prhyme
  "2020-10-21 iteration"
  [words markov target stop?]
  (let [target (assoc target :original-syllables (:syllables target))
        words (map #(assoc % :weight 1) words)]
    (loop [target target
           result '()
           sentinel 0]
      (if (or (stop? target result)
              (> sentinel 5))
        result
        (let [markov-options (markov (list (:norm-word (first result))))
              markov-adjuster (adjust-for-markov-1 markov-options 0.9)
              syllable-count-adjuster (adjust-for-over-syllables target)
              rhyme-adjuster (adjust-for-rhymes-1 target 0.9)
              lovecraft-set (into #{} (map (comp first first) lovecraft-markov))
              lovecraft-filter (adjust-for-membership-1 lovecraft-set 0.9)
              adjust (comp rhyme-adjuster
                           syllable-count-adjuster
                           markov-adjuster
                           lovecraft-filter)
              weighted-words (p :adjust
                                (->> (adjust words)
                                     (remove #(= 0 (:weight %)))))
              rng (p :from-weights (wr/from-weights (map :weight weighted-words)))
              index (p :nextr (wr/nextr rng nil))
              selection (nth weighted-words index)
              new-target (->> target
                              (#(assoc % :syllables (drop-last
                                                     (:syllable-count
                                                      selection)
                                                     (:syllables
                                                      target))))
                              (#(assoc % :rimes (prhyme/rimes (:syllables %))))
                              (#(assoc % :onsets (prhyme/onset+nucleus (:syllables %))))
                              (#(assoc % :nuclei (prhyme/nucleus (:syllables %)))))
              result (cons selection result)]
          (recur new-target result (inc sentinel)))))))
 (def words (map #(assoc % :weight 1) frp/words))
 (defn main [poem-lines]
  (map
   (fn [line]
     (let [orig-target (frp/phrase->word frp/words line)]
       (e-prhyme
        frp/popular
        lovecraft-markov
        orig-target
        (fn [target result]
          (<= (count (:syllables orig-target))
              (apply + (map :syllable-count result)))))))
   poem-lines))
 (defn rhymer [words markov target stop]
  (cons (e-prhyme
         words
         markov
         target
         stop)
        (lazy-seq (rhymer words markov target stop))))
 (defn stop [target]
  (fn [inner-target result]
    (<= (count (:syllables target))
        (apply + (map :syllable-count result)))))
 (defn sentence-stop [target]
  (fn [inner-target result]
    (let [result-sentence (string/join " " (map :norm-word result))]
      (when-not (empty? result)
        (or (nlp/valid-sentence? result-sentence)
            (< (:syllable-count target)
               (apply + (map :syllable-count result)))
            (< 5 (count result)))))))
 (comment
  (let [phrase (frp/phrase->word frp/words "i solemnly swear i am up to no good")
        r (rhymer
           frp/popular
           lovecraft-markov
           phrase
           (sentence-stop phrase))]
    (take 5 (map #(string/join " " (map :norm-word %))
                 (filter #(nlp/valid-sentence? (string/join " " (map :norm-word %))) r))))
  (let [poem-lines ["mister sandman"
                    "give me a dream"
                    "make him the cutest"
                    "that i've ever seen"
                    "give him two lips"
                    "like roses in clover"
                    "please tell me that"
                    "these lonesome nights are over"]]
    (map (fn [line] (map :norm-word line)) (main poem-lines)))
  (let [orig-target (frp/phrase->word frp/words "mister sandman give me a dream")]
    (repeatedly
     10
     (fn []
       (e-prhyme
        frp/popular
        lovecraft-markov
        orig-target
        (fn [target result]
          (<= (count (:syllables orig-target))
              (apply + (map :syllable-count result)))))))))
 (comment
  (frp/phrase->word frp/words "distort bad man")
  (repeatedly 10 #(make-rhymes lovecraft-markov "bad man"))
  (rhyming-words "magic beam")
  ((make-markov-picker lovecraft-markov) '("no"))
  (markov-gen lovecraft-markov '("world"))
  (interleave
   (->> "your eyes"
        (make-rhymes lovecraft-markov)
        (map
         (fn [[k v]]
           (markov-gen lovecraft-markov (list k)))))
   (->> "pretty"
        (make-rhymes lovecraft-markov)
        (map
         (fn [[k v]]
           (markov-gen lovecraft-markov (list k))))
        (remove nil?)))
  (frp/phrase->word frp/words "well-off")
  (frp/prhyme frp/words (assoc (words-map "well") :rimes? true))
  )
 (defn ghost
  "Rhyme a phrase with markov"
  [words word]
  (let [rhymes (frp/prhyme words word)
        norm-rhyme-words (->> rhymes
                              (map :word)
                              (map string/lower-case)
                              (map #(string/replace % #"\(\d+\)" ""))
                              (into #{})
                              (filter #(get lovecraft-markov (list %))))
        keyer (markov-key #(list (first (string/split % #"\s"))))]
    (->> norm-rhyme-words
         (map (fn [w]
                (gen-from markover #(< (count %) 5) w))))))
 (comment
  (take 10 lovecraft-markov)
  (ghost frp/words (assoc (frp/make-word ["dream" "D" "R" "IY" "M"])
                          :rimes?
                          true)))
 (comment
  (->> (frp/make-word ["dream" "D" "R" "IY" "M"])
       (#(assoc % :rimes? true))
       (frp/prhyme frp/words)
       (take 10))
  (->> (main)
       (#(spit "lovecraft.edn" (pr-str %))))
  (let [t (read-string (slurp "lovecraft.edn"))]
    (take 20 t))
  )
 (comment
  (->> (tokens-from-file "lovecraft.txt")
       (reverse)
       (normalize-tokens)
       ((window 2))
       (markov)
       (take 10)
       (into {})
       (#(get % '("away")))
       (choose-from-markov-possibilities))
  (markov [["boy" "good"] ["the" "over"]
           ["ran" "he"] ["walked" "he"]
           ["walked" "he"] ["walked" "she"]])
  (tokens-from-file "lovecraft.txt")
  (scrape)
  (def test-links (take 3 (links)))
  (->> (text-from-link (first test-links))
       (cleanup))
  (->> (text-from-link (first test-links))
       (append-to-file "test.txt" "hi"))
  (take 3 (html/select (fetch-url (first test-links)) [:body]))
  )