Cleanup and organize namespaces

4 years ago · f30686e95e
parent 7e461b3620
commit f30686e95e
7 changed files with 284 additions and 24 deletions
--- a/src/com/owoga/corpus/darklyrics.clj
+++ b/src/com/owoga/corpus/darklyrics.clj
@ -168,7 +168,6 @@
     artist-album-texts)))

 (comment
-
  (def darkov-2 (util/read-markov "dark-corpus-2.edn"))
  (get darkov-2 '(nil nil))
  (take 3 (scrape base-url))
--- a/src/com/owoga/prhyme/core.clj
+++ b/src/com/owoga/prhyme/core.clj
@ -159,6 +159,32 @@

    :else (= (last (:rimes a)) (last (:rimes b)))))

+(defn rhymes?
+  "What does it mean for something to rhyme?"
+  [a b]
+  (cond
+    (and (= 1 (count (last (:rimes a))))
+         (= 1 (count (last (:rimes b))))
+         (or (= (last (:rimes a)) '("ER"))
+             (= (last (:rimes a)) '("AA"))
+             (= (last (:rimes a)) '("AE"))
+             (= (last (:rimes a)) '("AO"))
+             (= (last (:rimes a)) '("AW"))
+             (= (last (:rimes a)) '("EH"))
+             (= (last (:rimes a)) '("IH"))
+             (= (last (:rimes a)) '("UH"))
+             (= (last (:rimes a)) '("AH"))))
+    (= (list (first (take-last 2 (:nuclei a)))
+             (last (:onsets a)))
+       (list (first (take-last 2 (:nuclei b)))
+             (last (:onsets b))))
+
+    (and (= 1 (count (last (:rimes a))))
+         (= 1 (count (last (:rimes b)))))
+    (= (last (:onsets a)) (last (:onsets b)))
+
+    :else (= (last (:rimes a)) (last (:rimes b)))))
+
 (defn onset+nucleus [syllables]
  (->> syllables
       (map #(first (u/take-through u/vowel %)))))
--- a/src/com/owoga/prhyme/gen.clj
+++ b/src/com/owoga/prhyme/gen.clj
@ -1,5 +1,6 @@
 (ns com.owoga.prhyme.gen
  (:require [clojure.string :as string]
+            [com.owoga.prhyme.util.math :as math]
            [com.owoga.prhyme.util :as util]
            [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
            [com.owoga.prhyme.util.nlp :as nlp]
@ -225,6 +226,30 @@
       (filter nlp/valid-sentence?)
       first))

+(defn remove-selection-from-target [target selection]
+  (->> target
+       (#(assoc % :syllables (drop-last
+                              (:syllable-count
+                               selection)
+                              (:syllables
+                               target))))
+       (#(assoc % :rimes (prhyme/rimes (:syllables %))))
+       (#(assoc % :onsets (prhyme/onset+nucleus (:syllables %))))
+       (#(assoc % :nuclei (prhyme/nucleus (:syllables %))))))
+
+(defn selection-seq
+  ([words adjust target]
+   (selection-seq words adjust target '()))
+  ([words adjust target result]
+   (let [[weighted-words _ _] (adjust [words target result])
+         selection (math/weighted-selection :weight weighted-words)
+         new-target (remove-selection-from-target target selection)
+         new-result (cons selection result)]
+     (cons selection
+           (lazy-seq (selection-seq words adjust new-target new-result))))))
+
+(defn generate-prhyme [words adjust target stop?]
+  (loop [result '()]))

 (defn prhyme
  "2020-10-21 iteration"
--- a/src/com/owoga/prhyme/generation/weighted_selection.clj
+++ b/src/com/owoga/prhyme/generation/weighted_selection.clj
@ -0,0 +1,110 @@
+(ns com.owoga.prhyme.generation.weighted-selection
+  "Utilities for generation of rhymes by assigning weights to a collection of
+  words and randomly choosing words based on their weights.
+
+  For example, we might want the first word of the rhyme of a target phrase to
+  be selected from words that are highly weighted by their rhymeness. It's not
+  as important for subsequent words to rhyme, so we might want subsequent words
+  to be selected weighted by whether or not they are synonyms to some other
+  word, by how many syllables they have, by whether they are associated with a
+  markov value, etc..."
+  (:require [com.owoga.prhyme.core :as prhyme]))
+
+;;;; Signature of "weight adjustment" functions
+;;;
+;;; A weight adjustment function gets called every time a decision needs to be
+;;; made for which word to choose, so I think it's important to not be too slow.
+;;;
+;;; The function will receive:
+;;; - a collection of the words from which to weight.
+;;; - the target phrase we're rhyming for
+;;; - the current result that we have so far
+;;;
+;;; By default, the weights of the passed in words will probably be 1. If you
+;;; want future weights to be adjusted from past weights, that's up to you.
+;;;
+;;; The target phrase will change as words are chosen for the result.
+;;; A good and strategy will be to chop off syllables from the target phrase
+;;; for each syllable of a matching result.
+
+(defn adjust-for-markov
+  "Works with a markov data structure that was generated taking into account
+  sentence boundaries (represented as nils).
+
+  A key in the markov structure of '(nil) would have a value that represents all
+  words that have occurred in position 1 of the raw data.
+
+  A key of '(nil \"foo\") would have a value that represents all words
+  that occurred in position 2 following \"foo\"
+
+  Automatically detects the order (window size) of the markov model. Does this
+  by counting the length of the first key.
+  "
+  [markov percent]
+  (let [markov-n (count (first (first markov)))]
+    (fn [[words target result]]
+      (let [key (let [k (map :norm-word (take markov-n result))]
+                  (reverse
+                   (if (> markov-n (count k))
+                     (concat k (repeat (- markov-n (count k)) nil))
+                     k)))
+            markov-options (markov key)
+            markov-option-avg (/ (apply + (vals markov-options))
+                                 (max 1 (count markov-options)))]
+        (if (nil? markov-options)
+          [words target result]
+          (let [[markovs non-markovs]
+                ((juxt filter remove)
+                 #(markov-options (:norm-word %))
+                 words)
+                weight-non-markovs (apply + (map :weight non-markovs))
+                target-weight-markovs (- (/ weight-non-markovs (- 1 percent))
+                                         weight-non-markovs)
+                count-markovs (count markovs)
+                adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
+            [(concat
+              (map
+               (fn [m]
+                 (let [option (markov-options (:norm-word m))]
+                   (as-> m m
+                     (assoc m :weight (* (/ option markov-option-avg) adjustment-markovs (:weight m)))
+                     (assoc m :adjustment-for-markov (* (/ option markov-option-avg) adjustment-markovs)))))
+               markovs)
+              non-markovs)
+             target
+             result]))))))
+
+(defn adjust-for-rhymes
+  "Weights words by whether or not they rhyme.
+  Once result contains something, becomes inactive. If you want to try to rhyme
+  every selection, you'll need a different function. This one will only rhyme
+  the tail of a target."
+  [percent]
+  (fn [[words target result]]
+    (if (empty? result)
+      (let [words-with-rime-count
+            (map
+             (fn [word]
+               (assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
+             words)
+
+            [rhyming non-rhyming]
+            ((juxt filter remove)
+             #(< 0 (:num-matching %))
+             words-with-rime-count)
+
+            weight-non-rhyming (apply + (map :weight non-rhyming))
+            target-weight-rhyming (* 100 percent weight-non-rhyming)
+            count-rhyming (count rhyming)
+            adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
+        [(concat
+          (map
+           (fn [word]
+             (as-> word word
+               (assoc word :weight (* adjustment-rhyming (:weight word)))
+               (assoc word :adjustment-for-rimes adjustment-rhyming)))
+           rhyming)
+          non-rhyming)
+         target
+         result])
+      [words target result])))
--- a/src/com/owoga/prhyme/lymeric.clj
+++ b/src/com/owoga/prhyme/lymeric.clj
@ -1,7 +1,7 @@
 (ns com.owoga.prhyme.lymeric
  (:require [com.owoga.prhyme.gen :as gen]
            [com.owoga.prhyme.util :as util]
-            [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
+            [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
            [clojure.string :as string]
            [com.owoga.prhyme.frp :as frp]
            [com.owoga.prhyme.util.nlp :as nlp]
@ -40,7 +40,6 @@
                       (remove #(banned-words (:norm-word %))
                               base-words)
                       (frp/phrase->word frp/words (get rhymes pattern))))]
-          (println banned-words)
          (recur (rest scheme)
                 (assoc rhymes pattern rhyme)
                 (conj result rhyme)))))))
@ -48,25 +47,22 @@

 (comment
  (rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8)))
-
-  (first (filter #(= "abba" (:norm-word %)) frp/words))
-
-  (gen/gen-sentence-with-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular))
-  (def adj
-    (comp (gen/adjust-for-markov-with-boundaries darklyrics/darkov-2 0.9)
-          (gen/adjust-for-tail-rimes util/words-map 0.99)))
-  (repeatedly
-   10
-   (fn []
-     (gen/gen-rhyme-with-syllable-count
-      adj
-      8
-      frp/popular
-      (frp/phrase->word frp/words "famed watched waterloo"))))
-
  )

 (comment
+  (rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7)))
+
+  (->> (gen/selection-seq
+        (map #(assoc % :weight 1) frp/words)
+        (weighted-selection/adjust-for-rhymes 0.99)
+        (frp/phrase->word frp/words "hi there my boy"))
+       (take 3))
+
+  ["bishop larch smitten us dwell"
+   "solely first week in hell"
+   "and take that for three"
+   "come wrapped in glory"
+   "you ever leave it so well"]
  ["romancing realized too late"
   "my crown revive my withered state"
   "reign is obsolete"
@ -86,8 +82,7 @@
   "it wither away with this knife"
   "hate is my virtue"
   "my feelings are well overdue"
-   "war we await the afterlife"]
-  )
+   "war we await the afterlife"])
 (->> (repeatedly
      (fn []
        (gen/gen-target-by-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular))))
@ -102,6 +97,4 @@
  (let [adj (comp (gen/adjust-for-markov darklyrics/darkov-2)
                  (gen/adjust-for-tail-rimes util/words-map))]))

-(apply (fnil + 0) '())
-
 (map :syllable-count '())
--- a/src/com/owoga/prhyme/util/math.clj
+++ b/src/com/owoga/prhyme/util/math.clj
@ -0,0 +1,101 @@
+;; Fast weighted random selection thanks to the Vose algorithm.
+;; https://gist.github.com/ghadishayban/a26cc402958ef3c7ce61
+
+(ns com.owoga.prhyme.util.math
+  (:import clojure.lang.PersistentQueue))
+
+
+;; Vose's alias method
+;; http://www.keithschwarz.com/darts-dice-coins/
+(defprotocol Rand
+  (nextr [_ rng]))
+
+(deftype Vose [n ^ints alias ^doubles prob]
+  Rand
+  ;; returns the index of the chosen weight
+  (nextr [_ rng] ;; not using the rng for now
+    (let [i (rand-int n)
+          p (aget prob i)]
+      (if (or (= p 1.0)
+              (< (rand) p))
+        i
+        (aget alias i)))))
+
+(defn ^:private make-vose [dist]
+  (let [N (count dist)
+        alias (int-array N)
+        prob  (double-array N)]
+    (if (zero? N)
+      (->Vose N alias prob)
+      (let [^doubles ps (->> dist
+                             (map (partial * N))
+                             (into-array Double/TYPE))
+
+            [small large] (loop [i 0
+                                 [small large] [PersistentQueue/EMPTY
+                                                PersistentQueue/EMPTY]
+                                 ps (seq ps)]
+                            (if (seq ps)
+                              (let [p (first ps)]
+                                (if (< p 1)
+                                  (recur (inc i)
+                                         [(conj small i) large]
+                                         (rest ps))
+                                  (recur (inc i)
+                                         [small          (conj large i)]
+                                         (rest ps))))
+                              [small large]))
+
+            [small large] (loop [small small
+                                 large large]
+                            (if (and (seq small) (seq large))
+                              (let [l (first small)
+                                    g (first large)
+                                    small (pop small)
+                                    large (pop large)]
+                                (aset-double prob l (aget ps l))
+                                (aset-int alias l g)
+                                (let [pg (- (+ (aget ps g) (aget ps l))
+                                            1.0)]
+                                  (aset-double ps g pg)
+                                  (if (< pg 1)
+                                    (recur (conj small g) large)
+                                    (recur small (conj large g)))))
+                              [small large]))]
+        (doseq [g (concat large small)]
+          (aset-double prob g 1))
+        (->Vose N alias prob)))))
+
+(defn from-weights [ws]
+  (let [N (count ws)
+        tot (reduce + 0.0 ws)
+        dist (if (zero? tot)
+                  (repeat N (/ 1 tot))
+                  (map #(/ % tot) ws))]
+    (make-vose (vec dist))))
+
+(comment
+  (let [ws [1 2 1 3 3]
+        rng (from-weights ws)
+        chosen (repeatedly 1000000 #(nextr rng nil))
+        accuracy (mapv (comp float
+                             #(/ % 100000)
+                             (frequencies chosen))
+                       (range (count ws)))]
+    accuracy))
+
+(defn weighted-selection
+  "If given a coll, assumes the coll is weights and returns the selected index by
+  weighted random selection.
+
+  If given a key function and a collection, uses the key function to get a
+  collection of weights and returns the value at the randomly selected index."
+  ([coll]
+   (let [rng (from-weights coll)
+         index (nextr rng nil)]
+     index))
+  ([key-fn coll]
+   (let [rng (from-weights (map key-fn coll))
+         index (nextr rng nil)
+         selection (nth coll index)]
+     selection)))
--- a/src/com/owoga/prhyme/util/nlp.clj
+++ b/src/com/owoga/prhyme/util/nlp.clj
@ -7,7 +7,13 @@
 (def get-sentences (nlp/make-sentence-detector "models/en-sent.bin"))
 (def parse (tb/make-treebank-parser "en-parser-chunking.bin"))

-(defn valid-sentence? [phrase]
+(defn valid-sentence?
+  "Tokenizes and parses the phrase using OpenNLP models from
+  http://opennlp.sourceforge.net/models-1.5/
+
+  If the parse tree has an 'S as the top-level tag, then
+  we consider it a valid English sentence."
+  [phrase]
  (->> phrase
       tokenize
       (string/join " ")