Cleanup and organize namespaces

5 years ago · f30686e95e
parent 7e461b3620
commit f30686e95e
7 changed files with 284 additions and 24 deletions
--- a/src/com/owoga/corpus/darklyrics.clj
+++ b/src/com/owoga/corpus/darklyrics.clj
@ -168,7 +168,6 @@
     artist-album-texts)))
 (comment
  (def darkov-2 (util/read-markov "dark-corpus-2.edn"))
  (get darkov-2 '(nil nil))
  (take 3 (scrape base-url))
--- a/src/com/owoga/prhyme/core.clj
+++ b/src/com/owoga/prhyme/core.clj
@ -159,6 +159,32 @@
    :else (= (last (:rimes a)) (last (:rimes b)))))
 (defn rhymes?
  "Returns a truthy value when words `a` and `b` are considered to rhyme.

  Both words are maps with :rimes, :onsets and :nuclei sequences, one entry
  per syllable. Only the tail of each word is inspected:

  - When the final rime of both words holds a single phoneme and `a`'s final
    rime is one of the bare vowels listed below, the words rhyme when their
    second-to-last nucleus and their final onset both match.
  - When the final rime of both words holds a single phoneme (any other
    phoneme), the words rhyme when their final onsets match.
  - Otherwise the words rhyme when their final rimes are equal."
  [a b]
  (let [bare-vowel-rimes '[("ER") ("AA") ("AE") ("AO") ("AW")
                           ("EH") ("IH") ("UH") ("AH")]
        rime-a (last (:rimes a))
        rime-b (last (:rimes b))
        single-phoneme-rimes? (and (= 1 (count rime-a))
                                   (= 1 (count rime-b)))
        ;; With fewer than two nuclei this is simply the first nucleus.
        penultimate-nucleus (fn [word]
                              (first (take-last 2 (:nuclei word))))
        final-onset (fn [word] (last (:onsets word)))]
    (if single-phoneme-rimes?
      (if (some #(= rime-a %) bare-vowel-rimes)
        (= [(penultimate-nucleus a) (final-onset a)]
           [(penultimate-nucleus b) (final-onset b)])
        (= (final-onset a) (final-onset b)))
      (= rime-a rime-b))))
 (defn onset+nucleus
  "Maps each syllable to the first element of
  `(u/take-through u/vowel syllable)`. Returns a lazy sequence with one
  entry per syllable.
  NOTE(review): presumably that element is the syllable's leading phonemes up
  to and including its vowel -- confirm against `u/take-through`."
  [syllables]
  (map (fn [syllable]
         (first (u/take-through u/vowel syllable)))
       syllables))
--- a/src/com/owoga/prhyme/gen.clj
+++ b/src/com/owoga/prhyme/gen.clj
@ -1,5 +1,6 @@
 (ns com.owoga.prhyme.gen
  (:require [clojure.string :as string]
            [com.owoga.prhyme.util.math :as math]
            [com.owoga.prhyme.util :as util]
            [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
            [com.owoga.prhyme.util.nlp :as nlp]
@ -225,6 +226,30 @@
       (filter nlp/valid-sentence?)
       first))
 (defn remove-selection-from-target
  "Returns `target` with the trailing `(:syllable-count selection)` syllables
  dropped from its :syllables, and with :rimes, :onsets and :nuclei recomputed
  from the syllables that remain."
  [target selection]
  (let [remaining-syllables (drop-last (:syllable-count selection)
                                       (:syllables target))]
    (assoc target
           :syllables remaining-syllables
           :rimes (prhyme/rimes remaining-syllables)
           :onsets (prhyme/onset+nucleus remaining-syllables)
           :nuclei (prhyme/nucleus remaining-syllables))))
 (defn selection-seq
  "Returns an unbounded sequence of weighted-random word selections.

  `adjust` is called with `[words target result]` and must return a sequential
  collection whose first element is the weighted words; one word is then
  chosen by its :weight. Each chosen word is removed from the tail of `target`
  and consed onto `result` (newest first) before the next, lazily computed,
  selection. The first selection is made eagerly. Callers are expected to
  bound the sequence themselves (e.g. with `take`)."
  ([words adjust target]
   (selection-seq words adjust target '()))
  ([words adjust target result]
   (let [adjusted (adjust [words target result])
         candidates (nth adjusted 0 nil)
         pick (math/weighted-selection :weight candidates)
         remaining-target (remove-selection-from-target target pick)
         picks-so-far (cons pick result)
         more (lazy-seq
               (selection-seq words adjust remaining-target picks-so-far))]
     (cons pick more))))
 ;; Unfinished stub: the loop binds `result` to an empty list but has no body,
 ;; so this currently returns nil and ignores `words`, `adjust`, `target` and
 ;; `stop?`.
 (defn generate-prhyme [words adjust target stop?]
  (loop [result '()]))
 (defn prhyme
  "2020-10-21 iteration"
--- a/src/com/owoga/prhyme/generation/weighted_selection.clj
+++ b/src/com/owoga/prhyme/generation/weighted_selection.clj
@ -0,0 +1,110 @@
 (ns com.owoga.prhyme.generation.weighted-selection
  "Utilities for generation of rhymes by assigning weights to a collection of
  words and randomly choosing words based on their weights.
  For example, we might want the first word of the rhyme of a target phrase to
  be selected from words that are highly weighted by their rhymeness. It's not
  as important for subsequent words to rhyme, so we might want subsequent words
  to be selected weighted by whether or not they are synonyms to some other
  word, by how many syllables they have, by whether they are associated with a
  markov value, etc..."
  (:require [com.owoga.prhyme.core :as prhyme]))
 ;;;; Signature of "weight adjustment" functions
 ;;;
 ;;; A weight adjustment function gets called every time a decision needs to be
 ;;; made for which word to choose, so I think it's important to not be too slow.
 ;;;
 ;;; The function will receive:
 ;;; - a collection of the words to be weighted.
 ;;; - the target phrase we're rhyming for
 ;;; - the current result that we have so far
 ;;;
 ;;; By default, the weights of the passed in words will probably be 1. If you
 ;;; want future weights to be adjusted from past weights, that's up to you.
 ;;;
 ;;; The target phrase will change as words are chosen for the result.
 ;;; A good strategy will be to chop off syllables from the target phrase
 ;;; for each syllable of a matching result.
 (defn adjust-for-markov
  "Returns a weight-adjustment function backed by a markov model.

  `markov` maps a key (a list of preceding words, oldest first, with nil
  standing in for positions before a sentence boundary) to a map of
  following word -> numeric value. The order (window size) of the model is
  detected from the length of its first key.

  The returned function takes `[words target result]`, where `result` holds
  the words chosen so far, newest first. It builds the lookup key from the
  :norm-word of the most recent words in `result`, padding with nil when
  fewer than `order` words have been chosen.

  - If the model has no entry for the key, the input is returned unchanged.
  - Otherwise, words found in the entry are re-weighted. The words not in the
    entry have total weight W; a target of W/(1 - percent) - W is divided
    evenly among the words in the entry, and each such word's :weight is
    further scaled by its model value relative to the entry's average value.
    The multiplier applied is stored under :adjustment-for-markov. Words not
    in the entry are left untouched and placed after the re-weighted ones."
  [markov percent]
  (let [order (count (ffirst markov))]
    (fn [[words target result]]
      (let [recent (map :norm-word (take order result))
            lookup-key (reverse
                        (if (> order (count recent))
                          (concat recent
                                  (repeat (- order (count recent)) nil))
                          recent))
            options (markov lookup-key)]
        (if (nil? options)
          [words target result]
          (let [average (/ (apply + (vals options))
                           (max 1 (count options)))
                in-options? (fn [word] (options (:norm-word word)))
                followers (filter in-options? words)
                others (remove in-options? words)
                others-weight (apply + (map :weight others))
                followers-target (- (/ others-weight (- 1 percent))
                                    others-weight)
                follower-count (count followers)
                share (if (= 0 follower-count)
                        1
                        (/ followers-target follower-count))
                reweigh (fn [word]
                          (let [factor (* (/ (options (:norm-word word))
                                             average)
                                          share)]
                            (assoc word
                                   :weight (* factor (:weight word))
                                   :adjustment-for-markov factor)))]
            [(concat (map reweigh followers) others)
             target
             result]))))))
 (defn adjust-for-rhymes
  "Returns a weight-adjustment function that favours words rhyming with the
  target.

  The returned function takes `[words target result]`. It only acts while
  `result` is empty, i.e. for the first selection, which rhymes with the tail
  of the target; once anything has been selected the input is returned
  unchanged.

  While active, each word is tagged with :num-matching (1 when
  `prhyme/rhymes?` says it rhymes with `target`, 0 otherwise). A target weight
  of 100 * percent * (total weight of non-rhyming words) is divided evenly
  among the rhyming words: each rhyming word's :weight is multiplied by that
  share, which is also stored under :adjustment-for-rimes. Rhyming words are
  placed before the untouched non-rhyming words."
  [percent]
  (fn [[words target result]]
    (if (seq result)
      [words target result]
      (let [tagged (map (fn [word]
                          (assoc word
                                 :num-matching
                                 (if (prhyme/rhymes? target word) 1 0)))
                        words)
            matches? (fn [word] (< 0 (:num-matching word)))
            rhyming (filter matches? tagged)
            non-rhyming (remove matches? tagged)
            non-rhyming-weight (apply + (map :weight non-rhyming))
            rhyming-target (* 100 percent non-rhyming-weight)
            rhyming-count (count rhyming)
            share (if (= 0 rhyming-count)
                    1
                    (/ rhyming-target rhyming-count))
            boost (fn [word]
                    (assoc word
                           :weight (* share (:weight word))
                           :adjustment-for-rimes share))]
        [(concat (map boost rhyming) non-rhyming)
         target
         result]))))
--- a/src/com/owoga/prhyme/lymeric.clj
+++ b/src/com/owoga/prhyme/lymeric.clj
@ -1,7 +1,7 @@
 (ns com.owoga.prhyme.lymeric
  (:require [com.owoga.prhyme.gen :as gen]
            [com.owoga.prhyme.util :as util]
-            [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
+            [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
            [clojure.string :as string]
            [com.owoga.prhyme.frp :as frp]
            [com.owoga.prhyme.util.nlp :as nlp]
@ -40,7 +40,6 @@
                       (remove #(banned-words (:norm-word %))
                               base-words)
                       (frp/phrase->word frp/words (get rhymes pattern))))]
          (println banned-words)
          (recur (rest scheme)
                 (assoc rhymes pattern rhyme)
                 (conj result rhyme)))))))
@ -48,25 +47,22 @@
 (comment
  (rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8)))
  (first (filter #(= "abba" (:norm-word %)) frp/words))
  (gen/gen-sentence-with-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular))
  (def adj
    (comp (gen/adjust-for-markov-with-boundaries darklyrics/darkov-2 0.9)
          (gen/adjust-for-tail-rimes util/words-map 0.99)))
  (repeatedly
   10
   (fn []
     (gen/gen-rhyme-with-syllable-count
      adj
      8
      frp/popular
      (frp/phrase->word frp/words "famed watched waterloo"))))
  )
 (comment
  (rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7)))
  (->> (gen/selection-seq
        (map #(assoc % :weight 1) frp/words)
        (weighted-selection/adjust-for-rhymes 0.99)
        (frp/phrase->word frp/words "hi there my boy"))
       (take 3))
  ["bishop larch smitten us dwell"
   "solely first week in hell"
   "and take that for three"
   "come wrapped in glory"
   "you ever leave it so well"]
  ["romancing realized too late"
   "my crown revive my withered state"
   "reign is obsolete"
@ -86,8 +82,7 @@
   "it wither away with this knife"
   "hate is my virtue"
   "my feelings are well overdue"
-   "war we await the afterlife"]
+   "war we await the afterlife"])
  )
 (->> (repeatedly
      (fn []
        (gen/gen-target-by-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular))))
@ -102,6 +97,4 @@
  (let [adj (comp (gen/adjust-for-markov darklyrics/darkov-2)
                  (gen/adjust-for-tail-rimes util/words-map))]))
 (apply (fnil + 0) '())
 (map :syllable-count '())
--- a/src/com/owoga/prhyme/util/math.clj
+++ b/src/com/owoga/prhyme/util/math.clj
@ -0,0 +1,101 @@
 ;; Fast weighted random selection thanks to the Vose algorithm.
 ;; https://gist.github.com/ghadishayban/a26cc402958ef3c7ce61
 (ns com.owoga.prhyme.util.math
  (:import clojure.lang.PersistentQueue))
 ;; Vose's alias method
 ;; http://www.keithschwarz.com/darts-dice-coins/
 ;; Protocol for random samplers. `nextr` receives the sampler and an `rng`
 ;; argument and returns one sample.
 (defprotocol Rand
  (nextr [_ rng]))
 (deftype Vose [n ^ints alias ^doubles prob]
  Rand
  ;; Returns the index of the chosen weight. A column is picked uniformly from
  ;; [0, n); that column is kept with probability `(aget prob column)`,
  ;; otherwise the column's alias index is returned instead.
  ;; The `rng` argument is currently unused.
  (nextr [_ rng]
    (let [column (rand-int n)
          threshold (aget prob column)]
      (if-not (or (= threshold 1.0)
                  (< (rand) threshold))
        (aget alias column)
        column))))
 ;; Builds a Vose sampler (alias and probability tables) from `dist`, a
 ;; collection with one entry per index.
 ;; NOTE(review): assumes `dist` holds probabilities summing to 1 -- that is
 ;; what `from-weights` passes in; confirm for any other caller.
 (defn ^:private make-vose [dist]
  (let [N (count dist)
        alias (int-array N)
        prob  (double-array N)]
    (if (zero? N)
      ;; Nothing to distribute: return a sampler with empty tables.
      (->Vose N alias prob)
      ;; Scale every entry by N so that 1 is the per-column average.
      (let [^doubles ps (->> dist
                             (map (partial * N))
                             (into-array Double/TYPE))
            ;; Partition the indices into two queues: `small` for scaled
            ;; values below 1 and `large` for the rest.
            [small large] (loop [i 0
                                 [small large] [PersistentQueue/EMPTY
                                                PersistentQueue/EMPTY]
                                 ps (seq ps)]
                            (if (seq ps)
                              (let [p (first ps)]
                                (if (< p 1)
                                  (recur (inc i)
                                         [(conj small i) large]
                                         (rest ps))
                                  (recur (inc i)
                                         [small          (conj large i)]
                                         (rest ps))))
                              [small large]))
            ;; Pair one small index `l` with one large index `g` at a time:
            ;; column `l` keeps its own value as its probability and uses `g`
            ;; as its alias; `g` is charged for what it gave away and is
            ;; re-queued as small or large according to what remains.
            [small large] (loop [small small
                                 large large]
                            (if (and (seq small) (seq large))
                              (let [l (first small)
                                    g (first large)
                                    small (pop small)
                                    large (pop large)]
                                (aset-double prob l (aget ps l))
                                (aset-int alias l g)
                                (let [pg (- (+ (aget ps g) (aget ps l))
                                            1.0)]
                                  (aset-double ps g pg)
                                  (if (< pg 1)
                                    (recur (conj small g) large)
                                    (recur small (conj large g)))))
                              [small large]))]
        ;; Whatever is left in either queue always selects itself.
        (doseq [g (concat large small)]
          (aset-double prob g 1))
        (->Vose N alias prob)))))
 (defn from-weights
  "Builds a Vose alias sampler from `ws`, a collection of numeric weights.

  Each weight is divided by the total of all weights to form the
  distribution handed to `make-vose`. When the total is zero (every weight is
  zero) there is nothing to normalize by, so every index is given the same
  share, 1/N. An empty `ws` yields an empty distribution, which `make-vose`
  handles by returning a sampler with empty tables."
  [ws]
  (let [N (count ws)
        tot (reduce + 0.0 ws)
        dist (cond
               ;; No weights at all: avoid any division.
               (zero? N) []
               ;; All-zero weights: fall back to a uniform distribution rather
               ;; than dividing by the zero total.
               (zero? tot) (repeat N (/ 1.0 N))
               :else (map #(/ % tot) ws))]
    (make-vose (vec dist))))
 ;; REPL sanity check for the sampler: draws 1,000,000 indices and divides each
 ;; index's count by 100,000. The weights sum to 10, so the resulting vector
 ;; should come out close to the weights themselves ([1 2 1 3 3]).
 (comment
  (let [ws [1 2 1 3 3]
        rng (from-weights ws)
        chosen (repeatedly 1000000 #(nextr rng nil))
        accuracy (mapv (comp float
                             #(/ % 100000)
                             (frequencies chosen))
                       (range (count ws)))]
    accuracy))
 (defn weighted-selection
  "Weighted random selection.

  With one argument, `coll` is treated as a collection of weights and the
  randomly chosen index is returned.

  With two arguments, `key-fn` is applied to every item of `coll` to obtain
  its weight, and the item at the randomly chosen index is returned."
  ([coll]
   (nextr (from-weights coll) nil))
  ([key-fn coll]
   (->> (map key-fn coll)
        weighted-selection
        (nth coll))))
--- a/src/com/owoga/prhyme/util/nlp.clj
+++ b/src/com/owoga/prhyme/util/nlp.clj
@ -7,7 +7,13 @@
 ;; Sentence detector created by `nlp/make-sentence-detector` from the model
 ;; file "models/en-sent.bin".
 (def get-sentences (nlp/make-sentence-detector "models/en-sent.bin"))
 ;; Treebank parser created by `tb/make-treebank-parser` from
 ;; "en-parser-chunking.bin".
 ;; NOTE(review): unlike the sentence model, this path has no "models/"
 ;; prefix -- confirm that is intentional.
 (def parse (tb/make-treebank-parser "en-parser-chunking.bin"))
-(defn valid-sentence? [phrase]
+(defn valid-sentence?
  "Tokenizes and parses the phrase using OpenNLP models from
  http://opennlp.sourceforge.net/models-1.5/
  If the parse tree has an 'S as the top-level tag, then
  we consider it a valid English sentence."
  [phrase]
  (->> phrase
       tokenize
       (string/join " ")