Cleanup and organize namespaces

main
Eric Ihli 4 years ago
parent 7e461b3620
commit f30686e95e

@ -168,7 +168,6 @@
artist-album-texts))) artist-album-texts)))
(comment (comment
(def darkov-2 (util/read-markov "dark-corpus-2.edn")) (def darkov-2 (util/read-markov "dark-corpus-2.edn"))
(get darkov-2 '(nil nil)) (get darkov-2 '(nil nil))
(take 3 (scrape base-url)) (take 3 (scrape base-url))

@ -159,6 +159,32 @@
:else (= (last (:rimes a)) (last (:rimes b))))) :else (= (last (:rimes a)) (last (:rimes b)))))
(defn rhymes?
"What does it mean for something to rhyme?"
[a b]
(cond
(and (= 1 (count (last (:rimes a))))
(= 1 (count (last (:rimes b))))
(or (= (last (:rimes a)) '("ER"))
(= (last (:rimes a)) '("AA"))
(= (last (:rimes a)) '("AE"))
(= (last (:rimes a)) '("AO"))
(= (last (:rimes a)) '("AW"))
(= (last (:rimes a)) '("EH"))
(= (last (:rimes a)) '("IH"))
(= (last (:rimes a)) '("UH"))
(= (last (:rimes a)) '("AH"))))
(= (list (first (take-last 2 (:nuclei a)))
(last (:onsets a)))
(list (first (take-last 2 (:nuclei b)))
(last (:onsets b))))
(and (= 1 (count (last (:rimes a))))
(= 1 (count (last (:rimes b)))))
(= (last (:onsets a)) (last (:onsets b)))
:else (= (last (:rimes a)) (last (:rimes b)))))
(defn onset+nucleus [syllables] (defn onset+nucleus [syllables]
(->> syllables (->> syllables
(map #(first (u/take-through u/vowel %))))) (map #(first (u/take-through u/vowel %)))))

@ -1,5 +1,6 @@
(ns com.owoga.prhyme.gen (ns com.owoga.prhyme.gen
(:require [clojure.string :as string] (:require [clojure.string :as string]
[com.owoga.prhyme.util.math :as math]
[com.owoga.prhyme.util :as util] [com.owoga.prhyme.util :as util]
[com.owoga.prhyme.util.weighted-rand :as weighted-rand] [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
[com.owoga.prhyme.util.nlp :as nlp] [com.owoga.prhyme.util.nlp :as nlp]
@ -225,6 +226,30 @@
(filter nlp/valid-sentence?) (filter nlp/valid-sentence?)
first)) first))
(defn remove-selection-from-target [target selection]
(->> target
(#(assoc % :syllables (drop-last
(:syllable-count
selection)
(:syllables
target))))
(#(assoc % :rimes (prhyme/rimes (:syllables %))))
(#(assoc % :onsets (prhyme/onset+nucleus (:syllables %))))
(#(assoc % :nuclei (prhyme/nucleus (:syllables %))))))
(defn selection-seq
([words adjust target]
(selection-seq words adjust target '()))
([words adjust target result]
(let [[weighted-words _ _] (adjust [words target result])
selection (math/weighted-selection :weight weighted-words)
new-target (remove-selection-from-target target selection)
new-result (cons selection result)]
(cons selection
(lazy-seq (selection-seq words adjust new-target new-result))))))
(defn generate-prhyme [words adjust target stop?]
(loop [result '()]))
(defn prhyme (defn prhyme
"2020-10-21 iteration" "2020-10-21 iteration"

@ -0,0 +1,110 @@
(ns com.owoga.prhyme.generation.weighted-selection
"Utilities for generation of rhymes by assigning weights to a collection of
words and randomly choosing words based on their weights.
For example, we might want the first word of the rhyme of a target phrase to
be selected from words that are highly weighted by their rhymeness. It's not
as important for subsequent words to rhyme, so we might want subsequent words
to be selected weighted by whether or not they are synonyms to some other
word, by how many syllables they have, by whether they are associated with a
markov value, etc..."
(:require [com.owoga.prhyme.core :as prhyme]))
;;;; Signature of "weight adjustment" functions
;;;
;;; A weight adjustment function gets called every time a decision needs to be
;;; made for which word to choose, so I think it's important to not be too slow.
;;;
;;; The function will receive:
;;; - a collection of the words from which to weight.
;;; - the target phrase we're rhyming for
;;; - the current result that we have so far
;;;
;;; By default, the weights of the passed in words will probably be 1. If you
;;; want future weights to be adjusted from past weights, that's up to you.
;;;
;;; The target phrase will change as words are chosen for the result.
;;; A good and strategy will be to chop off syllables from the target phrase
;;; for each syllable of a matching result.
(defn adjust-for-markov
"Works with a markov data structure that was generated taking into account
sentence boundaries (represented as nils).
A key in the markov structure of '(nil) would have a value that represents all
words that have occurred in position 1 of the raw data.
A key of '(nil \"foo\") would have a value that represents all words
that occurred in position 2 following \"foo\"
Automatically detects the order (window size) of the markov model. Does this
by counting the length of the first key.
"
[markov percent]
(let [markov-n (count (first (first markov)))]
(fn [[words target result]]
(let [key (let [k (map :norm-word (take markov-n result))]
(reverse
(if (> markov-n (count k))
(concat k (repeat (- markov-n (count k)) nil))
k)))
markov-options (markov key)
markov-option-avg (/ (apply + (vals markov-options))
(max 1 (count markov-options)))]
(if (nil? markov-options)
[words target result]
(let [[markovs non-markovs]
((juxt filter remove)
#(markov-options (:norm-word %))
words)
weight-non-markovs (apply + (map :weight non-markovs))
target-weight-markovs (- (/ weight-non-markovs (- 1 percent))
weight-non-markovs)
count-markovs (count markovs)
adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
[(concat
(map
(fn [m]
(let [option (markov-options (:norm-word m))]
(as-> m m
(assoc m :weight (* (/ option markov-option-avg) adjustment-markovs (:weight m)))
(assoc m :adjustment-for-markov (* (/ option markov-option-avg) adjustment-markovs)))))
markovs)
non-markovs)
target
result]))))))
(defn adjust-for-rhymes
"Weights words by whether or not they rhyme.
Once result contains something, becomes inactive. If you want to try to rhyme
every selection, you'll need a different function. This one will only rhyme
the tail of a target."
[percent]
(fn [[words target result]]
(if (empty? result)
(let [words-with-rime-count
(map
(fn [word]
(assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
words)
[rhyming non-rhyming]
((juxt filter remove)
#(< 0 (:num-matching %))
words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(map
(fn [word]
(as-> word word
(assoc word :weight (* adjustment-rhyming (:weight word)))
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming)
non-rhyming)
target
result])
[words target result])))

@ -1,7 +1,7 @@
(ns com.owoga.prhyme.lymeric (ns com.owoga.prhyme.lymeric
(:require [com.owoga.prhyme.gen :as gen] (:require [com.owoga.prhyme.gen :as gen]
[com.owoga.prhyme.util :as util] [com.owoga.prhyme.util :as util]
[com.owoga.prhyme.util.weighted-rand :as weighted-rand] [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
[clojure.string :as string] [clojure.string :as string]
[com.owoga.prhyme.frp :as frp] [com.owoga.prhyme.frp :as frp]
[com.owoga.prhyme.util.nlp :as nlp] [com.owoga.prhyme.util.nlp :as nlp]
@ -40,7 +40,6 @@
(remove #(banned-words (:norm-word %)) (remove #(banned-words (:norm-word %))
base-words) base-words)
(frp/phrase->word frp/words (get rhymes pattern))))] (frp/phrase->word frp/words (get rhymes pattern))))]
(println banned-words)
(recur (rest scheme) (recur (rest scheme)
(assoc rhymes pattern rhyme) (assoc rhymes pattern rhyme)
(conj result rhyme))))))) (conj result rhyme)))))))
@ -48,25 +47,22 @@
(comment (comment
(rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8))) (rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8)))
(first (filter #(= "abba" (:norm-word %)) frp/words))
(gen/gen-sentence-with-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular))
(def adj
(comp (gen/adjust-for-markov-with-boundaries darklyrics/darkov-2 0.9)
(gen/adjust-for-tail-rimes util/words-map 0.99)))
(repeatedly
10
(fn []
(gen/gen-rhyme-with-syllable-count
adj
8
frp/popular
(frp/phrase->word frp/words "famed watched waterloo"))))
) )
(comment (comment
(rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7)))
(->> (gen/selection-seq
(map #(assoc % :weight 1) frp/words)
(weighted-selection/adjust-for-rhymes 0.99)
(frp/phrase->word frp/words "hi there my boy"))
(take 3))
["bishop larch smitten us dwell"
"solely first week in hell"
"and take that for three"
"come wrapped in glory"
"you ever leave it so well"]
["romancing realized too late" ["romancing realized too late"
"my crown revive my withered state" "my crown revive my withered state"
"reign is obsolete" "reign is obsolete"
@ -86,8 +82,7 @@
"it wither away with this knife" "it wither away with this knife"
"hate is my virtue" "hate is my virtue"
"my feelings are well overdue" "my feelings are well overdue"
"war we await the afterlife"] "war we await the afterlife"])
)
(->> (repeatedly (->> (repeatedly
(fn [] (fn []
(gen/gen-target-by-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular)))) (gen/gen-target-by-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular))))
@ -102,6 +97,4 @@
(let [adj (comp (gen/adjust-for-markov darklyrics/darkov-2) (let [adj (comp (gen/adjust-for-markov darklyrics/darkov-2)
(gen/adjust-for-tail-rimes util/words-map))])) (gen/adjust-for-tail-rimes util/words-map))]))
(apply (fnil + 0) '())
(map :syllable-count '()) (map :syllable-count '())

@ -0,0 +1,101 @@
;; Fast weighted random selection thanks to the Vose algorithm.
;; https://gist.github.com/ghadishayban/a26cc402958ef3c7ce61
(ns com.owoga.prhyme.util.math
(:import clojure.lang.PersistentQueue))
;; Vose's alias method
;; http://www.keithschwarz.com/darts-dice-coins/
(defprotocol Rand
(nextr [_ rng]))
(deftype Vose [n ^ints alias ^doubles prob]
Rand
;; returns the index of the chosen weight
(nextr [_ rng] ;; not using the rng for now
(let [i (rand-int n)
p (aget prob i)]
(if (or (= p 1.0)
(< (rand) p))
i
(aget alias i)))))
(defn ^:private make-vose [dist]
(let [N (count dist)
alias (int-array N)
prob (double-array N)]
(if (zero? N)
(->Vose N alias prob)
(let [^doubles ps (->> dist
(map (partial * N))
(into-array Double/TYPE))
[small large] (loop [i 0
[small large] [PersistentQueue/EMPTY
PersistentQueue/EMPTY]
ps (seq ps)]
(if (seq ps)
(let [p (first ps)]
(if (< p 1)
(recur (inc i)
[(conj small i) large]
(rest ps))
(recur (inc i)
[small (conj large i)]
(rest ps))))
[small large]))
[small large] (loop [small small
large large]
(if (and (seq small) (seq large))
(let [l (first small)
g (first large)
small (pop small)
large (pop large)]
(aset-double prob l (aget ps l))
(aset-int alias l g)
(let [pg (- (+ (aget ps g) (aget ps l))
1.0)]
(aset-double ps g pg)
(if (< pg 1)
(recur (conj small g) large)
(recur small (conj large g)))))
[small large]))]
(doseq [g (concat large small)]
(aset-double prob g 1))
(->Vose N alias prob)))))
(defn from-weights [ws]
(let [N (count ws)
tot (reduce + 0.0 ws)
dist (if (zero? tot)
(repeat N (/ 1 tot))
(map #(/ % tot) ws))]
(make-vose (vec dist))))
(comment
(let [ws [1 2 1 3 3]
rng (from-weights ws)
chosen (repeatedly 1000000 #(nextr rng nil))
accuracy (mapv (comp float
#(/ % 100000)
(frequencies chosen))
(range (count ws)))]
accuracy))
(defn weighted-selection
"If given a coll, assumes the coll is weights and returns the selected index by
weighted random selection.
If given a key function and a collection, uses the key function to get a
collection of weights and returns the value at the randomly selected index."
([coll]
(let [rng (from-weights coll)
index (nextr rng nil)]
index))
([key-fn coll]
(let [rng (from-weights (map key-fn coll))
index (nextr rng nil)
selection (nth coll index)]
selection)))

@ -7,7 +7,13 @@
(def get-sentences (nlp/make-sentence-detector "models/en-sent.bin")) (def get-sentences (nlp/make-sentence-detector "models/en-sent.bin"))
(def parse (tb/make-treebank-parser "en-parser-chunking.bin")) (def parse (tb/make-treebank-parser "en-parser-chunking.bin"))
(defn valid-sentence? [phrase] (defn valid-sentence?
"Tokenizes and parses the phrase using OpenNLP models from
http://opennlp.sourceforge.net/models-1.5/
If the parse tree has an 'S as the top-level tag, then
we consider it a valid English sentence."
[phrase]
(->> phrase (->> phrase
tokenize tokenize
(string/join " ") (string/join " ")

Loading…
Cancel
Save