Cleanup and organize namespaces
parent
7e461b3620
commit
f30686e95e
@ -0,0 +1,110 @@
|
||||
(ns com.owoga.prhyme.generation.weighted-selection
|
||||
"Utilities for generation of rhymes by assigning weights to a collection of
|
||||
words and randomly choosing words based on their weights.
|
||||
|
||||
For example, we might want the first word of the rhyme of a target phrase to
|
||||
be selected from words that are highly weighted by their rhymeness. It's not
|
||||
as important for subsequent words to rhyme, so we might want subsequent words
|
||||
to be selected weighted by whether or not they are synonyms to some other
|
||||
word, by how many syllables they have, by whether they are associated with a
|
||||
markov value, etc..."
|
||||
(:require [com.owoga.prhyme.core :as prhyme]))
|
||||
|
||||
;;;; Signature of "weight adjustment" functions
|
||||
;;;
|
||||
;;; A weight adjustment function gets called every time a decision needs to be
|
||||
;;; made for which word to choose, so I think it's important to not be too slow.
|
||||
;;;
|
||||
;;; The function will receive:
|
||||
;;; - a collection of the words from which to weight.
|
||||
;;; - the target phrase we're rhyming for
|
||||
;;; - the current result that we have so far
|
||||
;;;
|
||||
;;; By default, the weights of the passed in words will probably be 1. If you
|
||||
;;; want future weights to be adjusted from past weights, that's up to you.
|
||||
;;;
|
||||
;;; The target phrase will change as words are chosen for the result.
|
||||
;;; A good and strategy will be to chop off syllables from the target phrase
|
||||
;;; for each syllable of a matching result.
|
||||
|
||||
(defn adjust-for-markov
|
||||
"Works with a markov data structure that was generated taking into account
|
||||
sentence boundaries (represented as nils).
|
||||
|
||||
A key in the markov structure of '(nil) would have a value that represents all
|
||||
words that have occurred in position 1 of the raw data.
|
||||
|
||||
A key of '(nil \"foo\") would have a value that represents all words
|
||||
that occurred in position 2 following \"foo\"
|
||||
|
||||
Automatically detects the order (window size) of the markov model. Does this
|
||||
by counting the length of the first key.
|
||||
"
|
||||
[markov percent]
|
||||
(let [markov-n (count (first (first markov)))]
|
||||
(fn [[words target result]]
|
||||
(let [key (let [k (map :norm-word (take markov-n result))]
|
||||
(reverse
|
||||
(if (> markov-n (count k))
|
||||
(concat k (repeat (- markov-n (count k)) nil))
|
||||
k)))
|
||||
markov-options (markov key)
|
||||
markov-option-avg (/ (apply + (vals markov-options))
|
||||
(max 1 (count markov-options)))]
|
||||
(if (nil? markov-options)
|
||||
[words target result]
|
||||
(let [[markovs non-markovs]
|
||||
((juxt filter remove)
|
||||
#(markov-options (:norm-word %))
|
||||
words)
|
||||
weight-non-markovs (apply + (map :weight non-markovs))
|
||||
target-weight-markovs (- (/ weight-non-markovs (- 1 percent))
|
||||
weight-non-markovs)
|
||||
count-markovs (count markovs)
|
||||
adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
|
||||
[(concat
|
||||
(map
|
||||
(fn [m]
|
||||
(let [option (markov-options (:norm-word m))]
|
||||
(as-> m m
|
||||
(assoc m :weight (* (/ option markov-option-avg) adjustment-markovs (:weight m)))
|
||||
(assoc m :adjustment-for-markov (* (/ option markov-option-avg) adjustment-markovs)))))
|
||||
markovs)
|
||||
non-markovs)
|
||||
target
|
||||
result]))))))
|
||||
|
||||
(defn adjust-for-rhymes
|
||||
"Weights words by whether or not they rhyme.
|
||||
Once result contains something, becomes inactive. If you want to try to rhyme
|
||||
every selection, you'll need a different function. This one will only rhyme
|
||||
the tail of a target."
|
||||
[percent]
|
||||
(fn [[words target result]]
|
||||
(if (empty? result)
|
||||
(let [words-with-rime-count
|
||||
(map
|
||||
(fn [word]
|
||||
(assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
|
||||
words)
|
||||
|
||||
[rhyming non-rhyming]
|
||||
((juxt filter remove)
|
||||
#(< 0 (:num-matching %))
|
||||
words-with-rime-count)
|
||||
|
||||
weight-non-rhyming (apply + (map :weight non-rhyming))
|
||||
target-weight-rhyming (* 100 percent weight-non-rhyming)
|
||||
count-rhyming (count rhyming)
|
||||
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
|
||||
[(concat
|
||||
(map
|
||||
(fn [word]
|
||||
(as-> word word
|
||||
(assoc word :weight (* adjustment-rhyming (:weight word)))
|
||||
(assoc word :adjustment-for-rimes adjustment-rhyming)))
|
||||
rhyming)
|
||||
non-rhyming)
|
||||
target
|
||||
result])
|
||||
[words target result])))
|
@ -0,0 +1,101 @@
|
||||
;; Fast weighted random selection thanks to the Vose algorithm.
|
||||
;; https://gist.github.com/ghadishayban/a26cc402958ef3c7ce61
|
||||
|
||||
(ns com.owoga.prhyme.util.math
|
||||
(:import clojure.lang.PersistentQueue))
|
||||
|
||||
|
||||
;; Vose's alias method
|
||||
;; http://www.keithschwarz.com/darts-dice-coins/
|
||||
(defprotocol Rand
|
||||
(nextr [_ rng]))
|
||||
|
||||
(deftype Vose [n ^ints alias ^doubles prob]
|
||||
Rand
|
||||
;; returns the index of the chosen weight
|
||||
(nextr [_ rng] ;; not using the rng for now
|
||||
(let [i (rand-int n)
|
||||
p (aget prob i)]
|
||||
(if (or (= p 1.0)
|
||||
(< (rand) p))
|
||||
i
|
||||
(aget alias i)))))
|
||||
|
||||
(defn ^:private make-vose [dist]
|
||||
(let [N (count dist)
|
||||
alias (int-array N)
|
||||
prob (double-array N)]
|
||||
(if (zero? N)
|
||||
(->Vose N alias prob)
|
||||
(let [^doubles ps (->> dist
|
||||
(map (partial * N))
|
||||
(into-array Double/TYPE))
|
||||
|
||||
[small large] (loop [i 0
|
||||
[small large] [PersistentQueue/EMPTY
|
||||
PersistentQueue/EMPTY]
|
||||
ps (seq ps)]
|
||||
(if (seq ps)
|
||||
(let [p (first ps)]
|
||||
(if (< p 1)
|
||||
(recur (inc i)
|
||||
[(conj small i) large]
|
||||
(rest ps))
|
||||
(recur (inc i)
|
||||
[small (conj large i)]
|
||||
(rest ps))))
|
||||
[small large]))
|
||||
|
||||
[small large] (loop [small small
|
||||
large large]
|
||||
(if (and (seq small) (seq large))
|
||||
(let [l (first small)
|
||||
g (first large)
|
||||
small (pop small)
|
||||
large (pop large)]
|
||||
(aset-double prob l (aget ps l))
|
||||
(aset-int alias l g)
|
||||
(let [pg (- (+ (aget ps g) (aget ps l))
|
||||
1.0)]
|
||||
(aset-double ps g pg)
|
||||
(if (< pg 1)
|
||||
(recur (conj small g) large)
|
||||
(recur small (conj large g)))))
|
||||
[small large]))]
|
||||
(doseq [g (concat large small)]
|
||||
(aset-double prob g 1))
|
||||
(->Vose N alias prob)))))
|
||||
|
||||
(defn from-weights [ws]
|
||||
(let [N (count ws)
|
||||
tot (reduce + 0.0 ws)
|
||||
dist (if (zero? tot)
|
||||
(repeat N (/ 1 tot))
|
||||
(map #(/ % tot) ws))]
|
||||
(make-vose (vec dist))))
|
||||
|
||||
(comment
|
||||
(let [ws [1 2 1 3 3]
|
||||
rng (from-weights ws)
|
||||
chosen (repeatedly 1000000 #(nextr rng nil))
|
||||
accuracy (mapv (comp float
|
||||
#(/ % 100000)
|
||||
(frequencies chosen))
|
||||
(range (count ws)))]
|
||||
accuracy))
|
||||
|
||||
(defn weighted-selection
|
||||
"If given a coll, assumes the coll is weights and returns the selected index by
|
||||
weighted random selection.
|
||||
|
||||
If given a key function and a collection, uses the key function to get a
|
||||
collection of weights and returns the value at the randomly selected index."
|
||||
([coll]
|
||||
(let [rng (from-weights coll)
|
||||
index (nextr rng nil)]
|
||||
index))
|
||||
([key-fn coll]
|
||||
(let [rng (from-weights (map key-fn coll))
|
||||
index (nextr rng nil)
|
||||
selection (nth coll index)]
|
||||
selection)))
|
Loading…
Reference in New Issue