Add markovian lovecraft generation
parent
3782b6072a
commit
98ffd872ba
@ -1,15 +1,16 @@
|
|||||||
(ns example.real-estate)
|
(ns example.real-estate
|
||||||
|
(:require [com.owoga.frp.infrastructure :as frp]))
|
||||||
|
|
||||||
(defrelvar Offer
|
(frp/defrelvar Offer
|
||||||
:address string?
|
#(string? (:address %))
|
||||||
:offer-price number?
|
#(number? (:offer-price %))
|
||||||
:offer-date inst?
|
#(inst? (:offer-date %))
|
||||||
:bidder-name string?
|
#(string? (:bidder-name %))
|
||||||
:bidder-address string?)
|
#(string? (:bidder-address %)))
|
||||||
|
|
||||||
(defrelvar Property
|
(frp/defrelvar Property
|
||||||
:address string?
|
#(string? (:address %))
|
||||||
:price number?
|
#(number? (:price %))
|
||||||
:photo string?
|
#(string? (:photo %))
|
||||||
:agent-name string?
|
#(string? (:agent-name %))
|
||||||
:date-registered inst?)
|
#(inst? (:date-registered %)))
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1,523 @@
|
|||||||
|
(ns com.owoga.prhyme.util.lovecraft
|
||||||
|
(:require [net.cgrand.enlive-html :as html]
|
||||||
|
[clojure.string :as string]
|
||||||
|
[com.owoga.prhyme.util.weighted-rand :as wr]
|
||||||
|
[com.owoga.prhyme.core :as prhyme]
|
||||||
|
[taoensso.tufte :as tufte :refer [defnp p profiled profile]]
|
||||||
|
[com.owoga.prhyme.frp :as frp]
|
||||||
|
[clojure.java.io :as io]
|
||||||
|
[clojure.set :as set]))
|
||||||
|
|
||||||
|
(tufte/add-basic-println-handler! {})
|
||||||
|
|
||||||
|
;; Root URL used by `links`: it is fetched as the index page and is also
;; prepended to every href found there. Dynamic so it can be rebound.
(def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/")
|
||||||
|
|
||||||
|
;; Lookup table from the lower-cased :word of each entry in frp/words to the
;; entry itself. Later entries win when two words lower-case to the same key.
(def words-map
  (->> frp/words
       (map (fn [entry] [(string/lower-case (:word entry)) entry]))
       (into {})))
|
||||||
|
|
||||||
|
(defn fetch-url
  "Build a java.net.URL from the string `url` and hand it to enlive's
  `html-resource`, returning whatever that produces."
  [url]
  (-> url
      (java.net.URL.)
      (html/html-resource)))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(fetch-url *base-url*))
|
||||||
|
|
||||||
|
(defn links
  "Fetch the page at `*base-url*`, select the anchors matched by the enlive
  selector `[:li :> [:a (attr? :href)]]`, and return a lazy seq of strings
  made by prepending `*base-url*` to each anchor's first :href value."
  []
  (let [page    (fetch-url *base-url*)
        anchors (html/select page [:li :> [:a (html/attr? :href)]])]
    (map (fn [anchor]
           (str *base-url* (first (html/attr-values anchor :href))))
         anchors)))
|
||||||
|
|
||||||
|
(defn contentful-sections
  "Return a lazy seq of the enlive `text` of each node in `nodes`, keeping
  only texts longer than 100 characters."
  [nodes]
  (for [node nodes
        :let [content (html/text node)]
        :when (> (count content) 100)]
    content))
|
||||||
|
|
||||||
|
(defn text-from-link
  "Fetch `link`, take the text of the first node selected by `[:body]`, and
  collapse every run of whitespace or U+00A0 (non-breaking space) into a
  single space."
  [link]
  (let [body (first (html/select (fetch-url link) [:body]))]
    (string/replace (html/text body) #"[\s\u00A0]+" " ")))
|
||||||
|
|
||||||
|
(defn cleanup
  "Strip page furniture and punctuation from scraped text.

  Applied in order:
  1. remove everything from \"Return to\" to the end of the input,
  2. remove the shortest span running from \"Home\" to \"This Site\",
  3. remove every character that is not an ASCII letter, a space or a hyphen."
  [content]
  (let [without-footer (string/replace content #"Return to.*$" "")
        without-nav    (string/replace without-footer #"Home.*?This Site" "")]
    (string/replace without-nav #"[^a-zA-Z -]+" "")))
|
||||||
|
|
||||||
|
(defn tokens
  "Split `content` on runs of whitespace and return a vector of the pieces.

  Surrounding whitespace is trimmed first so that text starting with a space
  (as `cleanup` output often does) does not produce an empty first token.
  Blank or empty input returns an empty vector rather than `[\"\"]`."
  [content]
  (let [trimmed (string/trim content)]
    (if (string/blank? trimmed)
      []
      (string/split trimmed #"\s+"))))
|
||||||
|
|
||||||
|
(defn append-to-file
  "Open `filepath` for appending, write the string `text` to it, and close
  the writer before returning."
  [filepath text]
  (with-open [out (io/writer filepath :append true)]
    (.write out text)))
|
||||||
|
|
||||||
|
(defn scrape
  "For each of the first 10 URLs returned by `links`: download the page text,
  run it through `cleanup`, append a newline, and append the result to the
  file \"lovecraft.txt\". Returns nil."
  []
  (doseq [link (take 10 (links))]
    (append-to-file "lovecraft.txt"
                    (str (cleanup (text-from-link link)) "\n"))))
|
||||||
|
|
||||||
|
(defn tokens-from-file
  "Read all of `file` into memory and split it with `tokens`. The reader is
  closed before the result is returned."
  [file]
  (with-open [rdr (io/reader file)]
    (let [content (slurp rdr)]
      (tokens content))))
|
||||||
|
|
||||||
|
(defn window
  "Return a function that splits a collection into groups of `n` items.

  (window n)      - consecutive, non-overlapping groups.
  (window n step) - a new group starts every `step` items, so `(window 2 1)`
                    yields overlapping pairs.

  A trailing group with fewer than `n` items is dropped. The result is lazy
  and built with `partition`, so the input is walked once; the previous
  implementation counted the whole remaining collection at every step."
  ([n] (window n n))
  ([n step]
   (fn [coll]
     (partition n step coll))))
|
||||||
|
|
||||||
|
(defnp markov
  "Build a nested count table from `tokens`, a collection of sequences.

  For each sequence, every element but the last (`butlast`) is the key and
  the last element is the follower; the result maps key -> follower -> number
  of occurrences. For example

    (markov [[\"ran\" \"he\"] [\"walked\" \"he\"] [\"walked\" \"she\"]])
    ;; => {(\"ran\") {\"he\" 1}, (\"walked\") {\"he\" 1, \"she\" 1}}"
  [tokens]
  (reduce (fn [table token]
            (update-in table
                       [(butlast token) (last token)]
                       (fnil inc 0)))
          {}
          tokens))
|
||||||
|
|
||||||
|
(defnp running-total
  "Return the cumulative sums of `coll`, starting from `last-val` (0 when
  omitted). The first element is computed eagerly and the tail lazily.
  Returns nil for an empty collection."
  ([coll]
   (running-total coll 0))
  ([coll last-val]
   (when-not (empty? coll)
     (let [total (+ last-val (first coll))]
       (cons total
             (lazy-seq (running-total (rest coll) total)))))))
|
||||||
|
|
||||||
|
(defnp weighted-rand
  "Pick a random index into `weights`, where index i is chosen with
  probability proportional to (nth weights i).

  The cumulative sums are realised once into a vector, so the total and each
  comparison are cheap; the previous version called `last` and `nth` on a
  lazy seq, re-walking it for every candidate index.

  Returns nil when `weights` is empty or when no cumulative sum exceeds the
  random draw (for example when every weight is zero); the previous version
  threw in both cases."
  [weights]
  (let [running-weights (vec (rest (reductions + 0 weights)))]
    (when (seq running-weights)
      (let [rand-val (rand (peek running-weights))]
        (first (keep-indexed (fn [i total] (when (> total rand-val) i))
                             running-weights))))))
|
||||||
|
|
||||||
|
(defnp choose-from-markov-possibilities
  "Given `possibilities`, a map of candidate -> weight, return one candidate
  chosen by the sampler built with `wr/from-weights` over the map's values.
  Returns nil when `possibilities` is empty."
  [possibilities]
  (when-not (empty? possibilities)
    (let [sampler (wr/from-weights (vals possibilities))
          chosen  (wr/nextr sampler nil)]
      ;; `keys` and `vals` of the same map share an order, so the sampled
      ;; index into the weights also identifies the candidate.
      (nth (keys possibilities) chosen))))
|
||||||
|
|
||||||
|
;; Set of known words: the first element of every entry in prhyme/words,
;; lower-cased, with any parenthesised number such as "(1)" removed.
(def word-set
  (into #{}
        (map (fn [entry]
               (-> (first entry)
                   (string/lower-case)
                   (string/replace #"\(\d+\)" ""))))
        prhyme/words))
|
||||||
|
|
||||||
|
(defn normalize-tokens
  "Lower-case every token and keep only those present in `word-set`.
  Returns a lazy seq."
  [tokens]
  (for [token tokens
        :let [lowered (string/lower-case token)]
        :when (word-set lowered)]
    lowered))
|
||||||
|
|
||||||
|
(defn main
  "Build the markov table from \"lovecraft.txt\": read its tokens, reverse
  them, normalise them, group them into pairs, and count the pairs with
  `markov`. Because the tokens are reversed, each key's followers are the
  words that come before it in the original text."
  []
  ;; NOTE(review): `(window 2)` yields non-overlapping pairs, so only every
  ;; other adjacent word pair is counted - confirm that is intended.
  (let [pairs (-> (tokens-from-file "lovecraft.txt")
                  (reverse)
                  (normalize-tokens)
                  ((window 2)))]
    (into {} (markov pairs))))
|
||||||
|
|
||||||
|
(defn make-markov-picker
  "Return a function of a key `k` that looks `k` up in `markov-data` and
  picks one of its followers with `choose-from-markov-possibilities`.
  An unknown key is treated as having no followers."
  [markov-data]
  (fn [k]
    (let [possibilities (get markov-data k {})]
      (choose-from-markov-possibilities possibilities))))
|
||||||
|
|
||||||
|
(defn synonym?
  "Look `p` up in `synonyms` by calling `synonyms` as a function of `p`;
  with a set this returns `p` when it is a member and nil otherwise.

  NOTE(review): the original docstring describes `p` as a possibility such
  as [\"foo\" 3] (a word plus its weight); if so, the whole pair is what is
  looked up here, not just the word - confirm against callers."
  [p synonyms]
  (synonyms p))
|
||||||
|
|
||||||
|
(defnp adjust-for-synonyms
  "Return a function that boosts the weight of synonyms.

  `synonyms` is a collection of words. The returned function takes
  `possibilities`, a map of word -> weight, and multiplies by 5 the weight of
  every word that appears in `synonyms`; other entries, and synonyms missing
  from the map, are left alone.

    ((adjust-for-synonyms #{\"war\" \"famine\"}) {\"war\" 1 \"disease\" 3})
    ;; => {\"war\" 5, \"disease\" 3}"
  [synonyms]
  (fn [possibilities]
    (reduce
     (fn [adjusted synonym]
       ;; Test the map for the word. The previous `(s p)` tried to call the
       ;; synonym string as a function and threw ClassCastException.
       (if (contains? adjusted synonym)
         (update adjusted synonym #(* 5 %))
         adjusted))
     possibilities
     synonyms)))
|
||||||
|
|
||||||
|
(defnp adjust-for-rimes
  "Return a function that boosts the weight of possibilities by how many
  rimes they share with `target-rime`.

  The returned function takes `possibilities` (word -> weight). Each word is
  looked up in `dictionary`, the number of items returned by
  `frp/consecutive-matching` on :rimes is counted, and the weight is
  multiplied by four times that count (or left unchanged when the count
  is zero)."
  [target-rime dictionary]
  (fn [possibilities]
    (into {}
          (for [[word weight] possibilities
                :let [entry   (get dictionary word)
                      matches (count (frp/consecutive-matching
                                      target-rime
                                      entry
                                      :rimes))]]
            [word (* weight (max 1 (* matches 4)))]))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
((adjust-for-synonyms #{"war" "famine"})
|
||||||
|
{"war" 1
|
||||||
|
"disease" 3})
|
||||||
|
;; => {"war" 5, "disease" 3}
|
||||||
|
((adjust-for-rimes
|
||||||
|
(frp/make-word ["magic" "M" "AE" "JH" "IH" "K"])
|
||||||
|
words-map)
|
||||||
|
{"tragic" 3
|
||||||
|
"trick" 2
|
||||||
|
"foo" 1})
|
||||||
|
;; => {"tragic" 24, "trick" 8, "foo" 1}
|
||||||
|
)
|
||||||
|
|
||||||
|
;; Markov table loaded once from "lovecraft.edn" (written from the output of
;; `main` in the scratch forms further down).
;; NOTE(review): core `read-string` can evaluate code embedded in its input;
;; fine for a locally generated file, but prefer clojure.edn/read-string if
;; the file could ever come from elsewhere.
(defonce lovecraft-markov
  (-> "lovecraft.edn"
      (slurp)
      (read-string)))

;; Picker over the loaded table: given a key, returns one weighted-random
;; follower (see `make-markov-picker`).
(defonce markover
  (make-markov-picker lovecraft-markov))
|
||||||
|
|
||||||
|
(defn markov-key
  "Wrap `key-fn` in a single-argument function: the result applies `key-fn`
  to the text it is given and returns what `key-fn` returns."
  [key-fn]
  (fn [input]
    (key-fn input)))
|
||||||
|
|
||||||
|
(defn gen-from
  "Grow a list backwards from `initial`.

  Starting with `(list initial)`, while predicate `p` holds for the list so
  far, call `m` with a one-element list holding the current head and cons
  the result onto the front. Returns the list once `p` is false."
  [m p initial]
  (loop [generated (list initial)]
    (if (p generated)
      (let [head-key (list (first generated))]
        (recur (cons (m head-key) generated)))
      generated)))
|
||||||
|
|
||||||
|
|
||||||
|
(defn rhyming-words
  "List of rhyming words sorted by quality of rhyme.

  `target` is turned into a word with `frp/phrase->word`, flagged with
  `:rimes? true`, and passed to `frp/prhyme` over `frp/words`. The results
  are ordered so that words with more items from
  `frp/consecutive-matching` on :rimes come first."
  [target]
  (let [target-phrase (assoc (frp/phrase->word frp/words target)
                             :rimes? true)
        match-count   (fn [candidate]
                        (count (frp/consecutive-matching
                                candidate
                                target-phrase
                                :rimes)))]
    (sort-by (comp - match-count)
             (frp/prhyme frp/words target-phrase))))
|
||||||
|
|
||||||
|
(defn markov-rhymes
  "Pair each rhyming word with its entry in `markov-data`.

  Every item's :word is lower-cased and stripped of any parenthesised number
  such as \"(1)\", then looked up in `markov-data` under a one-element list
  key. Returns the distinct `[word followers]` pairs whose lookup was not
  nil."
  [markov-data rhyming-words]
  (let [normalize (fn [entry]
                    (-> (:word entry)
                        (string/lower-case)
                        (string/replace #"\(\d+\)" "")))
        pairs     (into #{}
                        (map (fn [entry]
                               (let [word (normalize entry)]
                                 [word (get markov-data (list word))])))
                        rhyming-words)]
    (remove (fn [[_ followers]] (nil? followers)) pairs)))
|
||||||
|
|
||||||
|
(defn markov-gen
  "Extend the list `initial` at the front until it has more than 5 items.

  Each new item is drawn by a picker built from `markov-data`, keyed on a
  one-element list holding the current head. When the picker finds nothing
  the new item is nil."
  [markov-data initial]
  (let [pick (make-markov-picker markov-data)]
    (loop [generated initial]
      (if (<= (count generated) 5)
        (recur (cons (pick (list (first generated)))
                     generated))
        generated))))
|
||||||
|
|
||||||
|
(defn make-rhymes
  "Generate word sequences that end in a rhyme for `target`.

  1. Find the rhyming words for `target` that have an entry in `markov-data`.
  2. Re-weight each such entry's followers with `adjust-for-rimes`.
  3. For each of those rhyming words, generate a sequence with `markov-gen`
     from the re-weighted table and drop nils.

  Returns a lazy seq of word sequences."
  [markov-data target]
  (let [target-word   (frp/phrase->word frp/words target)
        candidates    (rhyming-words target)
        rime-adjuster (adjust-for-rimes target-word words-map)
        adjusted      (into {}
                            (map (fn [[word weights]]
                                   ;; Table keys are one-element lists (see the
                                   ;; lookups in `markov-rhymes` and
                                   ;; `markov-gen`). Keying by the bare string,
                                   ;; as before, stored the adjusted weights
                                   ;; where nothing ever read them.
                                   [(list word) (rime-adjuster weights)]))
                            (markov-rhymes markov-data candidates))
        modified-markov-data (merge markov-data adjusted)]
    (->> candidates
         (markov-rhymes modified-markov-data)
         (map (fn [[word _]]
                (markov-gen modified-markov-data (list word))))
         (map #(remove nil? %)))))
|
||||||
|
|
||||||
|
(defn adjust-for-over-syllables
  "Adjust weights to prefer not going over the number of syllables of the
  target word.

  Returns a function over a seq of word maps. Each word's :weight is
  multiplied by 3 when its :syllable-count equals the number of :syllables in
  `target`, by 2 when it is smaller, and by 1 otherwise; the factor used is
  recorded under :adjusted-for-syllables-factor. A word or target missing
  its syllable data is printed before the comparison is attempted."
  [target]
  (fn [words]
    (p :adjust-for-syllables
       (map
        (fn [word]
          (when (or (nil? (:syllable-count word))
                    (nil? (:syllables target)))
            (println word target))
          (let [word-count   (:syllable-count word)
                target-count (count (:syllables target))
                factor       (cond
                               (= word-count target-count) 3
                               (< word-count target-count) 2
                               :else 1)]
            (-> word
                (assoc :weight (* factor (:weight word)))
                (assoc :adjusted-for-syllables-factor factor))))
        words))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [words (->> ["distort" "kiss" "sport"]
|
||||||
|
(map #(frp/phrase->word frp/words %))
|
||||||
|
(map #(assoc % :weight 1)))
|
||||||
|
target (->> "report"
|
||||||
|
(frp/phrase->word frp/words)
|
||||||
|
(#(assoc % :syllables (:syllables %))))
|
||||||
|
adjuster (adjust-for-over-syllables target)]
|
||||||
|
(adjuster words)))
|
||||||
|
|
||||||
|
(defn adjust-for-rhymes
  "Adjust weights to prefer words that rhyme.

  Returns a function over a seq of word maps. Each word's :weight is
  multiplied by the number of items `frp/consecutive-matching` returns for
  the word and `target` on :rimes, or by 0.001 when there are none. The
  factor is recorded under :adjust-for-rhyme-factor."
  [target]
  (fn [words]
    (p :adjust-for-rhymes
       (for [word words
             :let [matches (count (frp/consecutive-matching word target :rimes))
                   factor  (max 0.001 matches)]]
         (assoc word
                :weight (* factor (:weight word))
                :adjust-for-rhyme-factor factor)))))
|
||||||
|
|
||||||
|
(defn adjust-for-rhymes-1
  "Adjust weights to prefer words that rhyme.

  Returns a function over a seq of word maps. Words are split into rhymes
  (at least one item from `frp/consecutive-matching` on :rimes against
  `target`) and non-rhymes. A per-rhyme multiplier is derived from
  `percent`: percent / (1 - percent) times the total non-rhyme weight,
  divided by the number of rhymes (at least 1). Each rhyme's :weight is
  multiplied by it and it is recorded under :adjust-for-rhyme-factor.

  The result lists the untouched non-rhymes first, then the adjusted rhymes."
  [target percent]
  (fn [words]
    (let [ratio             (/ percent (- 1 percent))
          rhymes?           (fn [word]
                              (pos? (count (frp/consecutive-matching
                                            word target :rimes))))
          rhymes            (filter rhymes? words)
          non-rhymes        (remove rhymes? words)
          weight-non-rhymes (reduce + (map :weight non-rhymes))
          adjustment-rhyme  (/ (* ratio weight-non-rhymes)
                               (max 1 (count rhymes)))]
      (concat
       non-rhymes
       (for [rhyme rhymes]
         (assoc rhyme
                :weight (* adjustment-rhyme (:weight rhyme))
                :adjust-for-rhyme-factor adjustment-rhyme))))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [words (->> ["distort" "kiss" "sport"]
|
||||||
|
(map #(frp/phrase->word frp/words %))
|
||||||
|
(map #(assoc % :weight 1)))
|
||||||
|
target (->> "report"
|
||||||
|
(frp/phrase->word frp/words)
|
||||||
|
(#(assoc % :remaining-syllables (:syllables %))))
|
||||||
|
rhyme-adjuster (adjust-for-rhymes target)
|
||||||
|
syllable-count-adjuster (adjust-for-over-syllables target)]
|
||||||
|
(syllable-count-adjuster (rhyme-adjuster words))))
|
||||||
|
|
||||||
|
(defn adjust-for-membership
  "Return a function over a seq of word maps that doubles the :weight of
  every word whose :norm-word is in `set_`. The factor applied (2 for
  members, 1 for the rest) is recorded under :adjust-for-membership-factor;
  non-members keep their :weight untouched."
  [set_]
  (fn [words]
    (for [word words]
      (if (set_ (:norm-word word))
        (assoc word
               :weight (* 2 (:weight word))
               :adjust-for-membership-factor 2)
        (assoc word :adjust-for-membership-factor 1)))))
|
||||||
|
|
||||||
|
(defn filter-for-membership
  "Return a function over a seq of word maps that scales down, rather than
  removes, words outside `set_`: a word whose :norm-word is not in `set_`
  has its :weight multiplied by 0.01 and gets
  :filter-for-membership-factor 0.01. Members are returned unchanged."
  [set_]
  (fn [words]
    (for [word words]
      (if (set_ (:norm-word word))
        word
        (assoc word
               :weight (* 0.01 (:weight word))
               :filter-for-membership-factor 0.01)))))
|
||||||
|
|
||||||
|
(defn adjust-for-markov
  "Return a function over a seq of word maps that multiplies by 100 the
  :weight of every word whose :norm-word is among the keys of
  `markov-options`. The factor applied (100 or 1) is recorded under
  :adjust-for-markov-factor; non-matching words keep their :weight.

  `markov-options` is normally the follower map for one key, i.e.
  word -> count with string keys. Keys that are sequences (as in the full
  markov table) are also accepted, in which case their first element is
  used. nil is treated as no options."
  [markov-options]
  (let [markov-set (into #{}
                         ;; `first` on a string key yields its first
                         ;; character, which never equals a :norm-word, so
                         ;; only unwrap keys that are not already strings.
                         (map (fn [k] (if (string? k) k (first k))))
                         (keys markov-options))]
    (fn [words]
      (map
       (fn [word]
         (if (markov-set (:norm-word word))
           (as-> word word
             (assoc word :weight (* 100 (:weight word)))
             (assoc word :adjust-for-markov-factor 100))
           (assoc word :adjust-for-markov-factor 1)))
       words))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))]
|
||||||
|
(take 5 (markov-adjuster frp/words))))
|
||||||
|
|
||||||
|
(defn e-prhyme
  "2020-10-21 iteration.

  Build a phrase, last word first, by repeated weighted-random selection.

  words  - candidate word maps; at most the first 1e5 are used and each
           starts with :weight 1.
  markov - function (or map) from a one-element list of a word to that
           word's follower map.
  target - word map for the phrase being matched; its :syllables shrink by
           the chosen word's :syllable-count after every pick, and :rimes,
           :onsets and :nuclei are recomputed from what remains.
  stop?  - predicate of [target result]; generation ends when it returns
           true or after 6 picks.

  Returns the list of selected word maps, most recently chosen first.

  NOTE(review): the membership set is built from the global
  `lovecraft-markov`, not from the `markov` argument."
  [words markov target stop?]
  (let [target (assoc target :original-syllables (:syllables target))
        words  (->> words
                    (map #(assoc % :weight 1))
                    (take (int 1e5)))
        ;; These do not change between iterations, so build them once.
        lovecraft-set      (into #{} (map (comp first first) lovecraft-markov))
        lovecraft-adjuster (adjust-for-membership lovecraft-set)
        lovecraft-filter   (filter-for-membership lovecraft-set)]
    (loop [target   target
           result   '()
           sentinel 0]
      (if (or (stop? target result)
              (> sentinel 5))
        result
        (let [;; The table is keyed by lists of normalised word strings, so
              ;; look up the previous pick by its :norm-word; keying by the
              ;; whole word map never matched anything.
              previous-word  (:norm-word (first result))
              markov-options (markov (list previous-word))
              adjust (comp lovecraft-adjuster
                           (adjust-for-rhymes-1 target 0.8)
                           (adjust-for-over-syllables target)
                           (adjust-for-markov markov-options)
                           lovecraft-filter)
              weighted-words (p :adjust
                                (->> (adjust words)
                                     ;; `zero?` also drops 0.0, which
                                     ;; `(= 0 w)` does not.
                                     (remove #(zero? (:weight %)))))
              rng       (p :from-weights
                           (wr/from-weights (map :weight weighted-words)))
              index     (p :nextr (wr/nextr rng nil))
              selection (nth weighted-words index)
              remaining (drop-last (:syllable-count selection)
                                   (:syllables target))
              new-target (assoc target
                                :syllables remaining
                                :rimes (prhyme/rimes remaining)
                                :onsets (prhyme/onset+nucleus remaining)
                                :nuclei (prhyme/nucleus remaining))]
          (recur new-target (cons selection result) (inc sentinel)))))))
|
||||||
|
|
||||||
|
;; Every entry of frp/words with an initial :weight of 1 (lazy).
(def words
  (map (fn [entry] (assoc entry :weight 1))
       frp/words))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [orig-target (frp/phrase->word frp/words "please turn on your magic beam")]
|
||||||
|
(repeatedly
|
||||||
|
10
|
||||||
|
(fn []
|
||||||
|
(e-prhyme
|
||||||
|
frp/words
|
||||||
|
lovecraft-markov
|
||||||
|
(frp/phrase->word frp/words "please turn on your magic beam")
|
||||||
|
(fn [target result]
|
||||||
|
(<= (count (:syllables orig-target))
|
||||||
|
(apply + (map :syllable-count result)))))))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(frp/phrase->word frp/words "distort bad man")
|
||||||
|
(repeatedly 10 #(make-rhymes lovecraft-markov "bad man"))
|
||||||
|
|
||||||
|
(rhyming-words "magic beam")
|
||||||
|
((make-markov-picker lovecraft-markov) '("no"))
|
||||||
|
(markov-gen lovecraft-markov '("world"))
|
||||||
|
(interleave
|
||||||
|
(->> "your eyes"
|
||||||
|
(make-rhymes lovecraft-markov)
|
||||||
|
(map
|
||||||
|
(fn [[k v]]
|
||||||
|
(markov-gen lovecraft-markov (list k)))))
|
||||||
|
(->> "pretty"
|
||||||
|
(make-rhymes lovecraft-markov)
|
||||||
|
(map
|
||||||
|
(fn [[k v]]
|
||||||
|
(markov-gen lovecraft-markov (list k))))
|
||||||
|
(remove nil?)))
|
||||||
|
|
||||||
|
(frp/phrase->word frp/words "well-off")
|
||||||
|
(frp/prhyme frp/words (assoc (words-map "well") :rimes? true))
|
||||||
|
)
|
||||||
|
(defn ghost
  "Rhyme a phrase with markov.

  Finds the rhymes of `word` among `words` with `frp/prhyme`, normalises
  each (lower-case, parenthesised numbers such as \"(1)\" removed), keeps the
  distinct ones that have an entry in `lovecraft-markov`, and for each
  generates a list of up to 5 items with `gen-from` and `markover`, starting
  from the rhyme itself."
  [words word]
  (let [norm-rhyme-words (->> (frp/prhyme words word)
                              (map :word)
                              (map string/lower-case)
                              (map #(string/replace % #"\(\d+\)" ""))
                              (into #{})
                              (filter #(get lovecraft-markov (list %))))]
    (map (fn [w]
           (gen-from markover #(< (count %) 5) w))
         norm-rhyme-words)))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(take 10 lovecraft-markov)
|
||||||
|
(ghost frp/words (assoc (frp/make-word ["dream" "D" "R" "IY" "M"])
|
||||||
|
:rimes?
|
||||||
|
true)))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(->> (frp/make-word ["dream" "D" "R" "IY" "M"])
|
||||||
|
(#(assoc % :rimes? true))
|
||||||
|
(frp/prhyme frp/words)
|
||||||
|
(take 10))
|
||||||
|
|
||||||
|
|
||||||
|
(->> (main)
|
||||||
|
(#(spit "lovecraft.edn" (pr-str %))))
|
||||||
|
|
||||||
|
(let [t (read-string (slurp "lovecraft.edn"))]
|
||||||
|
(take 20 t))
|
||||||
|
)
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(->> (tokens-from-file "lovecraft.txt")
|
||||||
|
(reverse)
|
||||||
|
(normalize-tokens)
|
||||||
|
((window 2))
|
||||||
|
(markov)
|
||||||
|
(take 10)
|
||||||
|
(into {})
|
||||||
|
(#(get % '("away")))
|
||||||
|
(choose-from-markov-possibilities))
|
||||||
|
|
||||||
|
(markov [["boy" "good"] ["the" "over"]
|
||||||
|
["ran" "he"] ["walked" "he"]
|
||||||
|
["walked" "he"] ["walked" "she"]])
|
||||||
|
(tokens-from-file "lovecraft.txt")
|
||||||
|
(scrape)
|
||||||
|
(def test-links (take 3 (links)))
|
||||||
|
(->> (text-from-link (first test-links))
|
||||||
|
(cleanup))
|
||||||
|
(->> (text-from-link (first test-links))
|
||||||
|
(append-to-file "test.txt" "hi"))
|
||||||
|
(take 3 (html/select (fetch-url (first test-links)) [:body]))
|
||||||
|
)
|
@ -0,0 +1,86 @@
|
|||||||
|
(ns com.owoga.prhyme.util.weighted-rand
|
||||||
|
(:import clojure.lang.PersistentQueue))
|
||||||
|
|
||||||
|
(defprotocol Rand
  (nextr [_ rng]))

;; Vose's alias method
;; http://www.keithschwarz.com/darts-dice-coins/

;; n     - number of outcomes
;; alias - for each index, the index to return when the draw against `prob`
;;         fails
;; prob  - for each index, the probability of returning that index itself
(deftype Vose [n ^ints alias ^doubles prob]
  Rand
  ;; returns the index of the chosen weight
  (nextr [_ rng] ;; not using the rng for now
    (let [slot      (rand-int n)
          threshold (aget prob slot)]
      (cond
        ;; certain slot: no second random draw is needed
        (= threshold 1.0)    slot
        (< (rand) threshold) slot
        :else                (aget alias slot)))))
|
||||||
|
|
||||||
|
(defn ^:private make-vose
  "Build a `Vose` sampler from `dist`, a collection of doubles (normally
  probabilities summing to 1).

  Each value is scaled by the number of outcomes. Indices whose scaled value
  is below 1 are queued as small, the rest as large; small and large indices
  are then paired off, filling the `prob` and `alias` tables, and whatever is
  left over gets probability 1. An empty `dist` yields an empty sampler."
  [dist]
  (let [N     (count dist)
        alias (int-array N)
        prob  (double-array N)]
    (if (zero? N)
      (->Vose N alias prob)
      (let [^doubles ps (into-array Double/TYPE (map (partial * N) dist))

            ;; Initial split of the indices, in index order.
            small (into PersistentQueue/EMPTY
                        (keep-indexed (fn [i p] (when (< p 1) i)) ps))
            large (into PersistentQueue/EMPTY
                        (keep-indexed (fn [i p] (when-not (< p 1) i)) ps))

            [small large]
            (loop [small small
                   large large]
              (if (and (seq small) (seq large))
                (let [l      (first small)
                      g      (first large)
                      small' (pop small)
                      large' (pop large)]
                  (aset-double prob l (aget ps l))
                  (aset-int alias l g)
                  ;; Move the mass l lacks from g, then requeue g by what it
                  ;; has left.
                  (let [pg (- (+ (aget ps g) (aget ps l))
                              1.0)]
                    (aset-double ps g pg)
                    (if (< pg 1)
                      (recur (conj small' g) large')
                      (recur small' (conj large' g)))))
                [small large]))]
        (doseq [g (concat large small)]
          (aset-double prob g 1))
        (->Vose N alias prob)))))
|
||||||
|
|
||||||
|
(defn from-weights
  "Build a sampler (see `nextr`) that returns index i with probability
  proportional to (nth ws i).

  When the weights sum to zero every index is given the same probability.
  The previous fallback computed 1/total, i.e. a division by 0.0 that
  produced Infinity for every entry and only behaved uniformly by accident.
  An empty `ws` yields an empty sampler."
  [ws]
  (let [N    (count ws)
        tot  (reduce + 0.0 ws)
        dist (if (zero? tot)
               ;; `max` guards the N = 0 case, where the list is empty anyway.
               (repeat N (/ 1.0 (max N 1)))
               (map #(/ % tot) ws))]
    (make-vose (vec dist))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [ws [1 2 4 8]
|
||||||
|
rng (from-weights ws)]
|
||||||
|
(nextr rng nil)))
|
||||||
|
(comment
|
||||||
|
(let [ws [1 2 1 3 3]
|
||||||
|
rng (from-weights ws)
|
||||||
|
chosen (repeatedly 1000000 #(nextr rng nil))
|
||||||
|
accuracy (mapv (comp float
|
||||||
|
#(/ % 100000)
|
||||||
|
(frequencies chosen))
|
||||||
|
(range (count ws)))]
|
||||||
|
accuracy))
|
Loading…
Reference in New Issue