Add example code, more nlp options
parent
f0ea2bc513
commit
86546c7d01
@ -0,0 +1,192 @@
|
|||||||
|
(ns examples.core
|
||||||
|
(:require [clojure.string :as string]
|
||||||
|
[clojure.set]
|
||||||
|
[com.owoga.prhyme.frp :as frp]
|
||||||
|
[com.owoga.prhyme.core :as prhyme]
|
||||||
|
[com.owoga.prhyme.data.bigrams :as bigrams]
|
||||||
|
[com.owoga.prhyme.gen :as gen]
|
||||||
|
[com.owoga.prhyme.data.dictionary :as dict]
|
||||||
|
[com.owoga.prhyme.data.thesaurus :as thesaurus]
|
||||||
|
[com.owoga.prhyme.data.darklyrics :as darklyrics]
|
||||||
|
[com.owoga.prhyme.generation.weighted-selection :as weighted]
|
||||||
|
[clojure.set :as set]
|
||||||
|
[clojure.zip :as zip]))
|
||||||
|
|
||||||
|
(defn weight-fn [word target result]
|
||||||
|
(let [rimes (frp/consecutive-matching word target :rimes)
|
||||||
|
nuclei (frp/consecutive-matching word target :nuclei)
|
||||||
|
onsets (frp/consecutive-matching word target :onsets)
|
||||||
|
total (apply + (map count [rimes nuclei onsets]))]
|
||||||
|
total))
|
||||||
|
|
||||||
|
(defn pred-fn [word target result]
|
||||||
|
(< 0 (weight-fn word target result)))
|
||||||
|
|
||||||
|
(defn weight-popular [word target result]
|
||||||
|
(if (dict/popular (:normalized-word word))
|
||||||
|
10
|
||||||
|
1))
|
||||||
|
|
||||||
|
(defn pred-popular [word target result]
|
||||||
|
(< 1 (weight-popular word target result)))
|
||||||
|
|
||||||
|
(def words-by-rime (prhyme/words-by-rime*
|
||||||
|
(filter
|
||||||
|
(fn [[word & _]]
|
||||||
|
(get
|
||||||
|
dict/popular
|
||||||
|
(string/lower-case word)))
|
||||||
|
dict/cmu-dict)))
|
||||||
|
|
||||||
|
(defn rime-1 [target]
|
||||||
|
(let [rime (last (:rimes target))]
|
||||||
|
(fn [x]
|
||||||
|
(= rime (last (:rimes x))))))
|
||||||
|
|
||||||
|
(defn rime-2 [target]
|
||||||
|
(let [rime (last (butlast (:rimes target)))]
|
||||||
|
(fn [x]
|
||||||
|
(= rime (last (butlast (:rimes x)))))))
|
||||||
|
|
||||||
|
(defn rime-member? [coll]
|
||||||
|
(let [coll (into #{} coll)]
|
||||||
|
(fn [x]
|
||||||
|
(coll (:normalized-word x)))))
|
||||||
|
|
||||||
|
(defn rime-compare [& comparators]
|
||||||
|
(let [juxtcomp (apply juxt comparators)]
|
||||||
|
(fn [a b]
|
||||||
|
(let [a (juxtcomp a)
|
||||||
|
b (juxtcomp b)]
|
||||||
|
(compare a b)))))
|
||||||
|
(def c
|
||||||
|
(fn [a b]
|
||||||
|
((rime-compare
|
||||||
|
(rime-1 {:rimes '(1 2)})
|
||||||
|
(rime-2 {:rimes '(1 2)})
|
||||||
|
(rime-member? ["foo" "bar"]))
|
||||||
|
b a)))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [coll [{:rimes '(3 2) :normalized-word "foo"}
|
||||||
|
{:rimes '(1 2) :normalized-word "foo"}
|
||||||
|
{:rimes '(4 5) :normalized-word "foo"}
|
||||||
|
{:rimes '(1 2) :normalized-word "buzz"}]]
|
||||||
|
(sort c coll))
|
||||||
|
|
||||||
|
(let [coll '("woman"
|
||||||
|
"union"
|
||||||
|
"passion"
|
||||||
|
"infatuation"
|
||||||
|
"emotion"
|
||||||
|
"disposition"
|
||||||
|
"communion"
|
||||||
|
"attraction"
|
||||||
|
"affection"
|
||||||
|
"adoration"
|
||||||
|
"admiration")
|
||||||
|
coll (map #(prhyme/phrase->word dict/popular %) coll)
|
||||||
|
target (prhyme/phrase->word dict/popular "devotion")
|
||||||
|
synonyms (thesaurus/synonyms "love" "heart")
|
||||||
|
comparisons (fn [target]
|
||||||
|
(fn [a b]
|
||||||
|
((rime-compare
|
||||||
|
(rime-1 target)
|
||||||
|
(rime-2 target)
|
||||||
|
(rime-member? synonyms))
|
||||||
|
b a)))]
|
||||||
|
(sort (comparisons target) coll))
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
(defn rhymestorm [& words]
|
||||||
|
(let [synonyms (->> (apply thesaurus/synonyms words)
|
||||||
|
(filter #(get dict/popular %))
|
||||||
|
(into #{}))
|
||||||
|
comparisons (fn [target]
|
||||||
|
(fn [a b]
|
||||||
|
((rime-compare
|
||||||
|
(rime-1 target)
|
||||||
|
(rime-2 target)
|
||||||
|
(rime-member? synonyms))
|
||||||
|
b a)))]
|
||||||
|
(->> synonyms
|
||||||
|
(map
|
||||||
|
(fn [synonym]
|
||||||
|
(let [word (prhyme/phrase->word dict/prhyme-dict synonym)
|
||||||
|
rhymes (get words-by-rime (last (:rimes word)))]
|
||||||
|
(when rhymes
|
||||||
|
(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
|
||||||
|
rhyming-synonyms (remove #{(:normalized-word word)} (filter synonyms rhyming-words))]
|
||||||
|
[(:normalized-word word) rhyming-synonyms])))))
|
||||||
|
(remove (fn [[_ rhymes]]
|
||||||
|
(empty? rhymes)))
|
||||||
|
(map (fn [[target rhymes]]
|
||||||
|
[target (->> rhymes
|
||||||
|
(map prhyme/phrase->word dict/popular)
|
||||||
|
(sort (comparisons (prhyme/phrase->word dict/popular target)))
|
||||||
|
(map :normalized-word))]))
|
||||||
|
(into {}))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(rhymestorm "love")
|
||||||
|
(take 3 (drop 500 dict/prhyme-dict))
|
||||||
|
(take 3 dict/cmu-dict)
|
||||||
|
(take 3 dict/popular)
|
||||||
|
|
||||||
|
(let [node (get-in words-by-rime ['("AH" "V")])]
|
||||||
|
(->> (prhyme/flatten-node node)))
|
||||||
|
|
||||||
|
(let [love-synonyms (thesaurus/thesaurus "love")
|
||||||
|
heart-synonyms (thesaurus/thesaurus "heart")]
|
||||||
|
(->> (clojure.set/intersection
|
||||||
|
(into #{} love-synonyms)
|
||||||
|
(into #{} heart-synonyms))
|
||||||
|
(map string/lower-case)
|
||||||
|
(filter #(dict/popular %))))
|
||||||
|
|
||||||
|
(let [synonyms (thesaurus/synonyms "love" "heart")]
|
||||||
|
synonyms)
|
||||||
|
|
||||||
|
(def love-rhymes
|
||||||
|
(let [synonyms (->> (thesaurus/synonyms "love" "heart")
|
||||||
|
(filter #(get dict/popular %))
|
||||||
|
(into #{}))]
|
||||||
|
(->>
|
||||||
|
(map
|
||||||
|
(fn [synonym]
|
||||||
|
(let [word (prhyme/phrase->word dict/prhyme-dict synonym)
|
||||||
|
rhymes (get words-by-rime (last (:rimes word)))]
|
||||||
|
(when rhymes
|
||||||
|
(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
|
||||||
|
rhyming-synonyms (filter synonyms rhyming-words)]
|
||||||
|
[(:normalized-word word) rhyming-synonyms]))))
|
||||||
|
synonyms)
|
||||||
|
(into {}))))
|
||||||
|
|
||||||
|
(count love-rhymes)
|
||||||
|
(get-in words-by-rime ['("AH" "V")])
|
||||||
|
|
||||||
|
(weight-fn
|
||||||
|
(first (filter #(= (:normalized-word %) "gotshal's") dict/prhyme-dict))
|
||||||
|
(prhyme/phrase->word dict/prhyme-dict "bye bye")
|
||||||
|
nil)
|
||||||
|
|
||||||
|
(take 10 darklyrics/darklyrics-markov-2)
|
||||||
|
(get darklyrics/darklyrics-markov-2 '("memory" "my"))
|
||||||
|
(repeatedly
|
||||||
|
5
|
||||||
|
(fn []
|
||||||
|
(let [rhymes (gen/selection-seq
|
||||||
|
dict/prhyme-dict
|
||||||
|
(comp (weighted/adjust-for-tail-rhyme 0.90)
|
||||||
|
#_(weighted/adjust-for-rhymes 0.50)
|
||||||
|
#_(weighted/adjust-for-fn :adj-rimes 0.80 pred-fn weight-fn)
|
||||||
|
(weighted/adjust-for-fn :adj-popular 0.95 pred-popular weight-popular)
|
||||||
|
(weighted/adjust-for-markov darklyrics/darklyrics-markov-2 0.99))
|
||||||
|
(prhyme/phrase->word dict/prhyme-dict "happy birthday taylor my love"))]
|
||||||
|
(->> rhymes
|
||||||
|
(take 5)
|
||||||
|
(map :normalized-word)))))
|
||||||
|
|
||||||
|
)
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@ -0,0 +1,339 @@
|
|||||||
|
(ns com.owoga.corpus.lovecraft
|
||||||
|
(:require [net.cgrand.enlive-html :as html]
|
||||||
|
[clojure.string :as string]
|
||||||
|
[com.owoga.prhyme.util.weighted-rand :as wr]
|
||||||
|
[com.owoga.prhyme.data.dictionary :as dict]
|
||||||
|
[com.owoga.prhyme.util :as util]
|
||||||
|
[com.owoga.prhyme.core :as prhyme]
|
||||||
|
[com.owoga.prhyme.util.nlp :as nlp]
|
||||||
|
[com.owoga.prhyme.gen :as gen]
|
||||||
|
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
|
||||||
|
[taoensso.tufte :as tufte :refer [defnp p profiled profile]]
|
||||||
|
[com.owoga.prhyme.frp :as frp]
|
||||||
|
[clojure.java.io :as io]))
|
||||||
|
|
||||||
|
(tufte/add-basic-println-handler! {})
|
||||||
|
|
||||||
|
(def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/")
|
||||||
|
|
||||||
|
(def words-map
|
||||||
|
(into {} (map #(vector (string/lower-case (:word %)) %) frp/words)))
|
||||||
|
|
||||||
|
(defn fetch-url [url]
|
||||||
|
(html/html-resource (java.net.URL. url)))
|
||||||
|
|
||||||
|
(defn links []
|
||||||
|
(map
|
||||||
|
#(str *base-url* (first (html/attr-values % :href)))
|
||||||
|
(html/select
|
||||||
|
(fetch-url *base-url*)
|
||||||
|
[:li :> [:a (html/attr? :href)]])))
|
||||||
|
|
||||||
|
(defn contentful-sections [nodes]
|
||||||
|
(->> nodes
|
||||||
|
(map html/text)
|
||||||
|
(filter #(> (count %) 100))))
|
||||||
|
|
||||||
|
(defn text-from-link [link]
|
||||||
|
(->> (html/select
|
||||||
|
(fetch-url link)
|
||||||
|
[:body])
|
||||||
|
(first)
|
||||||
|
(html/text)
|
||||||
|
((fn [s] (string/replace s #"[\s\u00A0]+" " ")))))
|
||||||
|
|
||||||
|
(defn cleanup [content]
|
||||||
|
(-> content
|
||||||
|
(string/replace #"Return to.*$" "")
|
||||||
|
(string/replace #"Home.*?This Site" "")
|
||||||
|
(string/replace #"[^a-zA-Z -]+" "")))
|
||||||
|
|
||||||
|
(defn tokens [content]
|
||||||
|
(string/split content #"\s+"))
|
||||||
|
|
||||||
|
(defn append-to-file [filepath text]
|
||||||
|
(with-open [w (io/writer filepath :append true)]
|
||||||
|
(.write w text)))
|
||||||
|
|
||||||
|
(defn scrape []
|
||||||
|
(run!
|
||||||
|
(fn [link]
|
||||||
|
(->> (text-from-link link)
|
||||||
|
(cleanup)
|
||||||
|
(#(str % "\n"))
|
||||||
|
(append-to-file "lovecraft.txt")))
|
||||||
|
(take 10 (links))))
|
||||||
|
|
||||||
|
(defn tokens-from-file [file]
|
||||||
|
(with-open [r (io/reader file)]
|
||||||
|
(tokens (slurp r))))
|
||||||
|
|
||||||
|
(defn window [n]
|
||||||
|
(fn [coll]
|
||||||
|
(cond
|
||||||
|
(empty? coll) []
|
||||||
|
(< (count coll) n) []
|
||||||
|
:else (cons (take n coll)
|
||||||
|
(lazy-seq ((window n) (drop n coll)))))))
|
||||||
|
|
||||||
|
(defn markov [tokens]
|
||||||
|
(->> tokens
|
||||||
|
(map
|
||||||
|
(fn [token]
|
||||||
|
(let [k (butlast token)
|
||||||
|
v (last token)]
|
||||||
|
[k v])))
|
||||||
|
(reduce
|
||||||
|
(fn [a [k v]]
|
||||||
|
(update-in a [k v] (fnil inc 0)))
|
||||||
|
{})))
|
||||||
|
|
||||||
|
(defn running-total
|
||||||
|
([coll]
|
||||||
|
(running-total coll 0))
|
||||||
|
([coll last-val]
|
||||||
|
(cond
|
||||||
|
(empty? coll) nil
|
||||||
|
:else (cons (+ last-val (first coll))
|
||||||
|
(lazy-seq
|
||||||
|
(running-total
|
||||||
|
(rest coll)
|
||||||
|
(+ last-val (first coll))))))))
|
||||||
|
|
||||||
|
(defn weighted-rand [weights]
|
||||||
|
(let [running-weights (running-total weights)
|
||||||
|
rand-val (rand (last running-weights))]
|
||||||
|
(loop [i 0]
|
||||||
|
(if (> (nth running-weights i) rand-val)
|
||||||
|
i
|
||||||
|
(recur (inc i))))))
|
||||||
|
|
||||||
|
(def word-set (into #{} (->> prhyme/words
|
||||||
|
(map first)
|
||||||
|
(map string/lower-case)
|
||||||
|
(map #(string/replace % #"\(\d+\)" "")))))
|
||||||
|
|
||||||
|
(defn normalize-tokens [tokens]
|
||||||
|
(->> tokens
|
||||||
|
(map string/lower-case)
|
||||||
|
(filter word-set)))
|
||||||
|
|
||||||
|
(defn main []
|
||||||
|
(->> (tokens-from-file "lovecraft.txt")
|
||||||
|
(reverse)
|
||||||
|
(normalize-tokens)
|
||||||
|
((window 2))
|
||||||
|
(markov)
|
||||||
|
(into {})))
|
||||||
|
|
||||||
|
(defn synonym?
|
||||||
|
"Given a possibility, like [\"foo\" 3]
|
||||||
|
which says that foo follows a particular key with
|
||||||
|
a weight of 3, a word is a synonym of that possibility
|
||||||
|
if the word is a synonym ."
|
||||||
|
[p synonyms]
|
||||||
|
(synonyms p))
|
||||||
|
|
||||||
|
(defn adjust-for-synonyms
|
||||||
|
"If a word is in a set of synonyms, adjust its weight upwards."
|
||||||
|
[synonyms]
|
||||||
|
(fn [possibilities]
|
||||||
|
(reduce
|
||||||
|
(fn [p s]
|
||||||
|
(if (s p)
|
||||||
|
(update p s #(* 5 %))
|
||||||
|
p))
|
||||||
|
possibilities
|
||||||
|
synonyms)))
|
||||||
|
|
||||||
|
(defn adjust-for-rimes
|
||||||
|
[target-rime dictionary]
|
||||||
|
(fn [possibilities]
|
||||||
|
(into
|
||||||
|
{}
|
||||||
|
(map
|
||||||
|
(fn [[p v]]
|
||||||
|
(let [possibility (get dictionary p)
|
||||||
|
factor (count
|
||||||
|
(frp/consecutive-matching
|
||||||
|
target-rime
|
||||||
|
possibility
|
||||||
|
:rimes))]
|
||||||
|
[p (* v (max 1 (* factor 4)))]))
|
||||||
|
possibilities))))
|
||||||
|
|
||||||
|
|
||||||
|
(defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
|
||||||
|
|
||||||
|
(defn markov-key [key-fn]
|
||||||
|
(fn [text]
|
||||||
|
(key-fn text)))
|
||||||
|
|
||||||
|
(defn gen-from [m p initial]
|
||||||
|
(loop [r (list initial)]
|
||||||
|
(cond
|
||||||
|
(p r) (recur (cons (m (list (first r))) r))
|
||||||
|
:else r)))
|
||||||
|
|
||||||
|
|
||||||
|
(defn rhyming-words
|
||||||
|
"List of rhyming words sorted by quality of rhyme."
|
||||||
|
[target]
|
||||||
|
(let [target-phrase (->> target
|
||||||
|
(prhyme/phrase->word frp/words)
|
||||||
|
(#(assoc % :rimes? true)))]
|
||||||
|
(->> target-phrase
|
||||||
|
(#(assoc % :rimes? true))
|
||||||
|
(frp/prhyme frp/words)
|
||||||
|
(sort-by
|
||||||
|
#(- (count
|
||||||
|
(frp/consecutive-matching
|
||||||
|
%
|
||||||
|
target-phrase
|
||||||
|
:rimes)))))))
|
||||||
|
|
||||||
|
(defn markov-rhymes [markov-data rhyming-words]
|
||||||
|
(->> (map
|
||||||
|
(fn [word]
|
||||||
|
(->> word
|
||||||
|
:word
|
||||||
|
string/lower-case
|
||||||
|
(#(string/replace % #"\(\d+\)" ""))
|
||||||
|
(#(vector % (get markov-data (list %))))))
|
||||||
|
rhyming-words)
|
||||||
|
(into #{})
|
||||||
|
(remove
|
||||||
|
(fn [[w p]]
|
||||||
|
(nil? p)))))
|
||||||
|
|
||||||
|
(defn adjust-for-over-syllables
|
||||||
|
"Adjust weights to prefer not going over the number
|
||||||
|
of syllables of the target word."
|
||||||
|
[target]
|
||||||
|
(fn [words]
|
||||||
|
(p :adjust-for-syllables
|
||||||
|
(map
|
||||||
|
(fn [word]
|
||||||
|
(if (or (nil? (:syllable-count word))
|
||||||
|
(nil? (:syllables target)))
|
||||||
|
(println word target))
|
||||||
|
(cond
|
||||||
|
(= (:syllable-count word) (count (:syllables target)))
|
||||||
|
(as-> word word
|
||||||
|
(assoc word :weight (* 3 (:weight word)))
|
||||||
|
(assoc word :adjusted-for-syllables-factor 3))
|
||||||
|
|
||||||
|
(< (:syllable-count word) (count (:syllables target)))
|
||||||
|
(as-> word word
|
||||||
|
(assoc word :weight (* 2 (:weight word)))
|
||||||
|
(assoc word :adjusted-for-syllables-factor 2))
|
||||||
|
|
||||||
|
:else
|
||||||
|
(as-> word word
|
||||||
|
(assoc word :weight (* 1 (:weight word)))
|
||||||
|
(assoc word :adjusted-for-syllables-factor 1))))
|
||||||
|
words))))
|
||||||
|
|
||||||
|
(defn adjust-for-membership-1
|
||||||
|
[set_ percent]
|
||||||
|
(let [ratio (- 1 percent)]
|
||||||
|
(fn [words]
|
||||||
|
(let [[members non-members]
|
||||||
|
((juxt filter remove)
|
||||||
|
#(set_ (:normalized-word %))
|
||||||
|
words)
|
||||||
|
weight-non-members (apply + (map :weight non-members))
|
||||||
|
target-weight-members (* ratio weight-non-members)
|
||||||
|
count-members (count members)
|
||||||
|
adjustment-members (/ target-weight-members count-members)]
|
||||||
|
(concat
|
||||||
|
(map
|
||||||
|
(fn [member]
|
||||||
|
(as-> member member
|
||||||
|
(assoc member :weight (* adjustment-members (:weight member)))
|
||||||
|
(assoc member :adjustment-for-membership adjustment-members)))
|
||||||
|
members)
|
||||||
|
non-members)))))
|
||||||
|
|
||||||
|
(defn adjust-for-membership [set_]
|
||||||
|
(fn [words]
|
||||||
|
(map
|
||||||
|
(fn [word]
|
||||||
|
(if (set_ (:normalized-word word))
|
||||||
|
(as-> word word
|
||||||
|
(assoc word :weight (* 2 (:weight word)))
|
||||||
|
(assoc word :adjust-for-membership-factor 2))
|
||||||
|
(assoc word :adjust-for-membership-factor 1)))
|
||||||
|
words)))
|
||||||
|
|
||||||
|
(defn filter-for-membership [set_]
|
||||||
|
(fn [words]
|
||||||
|
(map
|
||||||
|
(fn [word]
|
||||||
|
(if-not (set_ (:normalized-word word))
|
||||||
|
(as-> word word
|
||||||
|
(assoc word :weight (* 0.01 (:weight word)))
|
||||||
|
(assoc word :filter-for-membership-factor 0.01))
|
||||||
|
word))
|
||||||
|
words)))
|
||||||
|
|
||||||
|
(defn adjust-for-markov [markov-options]
|
||||||
|
(let [markov-set (into #{} (map first (keys markov-options)))]
|
||||||
|
(fn [words]
|
||||||
|
(let [result (map
|
||||||
|
(fn [word]
|
||||||
|
(if (markov-set (:normalized-word word))
|
||||||
|
(as-> word word
|
||||||
|
(assoc word :weight (* 100 (:weight word)))
|
||||||
|
(assoc word :adjust-for-markov-factor 100))
|
||||||
|
(assoc word :adjust-for-markov-factor 1)))
|
||||||
|
words)]
|
||||||
|
result))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))]
|
||||||
|
(take 5 (markov-adjuster frp/words))))
|
||||||
|
|
||||||
|
(defn adjust-for-membership-1
|
||||||
|
[set_ percent]
|
||||||
|
(let [ratio (- 1 percent)]
|
||||||
|
(fn [words]
|
||||||
|
(let [[members non-members]
|
||||||
|
((juxt filter remove)
|
||||||
|
#(set_ (:normalized-word %))
|
||||||
|
words)
|
||||||
|
weight-non-members (apply + (map :weight non-members))
|
||||||
|
target-weight-members (* ratio weight-non-members)
|
||||||
|
count-members (count members)
|
||||||
|
adjustment-members (/ target-weight-members count-members)]
|
||||||
|
(concat
|
||||||
|
(map
|
||||||
|
(fn [member]
|
||||||
|
(as-> member member
|
||||||
|
(assoc member :weight (* adjustment-members (:weight member)))
|
||||||
|
(assoc member :adjustment-for-membership adjustment-members)))
|
||||||
|
members)
|
||||||
|
non-members)))))
|
||||||
|
|
||||||
|
(defn adjust-for-markov-1
|
||||||
|
[markov-options percent]
|
||||||
|
(let [ratio (- 1 percent)]
|
||||||
|
(fn [words]
|
||||||
|
(if (nil? markov-options)
|
||||||
|
words
|
||||||
|
(let [[markovs non-markovs]
|
||||||
|
((juxt filter remove)
|
||||||
|
#(markov-options (:normalized-word %))
|
||||||
|
words)
|
||||||
|
weight-non-markovs (apply + (map :weight non-markovs))
|
||||||
|
target-weight-markovs (* ratio weight-non-markovs)
|
||||||
|
count-markovs (count markovs)
|
||||||
|
adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
|
||||||
|
(concat
|
||||||
|
(map
|
||||||
|
(fn [markov]
|
||||||
|
(as-> markov markov
|
||||||
|
(assoc markov :weight (* adjustment-markovs (:weight markov)))
|
||||||
|
(assoc markov :adjustment-for-markov adjustment-markovs)))
|
||||||
|
markovs)
|
||||||
|
non-markovs))))))
|
@ -0,0 +1,63 @@
|
|||||||
|
(ns com.owoga.prhyme.data.bigrams
|
||||||
|
(:require [clojure.java.io :as io]
|
||||||
|
[clojure.string :as string]
|
||||||
|
[com.owoga.prhyme.syllabify :refer [syllabify]]
|
||||||
|
[com.owoga.prhyme.data.dictionary :as dict]
|
||||||
|
[com.owoga.prhyme.util :as util]
|
||||||
|
[com.owoga.prhyme.core :as prhyme]))
|
||||||
|
|
||||||
|
(def popular-bigrams
|
||||||
|
(->> (io/resource "bigrams-with-counts.txt")
|
||||||
|
io/reader
|
||||||
|
line-seq
|
||||||
|
(map #(string/split % #"\s+"))
|
||||||
|
(map #(map string/lower-case %))
|
||||||
|
(filter (fn [[w1 w2 num]]
|
||||||
|
(and (dict/popular w1)
|
||||||
|
(dict/popular w2))))
|
||||||
|
(map (partial take 2))))
|
||||||
|
|
||||||
|
(def phrase->Word
|
||||||
|
(prhyme/make-phrase->Word
|
||||||
|
(into
|
||||||
|
{}
|
||||||
|
(map
|
||||||
|
(fn [[word & phonemes]]
|
||||||
|
[(string/lower-case word)
|
||||||
|
phonemes])
|
||||||
|
dict/cmu-dict))))
|
||||||
|
|
||||||
|
(def popular-bigrams-with-syllables
|
||||||
|
(->> popular-bigrams
|
||||||
|
(map (partial string/join " "))
|
||||||
|
(map phrase->Word)))
|
||||||
|
|
||||||
|
(def popular-bigram-words
|
||||||
|
(->> popular-bigrams
|
||||||
|
(map (partial string/join " "))
|
||||||
|
(map phrase->Word)))
|
||||||
|
|
||||||
|
(def popular-mono-and-bi-grams
|
||||||
|
(concat
|
||||||
|
dict/prhyme-dict
|
||||||
|
popular-bigram-words))
|
||||||
|
|
||||||
|
(def popular-mono-and-bi-grams-by-rime
|
||||||
|
(prhyme/prhyme-words-by-rime*
|
||||||
|
popular-mono-and-bi-grams))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(->> (reverse (:rimes (phrase->Word "beautiful")))
|
||||||
|
(get-in popular-mono-and-bi-grams-by-rime)
|
||||||
|
(prhyme/flatten-node)
|
||||||
|
(remove #(re-matches #".*beautiful" (:word %)))
|
||||||
|
(map :word)
|
||||||
|
(map string/lower-case))
|
||||||
|
(get-in
|
||||||
|
popular-mono-and-bi-grams-by-rime
|
||||||
|
['("AA") '("ER") '("IY")])
|
||||||
|
(take 5 popular-bigrams)
|
||||||
|
(take 5 popular-bigram-words)
|
||||||
|
(take-last 5 popular-bigram-words)
|
||||||
|
(prhyme/phrase->word dict/prhyme-dict "hi there"))
|
||||||
|
(def words-and-bigrams())
|
@ -0,0 +1,39 @@
|
|||||||
|
(ns com.owoga.prhyme.data.darklyrics
|
||||||
|
(:require [clojure.java.io :as io]
|
||||||
|
[taoensso.nippy :as nippy])
|
||||||
|
(:import [java.io DataInputStream ByteArrayOutputStream]))
|
||||||
|
|
||||||
|
(defn thaw-from-file
|
||||||
|
"Convenience util: like `thaw`, but reads from `(clojure.java.io/file <file>)`.
|
||||||
|
|
||||||
|
To thaw from a resource on classpath (e.g in Leiningen `resources` dir):
|
||||||
|
(thaw-from-file (clojure.java.io/resource \"my-resource-name.npy\"))
|
||||||
|
|
||||||
|
See also `freeze-to-file`."
|
||||||
|
([file ] (thaw-from-file file nil))
|
||||||
|
([file thaw-opts]
|
||||||
|
(let [xin (io/input-stream file)
|
||||||
|
xout (ByteArrayOutputStream.)]
|
||||||
|
(io/copy xin xout)
|
||||||
|
(nippy/thaw (.toByteArray xout) thaw-opts))))
|
||||||
|
|
||||||
|
;; (thaw-from-file (io/resource "test.bin"))
|
||||||
|
;; (bytes (byte-array (take 20 (range))))
|
||||||
|
|
||||||
|
;; (byte-array (map (comp byte int) "ascii"))
|
||||||
|
;; ;; => [97, 115, 99, 105, 105]
|
||||||
|
;; (bytes (byte-array (map (comp byte int) "ascii")))
|
||||||
|
;; ;; => [97, 115, 99, 105, 105]
|
||||||
|
|
||||||
|
;; (let [xin (io/input-stream (io/resource "test.bin"))
|
||||||
|
;; xout (ByteArrayOutputStream.)]
|
||||||
|
;; (io/copy xin xout)
|
||||||
|
;; (nippy/thaw (.toByteArray xout)))
|
||||||
|
|
||||||
|
;; (.fullyRead (io/input-stream (io/resource "test.bin")))
|
||||||
|
|
||||||
|
;; (.getSize (io/input-stream (io/resource "dark-corpus-2.bin")))
|
||||||
|
;; (def data (into {} (map vec (partition 2 (range 20)))))
|
||||||
|
;; (nippy/freeze-to-file "resources/test.bin" data)
|
||||||
|
(def darklyrics-markov-2
|
||||||
|
(thaw-from-file (io/resource "dark-corpus-2.bin")))
|
@ -1,9 +1,18 @@
|
|||||||
(ns com.owoga.prhyme.data.thesaurus
|
(ns com.owoga.prhyme.data.thesaurus
|
||||||
(:require [clojure.string :as string]
|
(:require [clojure.string :as string]
|
||||||
[clojure.java.io :as io]))
|
[clojure.java.io :as io]
|
||||||
|
[clojure.set]))
|
||||||
|
|
||||||
(def thesaurus
|
(def thesaurus
|
||||||
(->> (line-seq (io/reader (io/resource "mthesaur.txt")))
|
(->> (line-seq (io/reader (io/resource "mthesaur.txt")))
|
||||||
(map #(string/split % #","))
|
(map #(string/split % #","))
|
||||||
(map #(vector (first %) (rest %)))
|
(map #(vector (first %) (rest %)))
|
||||||
(into {})))
|
(into {})))
|
||||||
|
|
||||||
|
(defn synonyms
|
||||||
|
([& words]
|
||||||
|
(->> words
|
||||||
|
(map string/lower-case)
|
||||||
|
(map #(get thesaurus %))
|
||||||
|
(map (fn [ws] (into #{} ws)))
|
||||||
|
(apply clojure.set/intersection))))
|
||||||
|
@ -0,0 +1,76 @@
|
|||||||
|
(ns com.owoga.prhyme.limerick
|
||||||
|
(:require [com.owoga.prhyme.gen :as gen]
|
||||||
|
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
|
||||||
|
[clojure.string :as string]
|
||||||
|
[com.owoga.prhyme.core :as prhyme]
|
||||||
|
[com.owoga.prhyme.util :as util]))
|
||||||
|
|
||||||
|
(defn rhyme-from-scheme
|
||||||
|
"scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]"
|
||||||
|
[words markov scheme]
|
||||||
|
(loop [scheme scheme
|
||||||
|
rhymes {}
|
||||||
|
result []]
|
||||||
|
(cond
|
||||||
|
(empty? scheme) result
|
||||||
|
:else
|
||||||
|
(let [[pattern syllable-count] (first scheme)
|
||||||
|
banned-words (into #{} (->> result
|
||||||
|
(map #(string/split % #" "))
|
||||||
|
(map #(last %))))
|
||||||
|
adj (util/comp-rnil
|
||||||
|
(weighted-selection/adjust-for-markov
|
||||||
|
markov
|
||||||
|
0.99)
|
||||||
|
(when (rhymes pattern)
|
||||||
|
(weighted-selection/adjust-for-tail-rhyme 0.99)))
|
||||||
|
rhyme (if (nil? (get rhymes pattern))
|
||||||
|
(gen/gen-sentence-with-syllable-count
|
||||||
|
adj
|
||||||
|
syllable-count
|
||||||
|
words)
|
||||||
|
(gen/gen-rhyme-with-syllable-count
|
||||||
|
adj
|
||||||
|
syllable-count
|
||||||
|
(remove #(banned-words (:normalized-word %)) words)
|
||||||
|
(prhyme/phrase->word words (get rhymes pattern))))]
|
||||||
|
(recur (rest scheme)
|
||||||
|
(assoc rhymes pattern rhyme)
|
||||||
|
(conj result rhyme))))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(require '[com.owoga.prhyme.data.dictionary :as dict]
|
||||||
|
'[com.owoga.prhyme.data.darklyrics :refer [darklyrics-markov-2]]
|
||||||
|
'[clojure.java.io :as io])
|
||||||
|
(rhyme-from-scheme dict/prhyme-dict darklyrics-markov-2 '((A 8) (A 8) (B 5) (B 5) (A 8)))
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
(comment
|
||||||
|
["bishop larch smitten us dwell"
|
||||||
|
"solely first week in hell"
|
||||||
|
"and take that for three"
|
||||||
|
"come wrapped in glory"
|
||||||
|
"you ever leave it so well"]
|
||||||
|
["romancing realized too late"
|
||||||
|
"my crown revive my withered state"
|
||||||
|
"reign is obsolete"
|
||||||
|
"i sit in the street"
|
||||||
|
"but nobody cares of my fate"]
|
||||||
|
["flesh is hacked to get me sedate"
|
||||||
|
"demonstration obsessed with hate"
|
||||||
|
"justice will be written in stone"
|
||||||
|
"and you will be shown"
|
||||||
|
"bedrooms of icons suffocate"]
|
||||||
|
["you will bow to their hungry gods"
|
||||||
|
"come will come whatever the odds"
|
||||||
|
"now we see the light"
|
||||||
|
"you can't put it right"
|
||||||
|
"recklessly chopping firing squads"]
|
||||||
|
["untimely they fool their poor life"
|
||||||
|
"it wither away with this knife"
|
||||||
|
"hate is my virtue"
|
||||||
|
"my feelings are well overdue"
|
||||||
|
"war we await the afterlife"])
|
||||||
|
|
||||||
|
|
@ -1,86 +0,0 @@
|
|||||||
(ns com.owoga.prhyme.lymeric
|
|
||||||
(:require [com.owoga.prhyme.gen :as gen]
|
|
||||||
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
|
|
||||||
[clojure.string :as string]
|
|
||||||
[com.owoga.prhyme.core :as prhyme]
|
|
||||||
[com.owoga.prhyme.frp :as frp]
|
|
||||||
[com.owoga.prhyme.util :as util]
|
|
||||||
[taoensso.nippy :as nippy]
|
|
||||||
[clojure.java.io :as io]))
|
|
||||||
|
|
||||||
|
|
||||||
(defn rhyme-from-scheme
|
|
||||||
"scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]"
|
|
||||||
[rhymer markov scheme]
|
|
||||||
(let [base-words (map #(assoc % :weight 1) frp/popular)]
|
|
||||||
(loop [scheme scheme
|
|
||||||
rhymes {}
|
|
||||||
result []]
|
|
||||||
(cond
|
|
||||||
(empty? scheme) result
|
|
||||||
:else
|
|
||||||
(let [[pattern syllable-count] (first scheme)
|
|
||||||
banned-words (into #{} (->> result
|
|
||||||
(map #(string/split % #" "))
|
|
||||||
(map #(last %))))
|
|
||||||
adj (util/comp-rnil
|
|
||||||
(weighted-selection/adjust-for-markov
|
|
||||||
markov
|
|
||||||
0.99)
|
|
||||||
(when (rhymes pattern)
|
|
||||||
(weighted-selection/adjust-for-tail-rhyme 0.99)))
|
|
||||||
rhyme (if (nil? (get rhymes pattern))
|
|
||||||
(gen/gen-sentence-with-syllable-count
|
|
||||||
adj
|
|
||||||
syllable-count
|
|
||||||
base-words)
|
|
||||||
(gen/gen-rhyme-with-syllable-count
|
|
||||||
adj
|
|
||||||
syllable-count
|
|
||||||
(remove #(banned-words (:norm-word %))
|
|
||||||
base-words)
|
|
||||||
(prhyme/phrase->word frp/words (get rhymes pattern))))]
|
|
||||||
(recur (rest scheme)
|
|
||||||
(assoc rhymes pattern rhyme)
|
|
||||||
(conj result rhyme)))))))
|
|
||||||
|
|
||||||
(comment
|
|
||||||
(time (def darkov-2 (nippy/thaw-from-file (io/resource "darkov-2.bin"))))
|
|
||||||
(rhyme-from-scheme nil darkov-2 '((A 8) (A 8) (B 5) (B 5) (A 8)))
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
(comment
|
|
||||||
(->> (gen/selection-seq
|
|
||||||
(map #(assoc % :weight 1) frp/words)
|
|
||||||
(weighted-selection/adjust-for-rhymes 0.99)
|
|
||||||
(prhyme/phrase->word frp/words "hi there my boy"))
|
|
||||||
(take 3))
|
|
||||||
|
|
||||||
["bishop larch smitten us dwell"
|
|
||||||
"solely first week in hell"
|
|
||||||
"and take that for three"
|
|
||||||
"come wrapped in glory"
|
|
||||||
"you ever leave it so well"]
|
|
||||||
["romancing realized too late"
|
|
||||||
"my crown revive my withered state"
|
|
||||||
"reign is obsolete"
|
|
||||||
"i sit in the street"
|
|
||||||
"but nobody cares of my fate"]
|
|
||||||
["flesh is hacked to get me sedate"
|
|
||||||
"demonstration obsessed with hate"
|
|
||||||
"justice will be written in stone"
|
|
||||||
"and you will be shown"
|
|
||||||
"bedrooms of icons suffocate"]
|
|
||||||
["you will bow to their hungry gods"
|
|
||||||
"come will come whatever the odds"
|
|
||||||
"now we see the light"
|
|
||||||
"you can't put it right"
|
|
||||||
"recklessly chopping firing squads"]
|
|
||||||
["untimely they fool their poor life"
|
|
||||||
"it wither away with this knife"
|
|
||||||
"hate is my virtue"
|
|
||||||
"my feelings are well overdue"
|
|
||||||
"war we await the afterlife"])
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,81 @@
|
|||||||
|
(ns com.owoga.prhyme.nlp.tag-sets.treebank-ii)
|
||||||
|
|
||||||
|
;;;; Penn Treebank II Constituent Tags
|
||||||
|
;;;; http://www.surdeanu.info/mihai/teaching/ista555-fall13/readings/PennTreebankConstituents.html#MD
|
||||||
|
(def clauses
|
||||||
|
{'S "simple declarative clause, i.e. one that is not introduced by a (possible empty) subordinating conjunction or a wh-word and that does not exhibit subject-verb inversion."
|
||||||
|
'SBAR "Clause introduced by a (possibly empty) subordinating conjunction."
|
||||||
|
'SBARQ "Direct question introduced by a wh-word or a wh-phrase. Indirect questions and relative clauses should be bracketed as SBAR, not SBARQ."
|
||||||
|
'SINV "Inverted declarative sentence, i.e. one in which the subject follows the tensed verb or modal."
|
||||||
|
'SQ "Inverted yes/no question, or main clause of a wh-question, following the wh-phrase in SBARQ."})
|
||||||
|
|
||||||
|
(def phrases
|
||||||
|
{'ADJP "Adjective Phrase."
|
||||||
|
'ADVP "Adverb Phrase."
|
||||||
|
'CONJP "Conjunction Phrase."
|
||||||
|
'FRAG "Fragment."
|
||||||
|
'INTJ "Interjection. Corresponds approximately to the part-of-speech tag UH."
|
||||||
|
'LST "List marker. Includes surrounding punctuation."
|
||||||
|
'NAC "Not a Constituent; used to show the scope of certain prenominal modifiers within an NP."
|
||||||
|
'NP "Noun Phrase."
|
||||||
|
'NX "Used within certain complex NPs to mark the head of the NP. Corresponds very roughly to N-bar level but used quite differently."
|
||||||
|
'PP "Prepositional Phrase."
|
||||||
|
'PRN "Parenthetical."
|
||||||
|
'PRT "Particle. Category for words that should be tagged RP."
|
||||||
|
'QP "Quantifier Phrase (i.e. complex measure/amount phrase); used within NP."
|
||||||
|
'RRC "Reduced Relative Clause."
|
||||||
|
'UCP "Unlike Coordinated Phrase."
|
||||||
|
'VP "Vereb Phrase."
|
||||||
|
'WHADJP "Wh-adjective Phrase. Adjectival phrase containing a wh-adverb, as in how hot."
|
||||||
|
'WHAVP "Wh-adverb Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing a wh-adverb such as how or why."
|
||||||
|
'WHNP "Wh-noun Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing some wh-word, e.g. who, which book, whose daughter, none of which, or how many leopards."
|
||||||
|
'WHPP "Wh-prepositional Phrase. Prepositional phrase containing a wh-noun phrase (such as of which or by whose authority) that either introduces a PP gap or is contained by a WHNP."
|
||||||
|
'X "Unknown, uncertain, or unbracketable. X is often used for bracketing typos and in bracketing the...the-constructions."})
|
||||||
|
|
||||||
|
(def words
|
||||||
|
{'CC "Coordinating conjunction"
|
||||||
|
'CD "Cardinal number"
|
||||||
|
'DT "Determiner"
|
||||||
|
'EX "Existential there"
|
||||||
|
'FW "Foreign word"
|
||||||
|
'IN "Preposition or subordinating conjunction"
|
||||||
|
'JJ "Adjective"
|
||||||
|
'JJR "Adjective, comparative"
|
||||||
|
'JJS "Adjective, superlative"
|
||||||
|
'LS "List item marker"
|
||||||
|
'MD "Modal"
|
||||||
|
'NN "Noun, singular or mass"
|
||||||
|
'NNS "Noun, plural"
|
||||||
|
'NNP "Proper noun, singular"
|
||||||
|
'NNPS "Proper noun, plural"
|
||||||
|
'PDT "Predeterminer"
|
||||||
|
'POS "Possessive ending"
|
||||||
|
'PRP "Personal pronoun"
|
||||||
|
'PRP$ "Possessive pronoun (prolog version PRP-S)"
|
||||||
|
'RB "Adverb"
|
||||||
|
'RBR "Adverb, comparative"
|
||||||
|
'RBS "Adverb, superlative"
|
||||||
|
'RP "Particle"
|
||||||
|
'SYM "Symbol"
|
||||||
|
'TO "to"
|
||||||
|
'UH "Interjection"
|
||||||
|
'VB "Verb, base form"
|
||||||
|
'VBD "Verb, past tense"
|
||||||
|
'VBG "Verb, gerund or present participle"
|
||||||
|
'VBN "Verb, past participle"
|
||||||
|
'VBP "Verb, non-3rd person singular present"
|
||||||
|
'VBZ "Verb, 3rd person singular present"
|
||||||
|
'WDT "Wh-determiner"
|
||||||
|
'WP "Wh-pronoun"
|
||||||
|
'WP$ "Possessive wh-pronoun (prolog version WP-S)"
|
||||||
|
'WRB "Wh-adverb"})
|
||||||
|
|
||||||
|
(def form-function-discrepencies
|
||||||
|
{'-ADV "(adverbial) - marks a constituent
|
||||||
|
other than ADVP or PP when it is used adverbially (e.g. NPs or
|
||||||
|
free (\"headless\" relatives). However, constituents that themselves are
|
||||||
|
modifying an ADVP generally do not get -ADV. If a more specific tag is
|
||||||
|
available (for example, -TMP) then it is used alone and -ADV is implied. See
|
||||||
|
the Adverbials section."
|
||||||
|
'-NOM "(nominal) - marks free (\"headless\") relatives
|
||||||
|
and gerunds when they act nominally."})
|
Loading…
Reference in New Issue