You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
286 lines
9.1 KiB
Clojure
286 lines
9.1 KiB
Clojure
(ns examples.core
|
|
(:require [clojure.string :as string]
|
|
[clojure.set]
|
|
[clojure.java.io :as io]
|
|
[com.owoga.prhyme.frp :as frp]
|
|
[com.owoga.prhyme.util :as util]
|
|
[com.owoga.prhyme.core :as prhyme]
|
|
[com.owoga.prhyme.data.bigrams :as bigrams]
|
|
[com.owoga.prhyme.gen :as gen]
|
|
[com.owoga.prhyme.nlp.core :as nlp]
|
|
[com.owoga.prhyme.nlg.core :as nlg]
|
|
[com.owoga.prhyme.data.dictionary :as dict]
|
|
[com.owoga.prhyme.data.thesaurus :as thesaurus]
|
|
[com.owoga.prhyme.data.darklyrics :as darklyrics]
|
|
[com.owoga.prhyme.generation.weighted-selection :as weighted]
|
|
[clojure.set :as set]
|
|
[clojure.zip :as zip]
|
|
[clojure.walk :as walk]))
|
|
|
|
(defn weight-fn [word target result]
|
|
(let [rimes (frp/consecutive-matching word target :rimes)
|
|
nuclei (frp/consecutive-matching word target :nuclei)
|
|
onsets (frp/consecutive-matching word target :onsets)
|
|
total (apply + (map count [rimes nuclei onsets]))]
|
|
total))
|
|
|
|
(defn pred-fn [word target result]
|
|
(< 0 (weight-fn word target result)))
|
|
|
|
(defn weight-popular [word target result]
|
|
(if (dict/popular (:normalized-word word))
|
|
10
|
|
1))
|
|
|
|
(defn pred-popular [word target result]
|
|
(< 1 (weight-popular word target result)))
|
|
|
|
(def words-by-rime (prhyme/words-by-rime*
|
|
(filter
|
|
(fn [[word & _]]
|
|
(get
|
|
dict/popular
|
|
(string/lower-case word)))
|
|
dict/cmu-dict)))
|
|
|
|
(defn rime-1 [target]
|
|
(let [rime (last (:rimes target))]
|
|
(fn [x]
|
|
(= rime (last (:rimes x))))))
|
|
|
|
(defn rime-2 [target]
|
|
(let [rime (last (butlast (:rimes target)))]
|
|
(fn [x]
|
|
(= rime (last (butlast (:rimes x)))))))
|
|
|
|
(defn rime-member? [coll]
|
|
(let [coll (into #{} coll)]
|
|
(fn [x]
|
|
(coll (:normalized-word x)))))
|
|
|
|
(defn rime-compare [& comparators]
|
|
(let [juxtcomp (apply juxt comparators)]
|
|
(fn [a b]
|
|
(let [a (juxtcomp a)
|
|
b (juxtcomp b)]
|
|
(compare a b)))))
|
|
(def c
|
|
(fn [a b]
|
|
((rime-compare
|
|
(rime-1 {:rimes '(1 2)})
|
|
(rime-2 {:rimes '(1 2)})
|
|
(rime-member? ["foo" "bar"]))
|
|
b a)))
|
|
|
|
(comment
|
|
(let [coll [{:rimes '(3 2) :normalized-word "foo"}
|
|
{:rimes '(1 2) :normalized-word "foo"}
|
|
{:rimes '(4 5) :normalized-word "foo"}
|
|
{:rimes '(1 2) :normalized-word "buzz"}]]
|
|
(sort c coll))
|
|
|
|
(let [coll '("woman"
|
|
"union"
|
|
"passion"
|
|
"infatuation"
|
|
"emotion"
|
|
"disposition"
|
|
"communion"
|
|
"attraction"
|
|
"affection"
|
|
"adoration"
|
|
"admiration")
|
|
coll (map #(prhyme/phrase->word dict/popular %) coll)
|
|
target (prhyme/phrase->word dict/popular "devotion")
|
|
synonyms (thesaurus/synonyms "love" "heart")
|
|
comparisons (fn [target]
|
|
(fn [a b]
|
|
((rime-compare
|
|
(rime-1 target)
|
|
(rime-2 target)
|
|
(rime-member? synonyms))
|
|
b a)))]
|
|
(sort (comparisons target) coll))
|
|
|
|
)
|
|
|
|
(defn rhymestorm [& words]
|
|
(let [synonyms (->> (apply thesaurus/synonyms words)
|
|
(filter #(get dict/popular %))
|
|
(into #{}))
|
|
comparisons (fn [target]
|
|
(fn [a b]
|
|
((rime-compare
|
|
(rime-1 target)
|
|
(rime-2 target)
|
|
(rime-member? synonyms))
|
|
b a)))]
|
|
(->> synonyms
|
|
(map
|
|
(fn [synonym]
|
|
(let [word (prhyme/phrase->word dict/prhyme-dict synonym)
|
|
rhymes (get words-by-rime (last (:rimes word)))]
|
|
(when rhymes
|
|
(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
|
|
rhyming-synonyms (remove #{(:normalized-word word)} (filter synonyms rhyming-words))]
|
|
[(:normalized-word word) rhyming-synonyms])))))
|
|
(remove (fn [[_ rhymes]]
|
|
(empty? rhymes)))
|
|
(map (fn [[target rhymes]]
|
|
[target (->> rhymes
|
|
(map prhyme/phrase->word dict/popular)
|
|
(sort (comparisons (prhyme/phrase->word dict/popular target)))
|
|
(map :normalized-word))]))
|
|
(into {}))))
|
|
|
|
(comment
|
|
(rhymestorm "love")
|
|
(take 3 (drop 500 dict/prhyme-dict))
|
|
(take 3 dict/cmu-dict)
|
|
(take 3 dict/popular)
|
|
|
|
(let [node (get-in words-by-rime ['("AH" "V")])]
|
|
(->> (prhyme/flatten-node node)))
|
|
|
|
(let [love-synonyms (thesaurus/thesaurus "love")
|
|
heart-synonyms (thesaurus/thesaurus "heart")]
|
|
(->> (clojure.set/intersection
|
|
(into #{} love-synonyms)
|
|
(into #{} heart-synonyms))
|
|
(map string/lower-case)
|
|
(filter #(dict/popular %))))
|
|
|
|
(let [synonyms (thesaurus/synonyms "love" "heart")]
|
|
synonyms)
|
|
|
|
(def love-rhymes
|
|
(let [synonyms (->> (thesaurus/synonyms "love" "heart")
|
|
(filter #(get dict/popular %))
|
|
(into #{}))]
|
|
(->>
|
|
(map
|
|
(fn [synonym]
|
|
(let [word (prhyme/phrase->word dict/prhyme-dict synonym)
|
|
rhymes (get words-by-rime (last (:rimes word)))]
|
|
(when rhymes
|
|
(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
|
|
rhyming-synonyms (filter synonyms rhyming-words)]
|
|
[(:normalized-word word) rhyming-synonyms]))))
|
|
synonyms)
|
|
(into {}))))
|
|
|
|
(count love-rhymes)
|
|
(get-in words-by-rime ['("AH" "V")])
|
|
|
|
(weight-fn
|
|
(first (filter #(= (:normalized-word %) "gotshal's") dict/prhyme-dict))
|
|
(prhyme/phrase->word dict/prhyme-dict "bye bye")
|
|
nil)
|
|
|
|
(take 10 darklyrics/darklyrics-markov-2)
|
|
(get darklyrics/darklyrics-markov-2 '("memory" "my"))
|
|
(repeatedly
|
|
5
|
|
(fn []
|
|
(let [rhymes (gen/selection-seq
|
|
dict/prhyme-dict
|
|
(comp (weighted/adjust-for-tail-rhyme 0.90)
|
|
#_(weighted/adjust-for-rhymes 0.50)
|
|
#_(weighted/adjust-for-fn :adj-rimes 0.80 pred-fn weight-fn)
|
|
(weighted/adjust-for-fn :adj-popular 0.95 pred-popular weight-popular)
|
|
(weighted/adjust-for-markov darklyrics/darklyrics-markov-2 0.99))
|
|
(prhyme/phrase->word dict/prhyme-dict "happy birthday taylor my love"))]
|
|
(->> rhymes
|
|
(take 5)
|
|
(map :normalized-word)))))
|
|
|
|
)
|
|
|
|
(defn remove-sentences-with-words-not-in-dictionary [dictionary]
|
|
(let [dictionary (into #{} dictionary)]
|
|
(fn [sentences]
|
|
(->> sentences
|
|
(map #(string/split % #" "))
|
|
(remove #(some (complement dictionary) %))
|
|
(remove #(some string/blank? %))
|
|
(map #(string/join " " %))))))
|
|
|
|
(defn dark-pos-freqs []
|
|
(let [directory "dark-corpus"]
|
|
(->> (file-seq (io/file directory))
|
|
(remove #(.isDirectory %))
|
|
(drop 10)
|
|
(take 10)
|
|
(map slurp)
|
|
(map util/clean-text)
|
|
(filter dict/english?)
|
|
(map #(string/split % #"\n+"))
|
|
(map (remove-sentences-with-words-not-in-dictionary dict/popular))
|
|
(map nlp/treebank-zipper)
|
|
(map nlp/leaf-pos-path-word-freqs)
|
|
(apply nlp/deep-merge-with +))))
|
|
|
|
(defn dark-structures []
|
|
(let [directory "dark-corpus"]
|
|
(->> (file-seq (io/file directory))
|
|
(remove #(.isDirectory %))
|
|
(take 1000)
|
|
(map slurp)
|
|
(map util/clean-text)
|
|
(filter dict/english?)
|
|
(map #(string/split % #"\n+"))
|
|
(map #(remove string/blank? %))
|
|
(map nlp/parse-to-simple-tree)
|
|
(map nlp/parse-tree-sans-leaf-words)
|
|
(map
|
|
(fn [lines]
|
|
(map
|
|
(fn [line]
|
|
(hash-map line 1))
|
|
lines)))
|
|
(map (partial merge-with +))
|
|
flatten
|
|
(apply merge-with +))))
|
|
|
|
(comment
|
|
(time (def example-pos-freqs (dark-pos-freqs)))
|
|
|
|
example-pos-freqs
|
|
|
|
(take 20 example-pos-freqs)
|
|
(time (def example-structures (dark-structures)))
|
|
|
|
(def common-example-structures
|
|
(filter
|
|
#(< 10 (second %))
|
|
example-structures))
|
|
(count common-example-structures)
|
|
(let [structure (rand-nth (seq common-example-structures))
|
|
zipper (zip/seq-zip (first structure))]
|
|
(loop [zipper zipper]
|
|
(let [path (map first (zip/path zipper))]
|
|
(cond
|
|
(zip/end? zipper) (zip/root zipper)
|
|
(and (not-empty path)
|
|
(example-pos-freqs path))
|
|
(recur
|
|
(-> zipper
|
|
zip/up
|
|
(zip/append-child
|
|
(first
|
|
(rand-nth
|
|
(seq
|
|
(example-pos-freqs path)))))
|
|
zip/down
|
|
zip/next
|
|
zip/next))
|
|
:else (recur (zip/next zipper))))))
|
|
|
|
(get-in {:a 1} '())
|
|
(let [zipper (zip/seq-zip '(TOP (S (NP) (VB))))]
|
|
(-> zipper
|
|
zip/down
|
|
zip/right
|
|
zip/node))
|
|
)
|