Add example code, more nlp options
parent
f0ea2bc513
commit
86546c7d01
@ -0,0 +1,192 @@
|
||||
(ns examples.core
|
||||
(:require [clojure.string :as string]
|
||||
[clojure.set]
|
||||
[com.owoga.prhyme.frp :as frp]
|
||||
[com.owoga.prhyme.core :as prhyme]
|
||||
[com.owoga.prhyme.data.bigrams :as bigrams]
|
||||
[com.owoga.prhyme.gen :as gen]
|
||||
[com.owoga.prhyme.data.dictionary :as dict]
|
||||
[com.owoga.prhyme.data.thesaurus :as thesaurus]
|
||||
[com.owoga.prhyme.data.darklyrics :as darklyrics]
|
||||
[com.owoga.prhyme.generation.weighted-selection :as weighted]
|
||||
[clojure.set :as set]
|
||||
[clojure.zip :as zip]))
|
||||
|
||||
(defn weight-fn [word target result]
|
||||
(let [rimes (frp/consecutive-matching word target :rimes)
|
||||
nuclei (frp/consecutive-matching word target :nuclei)
|
||||
onsets (frp/consecutive-matching word target :onsets)
|
||||
total (apply + (map count [rimes nuclei onsets]))]
|
||||
total))
|
||||
|
||||
(defn pred-fn [word target result]
|
||||
(< 0 (weight-fn word target result)))
|
||||
|
||||
(defn weight-popular [word target result]
|
||||
(if (dict/popular (:normalized-word word))
|
||||
10
|
||||
1))
|
||||
|
||||
(defn pred-popular [word target result]
|
||||
(< 1 (weight-popular word target result)))
|
||||
|
||||
(def words-by-rime (prhyme/words-by-rime*
|
||||
(filter
|
||||
(fn [[word & _]]
|
||||
(get
|
||||
dict/popular
|
||||
(string/lower-case word)))
|
||||
dict/cmu-dict)))
|
||||
|
||||
(defn rime-1 [target]
|
||||
(let [rime (last (:rimes target))]
|
||||
(fn [x]
|
||||
(= rime (last (:rimes x))))))
|
||||
|
||||
(defn rime-2 [target]
|
||||
(let [rime (last (butlast (:rimes target)))]
|
||||
(fn [x]
|
||||
(= rime (last (butlast (:rimes x)))))))
|
||||
|
||||
(defn rime-member? [coll]
|
||||
(let [coll (into #{} coll)]
|
||||
(fn [x]
|
||||
(coll (:normalized-word x)))))
|
||||
|
||||
(defn rime-compare [& comparators]
|
||||
(let [juxtcomp (apply juxt comparators)]
|
||||
(fn [a b]
|
||||
(let [a (juxtcomp a)
|
||||
b (juxtcomp b)]
|
||||
(compare a b)))))
|
||||
(def c
|
||||
(fn [a b]
|
||||
((rime-compare
|
||||
(rime-1 {:rimes '(1 2)})
|
||||
(rime-2 {:rimes '(1 2)})
|
||||
(rime-member? ["foo" "bar"]))
|
||||
b a)))
|
||||
|
||||
(comment
|
||||
(let [coll [{:rimes '(3 2) :normalized-word "foo"}
|
||||
{:rimes '(1 2) :normalized-word "foo"}
|
||||
{:rimes '(4 5) :normalized-word "foo"}
|
||||
{:rimes '(1 2) :normalized-word "buzz"}]]
|
||||
(sort c coll))
|
||||
|
||||
(let [coll '("woman"
|
||||
"union"
|
||||
"passion"
|
||||
"infatuation"
|
||||
"emotion"
|
||||
"disposition"
|
||||
"communion"
|
||||
"attraction"
|
||||
"affection"
|
||||
"adoration"
|
||||
"admiration")
|
||||
coll (map #(prhyme/phrase->word dict/popular %) coll)
|
||||
target (prhyme/phrase->word dict/popular "devotion")
|
||||
synonyms (thesaurus/synonyms "love" "heart")
|
||||
comparisons (fn [target]
|
||||
(fn [a b]
|
||||
((rime-compare
|
||||
(rime-1 target)
|
||||
(rime-2 target)
|
||||
(rime-member? synonyms))
|
||||
b a)))]
|
||||
(sort (comparisons target) coll))
|
||||
|
||||
)
|
||||
|
||||
(defn rhymestorm [& words]
|
||||
(let [synonyms (->> (apply thesaurus/synonyms words)
|
||||
(filter #(get dict/popular %))
|
||||
(into #{}))
|
||||
comparisons (fn [target]
|
||||
(fn [a b]
|
||||
((rime-compare
|
||||
(rime-1 target)
|
||||
(rime-2 target)
|
||||
(rime-member? synonyms))
|
||||
b a)))]
|
||||
(->> synonyms
|
||||
(map
|
||||
(fn [synonym]
|
||||
(let [word (prhyme/phrase->word dict/prhyme-dict synonym)
|
||||
rhymes (get words-by-rime (last (:rimes word)))]
|
||||
(when rhymes
|
||||
(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
|
||||
rhyming-synonyms (remove #{(:normalized-word word)} (filter synonyms rhyming-words))]
|
||||
[(:normalized-word word) rhyming-synonyms])))))
|
||||
(remove (fn [[_ rhymes]]
|
||||
(empty? rhymes)))
|
||||
(map (fn [[target rhymes]]
|
||||
[target (->> rhymes
|
||||
(map prhyme/phrase->word dict/popular)
|
||||
(sort (comparisons (prhyme/phrase->word dict/popular target)))
|
||||
(map :normalized-word))]))
|
||||
(into {}))))
|
||||
|
||||
(comment
|
||||
(rhymestorm "love")
|
||||
(take 3 (drop 500 dict/prhyme-dict))
|
||||
(take 3 dict/cmu-dict)
|
||||
(take 3 dict/popular)
|
||||
|
||||
(let [node (get-in words-by-rime ['("AH" "V")])]
|
||||
(->> (prhyme/flatten-node node)))
|
||||
|
||||
(let [love-synonyms (thesaurus/thesaurus "love")
|
||||
heart-synonyms (thesaurus/thesaurus "heart")]
|
||||
(->> (clojure.set/intersection
|
||||
(into #{} love-synonyms)
|
||||
(into #{} heart-synonyms))
|
||||
(map string/lower-case)
|
||||
(filter #(dict/popular %))))
|
||||
|
||||
(let [synonyms (thesaurus/synonyms "love" "heart")]
|
||||
synonyms)
|
||||
|
||||
(def love-rhymes
|
||||
(let [synonyms (->> (thesaurus/synonyms "love" "heart")
|
||||
(filter #(get dict/popular %))
|
||||
(into #{}))]
|
||||
(->>
|
||||
(map
|
||||
(fn [synonym]
|
||||
(let [word (prhyme/phrase->word dict/prhyme-dict synonym)
|
||||
rhymes (get words-by-rime (last (:rimes word)))]
|
||||
(when rhymes
|
||||
(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
|
||||
rhyming-synonyms (filter synonyms rhyming-words)]
|
||||
[(:normalized-word word) rhyming-synonyms]))))
|
||||
synonyms)
|
||||
(into {}))))
|
||||
|
||||
(count love-rhymes)
|
||||
(get-in words-by-rime ['("AH" "V")])
|
||||
|
||||
(weight-fn
|
||||
(first (filter #(= (:normalized-word %) "gotshal's") dict/prhyme-dict))
|
||||
(prhyme/phrase->word dict/prhyme-dict "bye bye")
|
||||
nil)
|
||||
|
||||
(take 10 darklyrics/darklyrics-markov-2)
|
||||
(get darklyrics/darklyrics-markov-2 '("memory" "my"))
|
||||
(repeatedly
|
||||
5
|
||||
(fn []
|
||||
(let [rhymes (gen/selection-seq
|
||||
dict/prhyme-dict
|
||||
(comp (weighted/adjust-for-tail-rhyme 0.90)
|
||||
#_(weighted/adjust-for-rhymes 0.50)
|
||||
#_(weighted/adjust-for-fn :adj-rimes 0.80 pred-fn weight-fn)
|
||||
(weighted/adjust-for-fn :adj-popular 0.95 pred-popular weight-popular)
|
||||
(weighted/adjust-for-markov darklyrics/darklyrics-markov-2 0.99))
|
||||
(prhyme/phrase->word dict/prhyme-dict "happy birthday taylor my love"))]
|
||||
(->> rhymes
|
||||
(take 5)
|
||||
(map :normalized-word)))))
|
||||
|
||||
)
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@ -0,0 +1,339 @@
|
||||
(ns com.owoga.corpus.lovecraft
|
||||
(:require [net.cgrand.enlive-html :as html]
|
||||
[clojure.string :as string]
|
||||
[com.owoga.prhyme.util.weighted-rand :as wr]
|
||||
[com.owoga.prhyme.data.dictionary :as dict]
|
||||
[com.owoga.prhyme.util :as util]
|
||||
[com.owoga.prhyme.core :as prhyme]
|
||||
[com.owoga.prhyme.util.nlp :as nlp]
|
||||
[com.owoga.prhyme.gen :as gen]
|
||||
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
|
||||
[taoensso.tufte :as tufte :refer [defnp p profiled profile]]
|
||||
[com.owoga.prhyme.frp :as frp]
|
||||
[clojure.java.io :as io]))
|
||||
|
||||
(tufte/add-basic-println-handler! {})
|
||||
|
||||
(def ^:dynamic *base-url* "https://www.hplovecraft.com/writings/texts/")
|
||||
|
||||
(def words-map
|
||||
(into {} (map #(vector (string/lower-case (:word %)) %) frp/words)))
|
||||
|
||||
(defn fetch-url [url]
|
||||
(html/html-resource (java.net.URL. url)))
|
||||
|
||||
(defn links []
|
||||
(map
|
||||
#(str *base-url* (first (html/attr-values % :href)))
|
||||
(html/select
|
||||
(fetch-url *base-url*)
|
||||
[:li :> [:a (html/attr? :href)]])))
|
||||
|
||||
(defn contentful-sections [nodes]
|
||||
(->> nodes
|
||||
(map html/text)
|
||||
(filter #(> (count %) 100))))
|
||||
|
||||
(defn text-from-link [link]
|
||||
(->> (html/select
|
||||
(fetch-url link)
|
||||
[:body])
|
||||
(first)
|
||||
(html/text)
|
||||
((fn [s] (string/replace s #"[\s\u00A0]+" " ")))))
|
||||
|
||||
(defn cleanup [content]
|
||||
(-> content
|
||||
(string/replace #"Return to.*$" "")
|
||||
(string/replace #"Home.*?This Site" "")
|
||||
(string/replace #"[^a-zA-Z -]+" "")))
|
||||
|
||||
(defn tokens [content]
|
||||
(string/split content #"\s+"))
|
||||
|
||||
(defn append-to-file [filepath text]
|
||||
(with-open [w (io/writer filepath :append true)]
|
||||
(.write w text)))
|
||||
|
||||
(defn scrape []
|
||||
(run!
|
||||
(fn [link]
|
||||
(->> (text-from-link link)
|
||||
(cleanup)
|
||||
(#(str % "\n"))
|
||||
(append-to-file "lovecraft.txt")))
|
||||
(take 10 (links))))
|
||||
|
||||
(defn tokens-from-file [file]
|
||||
(with-open [r (io/reader file)]
|
||||
(tokens (slurp r))))
|
||||
|
||||
(defn window [n]
|
||||
(fn [coll]
|
||||
(cond
|
||||
(empty? coll) []
|
||||
(< (count coll) n) []
|
||||
:else (cons (take n coll)
|
||||
(lazy-seq ((window n) (drop n coll)))))))
|
||||
|
||||
(defn markov [tokens]
|
||||
(->> tokens
|
||||
(map
|
||||
(fn [token]
|
||||
(let [k (butlast token)
|
||||
v (last token)]
|
||||
[k v])))
|
||||
(reduce
|
||||
(fn [a [k v]]
|
||||
(update-in a [k v] (fnil inc 0)))
|
||||
{})))
|
||||
|
||||
(defn running-total
|
||||
([coll]
|
||||
(running-total coll 0))
|
||||
([coll last-val]
|
||||
(cond
|
||||
(empty? coll) nil
|
||||
:else (cons (+ last-val (first coll))
|
||||
(lazy-seq
|
||||
(running-total
|
||||
(rest coll)
|
||||
(+ last-val (first coll))))))))
|
||||
|
||||
(defn weighted-rand [weights]
|
||||
(let [running-weights (running-total weights)
|
||||
rand-val (rand (last running-weights))]
|
||||
(loop [i 0]
|
||||
(if (> (nth running-weights i) rand-val)
|
||||
i
|
||||
(recur (inc i))))))
|
||||
|
||||
(def word-set (into #{} (->> prhyme/words
|
||||
(map first)
|
||||
(map string/lower-case)
|
||||
(map #(string/replace % #"\(\d+\)" "")))))
|
||||
|
||||
(defn normalize-tokens [tokens]
|
||||
(->> tokens
|
||||
(map string/lower-case)
|
||||
(filter word-set)))
|
||||
|
||||
(defn main []
|
||||
(->> (tokens-from-file "lovecraft.txt")
|
||||
(reverse)
|
||||
(normalize-tokens)
|
||||
((window 2))
|
||||
(markov)
|
||||
(into {})))
|
||||
|
||||
(defn synonym?
|
||||
"Given a possibility, like [\"foo\" 3]
|
||||
which says that foo follows a particular key with
|
||||
a weight of 3, a word is a synonym of that possibility
|
||||
if the word is a synonym ."
|
||||
[p synonyms]
|
||||
(synonyms p))
|
||||
|
||||
(defn adjust-for-synonyms
|
||||
"If a word is in a set of synonyms, adjust its weight upwards."
|
||||
[synonyms]
|
||||
(fn [possibilities]
|
||||
(reduce
|
||||
(fn [p s]
|
||||
(if (s p)
|
||||
(update p s #(* 5 %))
|
||||
p))
|
||||
possibilities
|
||||
synonyms)))
|
||||
|
||||
(defn adjust-for-rimes
|
||||
[target-rime dictionary]
|
||||
(fn [possibilities]
|
||||
(into
|
||||
{}
|
||||
(map
|
||||
(fn [[p v]]
|
||||
(let [possibility (get dictionary p)
|
||||
factor (count
|
||||
(frp/consecutive-matching
|
||||
target-rime
|
||||
possibility
|
||||
:rimes))]
|
||||
[p (* v (max 1 (* factor 4)))]))
|
||||
possibilities))))
|
||||
|
||||
|
||||
(defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
|
||||
|
||||
(defn markov-key [key-fn]
|
||||
(fn [text]
|
||||
(key-fn text)))
|
||||
|
||||
(defn gen-from [m p initial]
|
||||
(loop [r (list initial)]
|
||||
(cond
|
||||
(p r) (recur (cons (m (list (first r))) r))
|
||||
:else r)))
|
||||
|
||||
|
||||
(defn rhyming-words
|
||||
"List of rhyming words sorted by quality of rhyme."
|
||||
[target]
|
||||
(let [target-phrase (->> target
|
||||
(prhyme/phrase->word frp/words)
|
||||
(#(assoc % :rimes? true)))]
|
||||
(->> target-phrase
|
||||
(#(assoc % :rimes? true))
|
||||
(frp/prhyme frp/words)
|
||||
(sort-by
|
||||
#(- (count
|
||||
(frp/consecutive-matching
|
||||
%
|
||||
target-phrase
|
||||
:rimes)))))))
|
||||
|
||||
(defn markov-rhymes [markov-data rhyming-words]
|
||||
(->> (map
|
||||
(fn [word]
|
||||
(->> word
|
||||
:word
|
||||
string/lower-case
|
||||
(#(string/replace % #"\(\d+\)" ""))
|
||||
(#(vector % (get markov-data (list %))))))
|
||||
rhyming-words)
|
||||
(into #{})
|
||||
(remove
|
||||
(fn [[w p]]
|
||||
(nil? p)))))
|
||||
|
||||
(defn adjust-for-over-syllables
|
||||
"Adjust weights to prefer not going over the number
|
||||
of syllables of the target word."
|
||||
[target]
|
||||
(fn [words]
|
||||
(p :adjust-for-syllables
|
||||
(map
|
||||
(fn [word]
|
||||
(if (or (nil? (:syllable-count word))
|
||||
(nil? (:syllables target)))
|
||||
(println word target))
|
||||
(cond
|
||||
(= (:syllable-count word) (count (:syllables target)))
|
||||
(as-> word word
|
||||
(assoc word :weight (* 3 (:weight word)))
|
||||
(assoc word :adjusted-for-syllables-factor 3))
|
||||
|
||||
(< (:syllable-count word) (count (:syllables target)))
|
||||
(as-> word word
|
||||
(assoc word :weight (* 2 (:weight word)))
|
||||
(assoc word :adjusted-for-syllables-factor 2))
|
||||
|
||||
:else
|
||||
(as-> word word
|
||||
(assoc word :weight (* 1 (:weight word)))
|
||||
(assoc word :adjusted-for-syllables-factor 1))))
|
||||
words))))
|
||||
|
||||
(defn adjust-for-membership-1
|
||||
[set_ percent]
|
||||
(let [ratio (- 1 percent)]
|
||||
(fn [words]
|
||||
(let [[members non-members]
|
||||
((juxt filter remove)
|
||||
#(set_ (:normalized-word %))
|
||||
words)
|
||||
weight-non-members (apply + (map :weight non-members))
|
||||
target-weight-members (* ratio weight-non-members)
|
||||
count-members (count members)
|
||||
adjustment-members (/ target-weight-members count-members)]
|
||||
(concat
|
||||
(map
|
||||
(fn [member]
|
||||
(as-> member member
|
||||
(assoc member :weight (* adjustment-members (:weight member)))
|
||||
(assoc member :adjustment-for-membership adjustment-members)))
|
||||
members)
|
||||
non-members)))))
|
||||
|
||||
(defn adjust-for-membership [set_]
|
||||
(fn [words]
|
||||
(map
|
||||
(fn [word]
|
||||
(if (set_ (:normalized-word word))
|
||||
(as-> word word
|
||||
(assoc word :weight (* 2 (:weight word)))
|
||||
(assoc word :adjust-for-membership-factor 2))
|
||||
(assoc word :adjust-for-membership-factor 1)))
|
||||
words)))
|
||||
|
||||
(defn filter-for-membership [set_]
|
||||
(fn [words]
|
||||
(map
|
||||
(fn [word]
|
||||
(if-not (set_ (:normalized-word word))
|
||||
(as-> word word
|
||||
(assoc word :weight (* 0.01 (:weight word)))
|
||||
(assoc word :filter-for-membership-factor 0.01))
|
||||
word))
|
||||
words)))
|
||||
|
||||
(defn adjust-for-markov [markov-options]
|
||||
(let [markov-set (into #{} (map first (keys markov-options)))]
|
||||
(fn [words]
|
||||
(let [result (map
|
||||
(fn [word]
|
||||
(if (markov-set (:normalized-word word))
|
||||
(as-> word word
|
||||
(assoc word :weight (* 100 (:weight word)))
|
||||
(assoc word :adjust-for-markov-factor 100))
|
||||
(assoc word :adjust-for-markov-factor 1)))
|
||||
words)]
|
||||
result))))
|
||||
|
||||
(comment
|
||||
(let [markov-adjuster (adjust-for-markov (lovecraft-markov '("help")))]
|
||||
(take 5 (markov-adjuster frp/words))))
|
||||
|
||||
(defn adjust-for-membership-1
|
||||
[set_ percent]
|
||||
(let [ratio (- 1 percent)]
|
||||
(fn [words]
|
||||
(let [[members non-members]
|
||||
((juxt filter remove)
|
||||
#(set_ (:normalized-word %))
|
||||
words)
|
||||
weight-non-members (apply + (map :weight non-members))
|
||||
target-weight-members (* ratio weight-non-members)
|
||||
count-members (count members)
|
||||
adjustment-members (/ target-weight-members count-members)]
|
||||
(concat
|
||||
(map
|
||||
(fn [member]
|
||||
(as-> member member
|
||||
(assoc member :weight (* adjustment-members (:weight member)))
|
||||
(assoc member :adjustment-for-membership adjustment-members)))
|
||||
members)
|
||||
non-members)))))
|
||||
|
||||
(defn adjust-for-markov-1
|
||||
[markov-options percent]
|
||||
(let [ratio (- 1 percent)]
|
||||
(fn [words]
|
||||
(if (nil? markov-options)
|
||||
words
|
||||
(let [[markovs non-markovs]
|
||||
((juxt filter remove)
|
||||
#(markov-options (:normalized-word %))
|
||||
words)
|
||||
weight-non-markovs (apply + (map :weight non-markovs))
|
||||
target-weight-markovs (* ratio weight-non-markovs)
|
||||
count-markovs (count markovs)
|
||||
adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
|
||||
(concat
|
||||
(map
|
||||
(fn [markov]
|
||||
(as-> markov markov
|
||||
(assoc markov :weight (* adjustment-markovs (:weight markov)))
|
||||
(assoc markov :adjustment-for-markov adjustment-markovs)))
|
||||
markovs)
|
||||
non-markovs))))))
|
@ -0,0 +1,63 @@
|
||||
(ns com.owoga.prhyme.data.bigrams
|
||||
(:require [clojure.java.io :as io]
|
||||
[clojure.string :as string]
|
||||
[com.owoga.prhyme.syllabify :refer [syllabify]]
|
||||
[com.owoga.prhyme.data.dictionary :as dict]
|
||||
[com.owoga.prhyme.util :as util]
|
||||
[com.owoga.prhyme.core :as prhyme]))
|
||||
|
||||
(def popular-bigrams
|
||||
(->> (io/resource "bigrams-with-counts.txt")
|
||||
io/reader
|
||||
line-seq
|
||||
(map #(string/split % #"\s+"))
|
||||
(map #(map string/lower-case %))
|
||||
(filter (fn [[w1 w2 num]]
|
||||
(and (dict/popular w1)
|
||||
(dict/popular w2))))
|
||||
(map (partial take 2))))
|
||||
|
||||
(def phrase->Word
|
||||
(prhyme/make-phrase->Word
|
||||
(into
|
||||
{}
|
||||
(map
|
||||
(fn [[word & phonemes]]
|
||||
[(string/lower-case word)
|
||||
phonemes])
|
||||
dict/cmu-dict))))
|
||||
|
||||
(def popular-bigrams-with-syllables
|
||||
(->> popular-bigrams
|
||||
(map (partial string/join " "))
|
||||
(map phrase->Word)))
|
||||
|
||||
(def popular-bigram-words
|
||||
(->> popular-bigrams
|
||||
(map (partial string/join " "))
|
||||
(map phrase->Word)))
|
||||
|
||||
(def popular-mono-and-bi-grams
|
||||
(concat
|
||||
dict/prhyme-dict
|
||||
popular-bigram-words))
|
||||
|
||||
(def popular-mono-and-bi-grams-by-rime
|
||||
(prhyme/prhyme-words-by-rime*
|
||||
popular-mono-and-bi-grams))
|
||||
|
||||
(comment
|
||||
(->> (reverse (:rimes (phrase->Word "beautiful")))
|
||||
(get-in popular-mono-and-bi-grams-by-rime)
|
||||
(prhyme/flatten-node)
|
||||
(remove #(re-matches #".*beautiful" (:word %)))
|
||||
(map :word)
|
||||
(map string/lower-case))
|
||||
(get-in
|
||||
popular-mono-and-bi-grams-by-rime
|
||||
['("AA") '("ER") '("IY")])
|
||||
(take 5 popular-bigrams)
|
||||
(take 5 popular-bigram-words)
|
||||
(take-last 5 popular-bigram-words)
|
||||
(prhyme/phrase->word dict/prhyme-dict "hi there"))
|
||||
(def words-and-bigrams())
|
@ -0,0 +1,39 @@
|
||||
(ns com.owoga.prhyme.data.darklyrics
|
||||
(:require [clojure.java.io :as io]
|
||||
[taoensso.nippy :as nippy])
|
||||
(:import [java.io DataInputStream ByteArrayOutputStream]))
|
||||
|
||||
(defn thaw-from-file
|
||||
"Convenience util: like `thaw`, but reads from `(clojure.java.io/file <file>)`.
|
||||
|
||||
To thaw from a resource on classpath (e.g in Leiningen `resources` dir):
|
||||
(thaw-from-file (clojure.java.io/resource \"my-resource-name.npy\"))
|
||||
|
||||
See also `freeze-to-file`."
|
||||
([file ] (thaw-from-file file nil))
|
||||
([file thaw-opts]
|
||||
(let [xin (io/input-stream file)
|
||||
xout (ByteArrayOutputStream.)]
|
||||
(io/copy xin xout)
|
||||
(nippy/thaw (.toByteArray xout) thaw-opts))))
|
||||
|
||||
;; (thaw-from-file (io/resource "test.bin"))
|
||||
;; (bytes (byte-array (take 20 (range))))
|
||||
|
||||
;; (byte-array (map (comp byte int) "ascii"))
|
||||
;; ;; => [97, 115, 99, 105, 105]
|
||||
;; (bytes (byte-array (map (comp byte int) "ascii")))
|
||||
;; ;; => [97, 115, 99, 105, 105]
|
||||
|
||||
;; (let [xin (io/input-stream (io/resource "test.bin"))
|
||||
;; xout (ByteArrayOutputStream.)]
|
||||
;; (io/copy xin xout)
|
||||
;; (nippy/thaw (.toByteArray xout)))
|
||||
|
||||
;; (.fullyRead (io/input-stream (io/resource "test.bin")))
|
||||
|
||||
;; (.getSize (io/input-stream (io/resource "dark-corpus-2.bin")))
|
||||
;; (def data (into {} (map vec (partition 2 (range 20)))))
|
||||
;; (nippy/freeze-to-file "resources/test.bin" data)
|
||||
(def darklyrics-markov-2
|
||||
(thaw-from-file (io/resource "dark-corpus-2.bin")))
|
@ -1,9 +1,18 @@
|
||||
(ns com.owoga.prhyme.data.thesaurus
|
||||
(:require [clojure.string :as string]
|
||||
[clojure.java.io :as io]))
|
||||
[clojure.java.io :as io]
|
||||
[clojure.set]))
|
||||
|
||||
(def thesaurus
|
||||
(->> (line-seq (io/reader (io/resource "mthesaur.txt")))
|
||||
(map #(string/split % #","))
|
||||
(map #(vector (first %) (rest %)))
|
||||
(into {})))
|
||||
|
||||
(defn synonyms
|
||||
([& words]
|
||||
(->> words
|
||||
(map string/lower-case)
|
||||
(map #(get thesaurus %))
|
||||
(map (fn [ws] (into #{} ws)))
|
||||
(apply clojure.set/intersection))))
|
||||
|
@ -0,0 +1,76 @@
|
||||
(ns com.owoga.prhyme.limerick
|
||||
(:require [com.owoga.prhyme.gen :as gen]
|
||||
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
|
||||
[clojure.string :as string]
|
||||
[com.owoga.prhyme.core :as prhyme]
|
||||
[com.owoga.prhyme.util :as util]))
|
||||
|
||||
(defn rhyme-from-scheme
|
||||
"scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]"
|
||||
[words markov scheme]
|
||||
(loop [scheme scheme
|
||||
rhymes {}
|
||||
result []]
|
||||
(cond
|
||||
(empty? scheme) result
|
||||
:else
|
||||
(let [[pattern syllable-count] (first scheme)
|
||||
banned-words (into #{} (->> result
|
||||
(map #(string/split % #" "))
|
||||
(map #(last %))))
|
||||
adj (util/comp-rnil
|
||||
(weighted-selection/adjust-for-markov
|
||||
markov
|
||||
0.99)
|
||||
(when (rhymes pattern)
|
||||
(weighted-selection/adjust-for-tail-rhyme 0.99)))
|
||||
rhyme (if (nil? (get rhymes pattern))
|
||||
(gen/gen-sentence-with-syllable-count
|
||||
adj
|
||||
syllable-count
|
||||
words)
|
||||
(gen/gen-rhyme-with-syllable-count
|
||||
adj
|
||||
syllable-count
|
||||
(remove #(banned-words (:normalized-word %)) words)
|
||||
(prhyme/phrase->word words (get rhymes pattern))))]
|
||||
(recur (rest scheme)
|
||||
(assoc rhymes pattern rhyme)
|
||||
(conj result rhyme))))))
|
||||
|
||||
(comment
|
||||
(require '[com.owoga.prhyme.data.dictionary :as dict]
|
||||
'[com.owoga.prhyme.data.darklyrics :refer [darklyrics-markov-2]]
|
||||
'[clojure.java.io :as io])
|
||||
(rhyme-from-scheme dict/prhyme-dict darklyrics-markov-2 '((A 8) (A 8) (B 5) (B 5) (A 8)))
|
||||
|
||||
)
|
||||
|
||||
(comment
|
||||
["bishop larch smitten us dwell"
|
||||
"solely first week in hell"
|
||||
"and take that for three"
|
||||
"come wrapped in glory"
|
||||
"you ever leave it so well"]
|
||||
["romancing realized too late"
|
||||
"my crown revive my withered state"
|
||||
"reign is obsolete"
|
||||
"i sit in the street"
|
||||
"but nobody cares of my fate"]
|
||||
["flesh is hacked to get me sedate"
|
||||
"demonstration obsessed with hate"
|
||||
"justice will be written in stone"
|
||||
"and you will be shown"
|
||||
"bedrooms of icons suffocate"]
|
||||
["you will bow to their hungry gods"
|
||||
"come will come whatever the odds"
|
||||
"now we see the light"
|
||||
"you can't put it right"
|
||||
"recklessly chopping firing squads"]
|
||||
["untimely they fool their poor life"
|
||||
"it wither away with this knife"
|
||||
"hate is my virtue"
|
||||
"my feelings are well overdue"
|
||||
"war we await the afterlife"])
|
||||
|
||||
|
@ -1,86 +0,0 @@
|
||||
(ns com.owoga.prhyme.lymeric
|
||||
(:require [com.owoga.prhyme.gen :as gen]
|
||||
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
|
||||
[clojure.string :as string]
|
||||
[com.owoga.prhyme.core :as prhyme]
|
||||
[com.owoga.prhyme.frp :as frp]
|
||||
[com.owoga.prhyme.util :as util]
|
||||
[taoensso.nippy :as nippy]
|
||||
[clojure.java.io :as io]))
|
||||
|
||||
|
||||
(defn rhyme-from-scheme
|
||||
"scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]"
|
||||
[rhymer markov scheme]
|
||||
(let [base-words (map #(assoc % :weight 1) frp/popular)]
|
||||
(loop [scheme scheme
|
||||
rhymes {}
|
||||
result []]
|
||||
(cond
|
||||
(empty? scheme) result
|
||||
:else
|
||||
(let [[pattern syllable-count] (first scheme)
|
||||
banned-words (into #{} (->> result
|
||||
(map #(string/split % #" "))
|
||||
(map #(last %))))
|
||||
adj (util/comp-rnil
|
||||
(weighted-selection/adjust-for-markov
|
||||
markov
|
||||
0.99)
|
||||
(when (rhymes pattern)
|
||||
(weighted-selection/adjust-for-tail-rhyme 0.99)))
|
||||
rhyme (if (nil? (get rhymes pattern))
|
||||
(gen/gen-sentence-with-syllable-count
|
||||
adj
|
||||
syllable-count
|
||||
base-words)
|
||||
(gen/gen-rhyme-with-syllable-count
|
||||
adj
|
||||
syllable-count
|
||||
(remove #(banned-words (:norm-word %))
|
||||
base-words)
|
||||
(prhyme/phrase->word frp/words (get rhymes pattern))))]
|
||||
(recur (rest scheme)
|
||||
(assoc rhymes pattern rhyme)
|
||||
(conj result rhyme)))))))
|
||||
|
||||
(comment
|
||||
(time (def darkov-2 (nippy/thaw-from-file (io/resource "darkov-2.bin"))))
|
||||
(rhyme-from-scheme nil darkov-2 '((A 8) (A 8) (B 5) (B 5) (A 8)))
|
||||
|
||||
)
|
||||
|
||||
(comment
|
||||
(->> (gen/selection-seq
|
||||
(map #(assoc % :weight 1) frp/words)
|
||||
(weighted-selection/adjust-for-rhymes 0.99)
|
||||
(prhyme/phrase->word frp/words "hi there my boy"))
|
||||
(take 3))
|
||||
|
||||
["bishop larch smitten us dwell"
|
||||
"solely first week in hell"
|
||||
"and take that for three"
|
||||
"come wrapped in glory"
|
||||
"you ever leave it so well"]
|
||||
["romancing realized too late"
|
||||
"my crown revive my withered state"
|
||||
"reign is obsolete"
|
||||
"i sit in the street"
|
||||
"but nobody cares of my fate"]
|
||||
["flesh is hacked to get me sedate"
|
||||
"demonstration obsessed with hate"
|
||||
"justice will be written in stone"
|
||||
"and you will be shown"
|
||||
"bedrooms of icons suffocate"]
|
||||
["you will bow to their hungry gods"
|
||||
"come will come whatever the odds"
|
||||
"now we see the light"
|
||||
"you can't put it right"
|
||||
"recklessly chopping firing squads"]
|
||||
["untimely they fool their poor life"
|
||||
"it wither away with this knife"
|
||||
"hate is my virtue"
|
||||
"my feelings are well overdue"
|
||||
"war we await the afterlife"])
|
||||
|
||||
|
@ -0,0 +1,81 @@
|
||||
(ns com.owoga.prhyme.nlp.tag-sets.treebank-ii)
|
||||
|
||||
;;;; Penn Treebank II Constituent Tags
|
||||
;;;; http://www.surdeanu.info/mihai/teaching/ista555-fall13/readings/PennTreebankConstituents.html#MD
|
||||
(def clauses
|
||||
{'S "simple declarative clause, i.e. one that is not introduced by a (possible empty) subordinating conjunction or a wh-word and that does not exhibit subject-verb inversion."
|
||||
'SBAR "Clause introduced by a (possibly empty) subordinating conjunction."
|
||||
'SBARQ "Direct question introduced by a wh-word or a wh-phrase. Indirect questions and relative clauses should be bracketed as SBAR, not SBARQ."
|
||||
'SINV "Inverted declarative sentence, i.e. one in which the subject follows the tensed verb or modal."
|
||||
'SQ "Inverted yes/no question, or main clause of a wh-question, following the wh-phrase in SBARQ."})
|
||||
|
||||
(def phrases
|
||||
{'ADJP "Adjective Phrase."
|
||||
'ADVP "Adverb Phrase."
|
||||
'CONJP "Conjunction Phrase."
|
||||
'FRAG "Fragment."
|
||||
'INTJ "Interjection. Corresponds approximately to the part-of-speech tag UH."
|
||||
'LST "List marker. Includes surrounding punctuation."
|
||||
'NAC "Not a Constituent; used to show the scope of certain prenominal modifiers within an NP."
|
||||
'NP "Noun Phrase."
|
||||
'NX "Used within certain complex NPs to mark the head of the NP. Corresponds very roughly to N-bar level but used quite differently."
|
||||
'PP "Prepositional Phrase."
|
||||
'PRN "Parenthetical."
|
||||
'PRT "Particle. Category for words that should be tagged RP."
|
||||
'QP "Quantifier Phrase (i.e. complex measure/amount phrase); used within NP."
|
||||
'RRC "Reduced Relative Clause."
|
||||
'UCP "Unlike Coordinated Phrase."
|
||||
'VP "Vereb Phrase."
|
||||
'WHADJP "Wh-adjective Phrase. Adjectival phrase containing a wh-adverb, as in how hot."
|
||||
'WHAVP "Wh-adverb Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing a wh-adverb such as how or why."
|
||||
'WHNP "Wh-noun Phrase. Introduces a clause with an NP gap. May be null (containing the 0 complementizer) or lexical, containing some wh-word, e.g. who, which book, whose daughter, none of which, or how many leopards."
|
||||
'WHPP "Wh-prepositional Phrase. Prepositional phrase containing a wh-noun phrase (such as of which or by whose authority) that either introduces a PP gap or is contained by a WHNP."
|
||||
'X "Unknown, uncertain, or unbracketable. X is often used for bracketing typos and in bracketing the...the-constructions."})
|
||||
|
||||
(def words
|
||||
{'CC "Coordinating conjunction"
|
||||
'CD "Cardinal number"
|
||||
'DT "Determiner"
|
||||
'EX "Existential there"
|
||||
'FW "Foreign word"
|
||||
'IN "Preposition or subordinating conjunction"
|
||||
'JJ "Adjective"
|
||||
'JJR "Adjective, comparative"
|
||||
'JJS "Adjective, superlative"
|
||||
'LS "List item marker"
|
||||
'MD "Modal"
|
||||
'NN "Noun, singular or mass"
|
||||
'NNS "Noun, plural"
|
||||
'NNP "Proper noun, singular"
|
||||
'NNPS "Proper noun, plural"
|
||||
'PDT "Predeterminer"
|
||||
'POS "Possessive ending"
|
||||
'PRP "Personal pronoun"
|
||||
'PRP$ "Possessive pronoun (prolog version PRP-S)"
|
||||
'RB "Adverb"
|
||||
'RBR "Adverb, comparative"
|
||||
'RBS "Adverb, superlative"
|
||||
'RP "Particle"
|
||||
'SYM "Symbol"
|
||||
'TO "to"
|
||||
'UH "Interjection"
|
||||
'VB "Verb, base form"
|
||||
'VBD "Verb, past tense"
|
||||
'VBG "Verb, gerund or present participle"
|
||||
'VBN "Verb, past participle"
|
||||
'VBP "Verb, non-3rd person singular present"
|
||||
'VBZ "Verb, 3rd person singular present"
|
||||
'WDT "Wh-determiner"
|
||||
'WP "Wh-pronoun"
|
||||
'WP$ "Possessive wh-pronoun (prolog version WP-S)"
|
||||
'WRB "Wh-adverb"})
|
||||
|
||||
(def form-function-discrepencies
|
||||
{'-ADV "(adverbial) - marks a constituent
|
||||
other than ADVP or PP when it is used adverbially (e.g. NPs or
|
||||
free (\"headless\" relatives). However, constituents that themselves are
|
||||
modifying an ADVP generally do not get -ADV. If a more specific tag is
|
||||
available (for example, -TMP) then it is used alone and -ADV is implied. See
|
||||
the Adverbials section."
|
||||
'-NOM "(nominal) - marks free (\"headless\") relatives
|
||||
and gerunds when they act nominally."})
|
Loading…
Reference in New Issue