|
|
@ -12,6 +12,7 @@
|
|
|
|
[com.owoga.prhyme.data.dictionary :as dict]
|
|
|
|
[com.owoga.prhyme.data.dictionary :as dict]
|
|
|
|
[com.owoga.prhyme.data.thesaurus :as thesaurus]
|
|
|
|
[com.owoga.prhyme.data.thesaurus :as thesaurus]
|
|
|
|
[com.owoga.prhyme.data.darklyrics :as darklyrics]
|
|
|
|
[com.owoga.prhyme.data.darklyrics :as darklyrics]
|
|
|
|
|
|
|
|
[com.owoga.prhyme.util.weighted-rand :as weighted-rand]
|
|
|
|
[com.owoga.prhyme.generation.weighted-selection :as weighted]
|
|
|
|
[com.owoga.prhyme.generation.weighted-selection :as weighted]
|
|
|
|
[clojure.set :as set]
|
|
|
|
[clojure.set :as set]
|
|
|
|
[clojure.zip :as zip]
|
|
|
|
[clojure.zip :as zip]
|
|
|
@ -193,7 +194,6 @@
|
|
|
|
(->> rhymes
|
|
|
|
(->> rhymes
|
|
|
|
(take 5)
|
|
|
|
(take 5)
|
|
|
|
(map :normalized-word)))))
|
|
|
|
(map :normalized-word)))))
|
|
|
|
|
|
|
|
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
(defn remove-sentences-with-words-not-in-dictionary [dictionary]
|
|
|
|
(defn remove-sentences-with-words-not-in-dictionary [dictionary]
|
|
|
@ -209,13 +209,14 @@
|
|
|
|
(let [directory "dark-corpus"]
|
|
|
|
(let [directory "dark-corpus"]
|
|
|
|
(->> (file-seq (io/file directory))
|
|
|
|
(->> (file-seq (io/file directory))
|
|
|
|
(remove #(.isDirectory %))
|
|
|
|
(remove #(.isDirectory %))
|
|
|
|
(drop 10)
|
|
|
|
(take 1000)
|
|
|
|
(take 10)
|
|
|
|
|
|
|
|
(map slurp)
|
|
|
|
(map slurp)
|
|
|
|
(map util/clean-text)
|
|
|
|
(map util/clean-text)
|
|
|
|
(filter dict/english?)
|
|
|
|
(filter dict/english?)
|
|
|
|
(map #(string/split % #"\n+"))
|
|
|
|
(map #(string/split % #"\n+"))
|
|
|
|
(map (remove-sentences-with-words-not-in-dictionary dict/popular))
|
|
|
|
(map (remove-sentences-with-words-not-in-dictionary dict/popular))
|
|
|
|
|
|
|
|
(remove empty?)
|
|
|
|
|
|
|
|
(remove #(some empty? %))
|
|
|
|
(map nlp/treebank-zipper)
|
|
|
|
(map nlp/treebank-zipper)
|
|
|
|
(map nlp/leaf-pos-path-word-freqs)
|
|
|
|
(map nlp/leaf-pos-path-word-freqs)
|
|
|
|
(apply nlp/deep-merge-with +))))
|
|
|
|
(apply nlp/deep-merge-with +))))
|
|
|
@ -224,12 +225,14 @@
|
|
|
|
(let [directory "dark-corpus"]
|
|
|
|
(let [directory "dark-corpus"]
|
|
|
|
(->> (file-seq (io/file directory))
|
|
|
|
(->> (file-seq (io/file directory))
|
|
|
|
(remove #(.isDirectory %))
|
|
|
|
(remove #(.isDirectory %))
|
|
|
|
(take 1000)
|
|
|
|
(take 500)
|
|
|
|
(map slurp)
|
|
|
|
(map slurp)
|
|
|
|
(map util/clean-text)
|
|
|
|
(map util/clean-text)
|
|
|
|
(filter dict/english?)
|
|
|
|
(filter dict/english?)
|
|
|
|
(map #(string/split % #"\n+"))
|
|
|
|
(map #(string/split % #"\n+"))
|
|
|
|
(map #(remove string/blank? %))
|
|
|
|
(map (remove-sentences-with-words-not-in-dictionary dict/popular))
|
|
|
|
|
|
|
|
(remove empty?)
|
|
|
|
|
|
|
|
(remove #(some empty? %))
|
|
|
|
(map nlp/parse-to-simple-tree)
|
|
|
|
(map nlp/parse-to-simple-tree)
|
|
|
|
(map nlp/parse-tree-sans-leaf-words)
|
|
|
|
(map nlp/parse-tree-sans-leaf-words)
|
|
|
|
(map
|
|
|
|
(map
|
|
|
@ -242,39 +245,55 @@
|
|
|
|
flatten
|
|
|
|
flatten
|
|
|
|
(apply merge-with +))))
|
|
|
|
(apply merge-with +))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(defn weighted-selection-from-map [m]
|
|
|
|
|
|
|
|
(first (weighted-rand/weighted-selection second (seq m))))
|
|
|
|
|
|
|
|
|
|
|
|
(comment
|
|
|
|
(comment
|
|
|
|
(time (def example-pos-freqs (dark-pos-freqs)))
|
|
|
|
(time (def example-pos-freqs (dark-pos-freqs)))
|
|
|
|
|
|
|
|
|
|
|
|
example-pos-freqs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
(take 20 example-pos-freqs)
|
|
|
|
|
|
|
|
(time (def example-structures (dark-structures)))
|
|
|
|
(time (def example-structures (dark-structures)))
|
|
|
|
|
|
|
|
|
|
|
|
(def common-example-structures
|
|
|
|
(let [structure (weighted-selection-from-map example-structures)]
|
|
|
|
(filter
|
|
|
|
(repeatedly
|
|
|
|
#(< 10 (second %))
|
|
|
|
10
|
|
|
|
example-structures))
|
|
|
|
(fn []
|
|
|
|
(count common-example-structures)
|
|
|
|
(->> (nlp/generate-from-structure-and-pos-freqs
|
|
|
|
(let [structure (rand-nth (seq common-example-structures))
|
|
|
|
structure
|
|
|
|
zipper (zip/seq-zip (first structure))]
|
|
|
|
example-pos-freqs)
|
|
|
|
(loop [zipper zipper]
|
|
|
|
nlp/leaf-nodes
|
|
|
|
(let [path (map first (zip/path zipper))]
|
|
|
|
(string/join " ")))))
|
|
|
|
(cond
|
|
|
|
;; => ("then get your life"
|
|
|
|
(zip/end? zipper) (zip/root zipper)
|
|
|
|
;; "sometimes lie my hand"
|
|
|
|
(and (not-empty path)
|
|
|
|
;; "still become your chapter"
|
|
|
|
(example-pos-freqs path))
|
|
|
|
;; "alright fade our surfing"
|
|
|
|
(recur
|
|
|
|
;; "far care my band"
|
|
|
|
(-> zipper
|
|
|
|
;; "all fake my fallow"
|
|
|
|
zip/up
|
|
|
|
;; "here gimme our head"
|
|
|
|
(zip/append-child
|
|
|
|
;; "long back my guide"
|
|
|
|
(first
|
|
|
|
;; "never stop their seed"
|
|
|
|
(rand-nth
|
|
|
|
;; "never consume our tomorrow")
|
|
|
|
(seq
|
|
|
|
|
|
|
|
(example-pos-freqs path)))))
|
|
|
|
;; => ("now scarred towards the future"
|
|
|
|
zip/down
|
|
|
|
;; "never gone among the side"
|
|
|
|
zip/next
|
|
|
|
;; "ill removed with the end"
|
|
|
|
zip/next))
|
|
|
|
;; "well filled in the life"
|
|
|
|
:else (recur (zip/next zipper))))))
|
|
|
|
;; "again torn towards the world"
|
|
|
|
|
|
|
|
;; "desperately matched in the love"
|
|
|
|
|
|
|
|
;; "nowadays matched in the ark"
|
|
|
|
|
|
|
|
;; "awhile needed through all night"
|
|
|
|
|
|
|
|
;; "so torn in the darkness"
|
|
|
|
|
|
|
|
;; "first erased on the land")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;; => ("pictures of the destiny"
|
|
|
|
|
|
|
|
;; "tears on the pain"
|
|
|
|
|
|
|
|
;; "lights in the disaster"
|
|
|
|
|
|
|
|
;; "corpses on the fire"
|
|
|
|
|
|
|
|
;; "castles on the universe"
|
|
|
|
|
|
|
|
;; "efforts for the king"
|
|
|
|
|
|
|
|
;; "visions of the night"
|
|
|
|
|
|
|
|
;; "retreats into the darker"
|
|
|
|
|
|
|
|
;; "tales into the attack"
|
|
|
|
|
|
|
|
;; "pictures into the play")
|
|
|
|
|
|
|
|
|
|
|
|
(get-in {:a 1} '())
|
|
|
|
(get-in {:a 1} '())
|
|
|
|
(let [zipper (zip/seq-zip '(TOP (S (NP) (VB))))]
|
|
|
|
(let [zipper (zip/seq-zip '(TOP (S (NP) (VB))))]
|
|
|
|