diff --git a/.gitignore b/.gitignore index 9dc09b0..2b03272 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ tmp/ .cpcache .nrepl-port +dark-corpus diff --git a/src/com/owoga/corpus/darklyrics.clj b/src/com/owoga/corpus/darklyrics.clj index 6c2eb3d..9163a01 100644 --- a/src/com/owoga/corpus/darklyrics.clj +++ b/src/com/owoga/corpus/darklyrics.clj @@ -82,7 +82,7 @@ (defn scrape ([base-url] - (scrape (drop 3 (parse-letters-urls (fetch-url base-url))) '() '())) + (scrape (drop 10 (parse-letters-urls (fetch-url base-url))) '() '())) ([letters-urls artists-urls [artist-name albums-urls]] (cond (not-empty albums-urls) @@ -141,7 +141,6 @@ (def darkov-2 (util/read-markov "dark-corpus-2.edn")) - (defn norm-filepath [text] (-> text string/lower-case diff --git a/src/com/owoga/corpus/markov.clj b/src/com/owoga/corpus/markov.clj index f8b57b4..26c85fa 100644 --- a/src/com/owoga/corpus/markov.clj +++ b/src/com/owoga/corpus/markov.clj @@ -38,8 +38,7 @@ (apply merge-with (fn [a-possibilities b-possibilities] - (apply - merge-with + (merge-with (fn [a b] ((fnil + 0) a b)) a-possibilities @@ -57,7 +56,23 @@ "them" 50 "baz" 99}})) +(defn gen-markov [] + (->> (file-seq (io/file "dark-corpus")) + (remove #(.isDirectory %)) + (map #(slurp %)) + (map clean-text) + (filter util/english?) + (map #(string/split % #"\n+")) + (flatten) + (map #(string/split % #"\s+")) + (map reverse) + (map #(util/extend-coll % nil 2)) + (map #(make-markov % 2)) + (apply merge-markov) + (util/write-markov "dark-corpus-2.edn"))) + (comment + (gen-markov) (->> (file-seq (io/file "dark-corpus")) (remove #(.isDirectory %)) (map #(slurp %)) diff --git a/src/com/owoga/prhyme/gen.clj b/src/com/owoga/prhyme/gen.clj index 2470f62..3a99f49 100644 --- a/src/com/owoga/prhyme/gen.clj +++ b/src/com/owoga/prhyme/gen.clj @@ -4,7 +4,6 @@ [com.owoga.prhyme.util :as util] [com.owoga.prhyme.util.weighted-rand :as weighted-rand] [com.owoga.prhyme.util.nlp :as nlp] - [com.owoga.corpus.darklyrics :as dr] [com.owoga.prhyme.frp :as frp] [com.owoga.prhyme.core :as prhyme])) @@ -176,37 +175,6 @@ target result]))) -(defn adjust-for-tail-rimes - [dictionary percent] - (fn [[words target result]] - (if (empty? result) - (let [words-with-rime-count - (map - (fn [word] - (assoc word :num-matching (if (prhyme/rimes? target word) 1 0))) - words) - - [rhyming non-rhyming] - ((juxt filter remove) - #(< 0 (:num-matching %)) - words-with-rime-count) - - weight-non-rhyming (apply + (map :weight non-rhyming)) - target-weight-rhyming (* 100 percent weight-non-rhyming) - count-rhyming (count rhyming) - adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))] - [(concat - (map - (fn [word] - (as-> word word - (assoc word :weight (* adjustment-rhyming (:weight word))) - (assoc word :adjustment-for-rimes adjustment-rhyming))) - rhyming) - non-rhyming) - target - result]) - [words target result]))) - (defn attempt-gen-target-by-syllable-count [adj syllable-count words] (loop [result '()] (cond @@ -376,102 +344,3 @@ r)) (map (fn [line] (map #(:norm-word %) line))) (map #(string/join " " %)))) - -(comment - (let [adj (comp (adjust-for-markov-with-boundaries dr/darkov-2 0.9) - (adjust-for-tail-rimes words-map 0.9))] - (->> (generate-rhyme-for-phrase frp/popular adj "make him the cutest that i've ever seen") - (take 20) - (map #(map :norm-word %)) - (map #(string/join " " %)))) - - (let [adj (comp (adjust-for-markov-with-boundaries dr/darkov-2 0.9) - (adjust-for-tail-rimes words-map 0.9))] - (->> (generate-rhyme-for-phrase frp/popular adj "mister sandman give me a dream") - (take 20) - (map #(map :norm-word %)) - (map #(string/join " " %)))) - - (let [adj (adjust-for-markov-with-boundaries dr/darkov-2 0.9)] - (apply map vector - (->> ["mister sandman give me a dream" - "make him the cutest that i've ever seen" - "give him two lips like roses in clover" - "then tell him that his lonesome nights are over"] - (map #(generate-prhymes-darkov util/popular adj %))))) - - (apply map vector (->> ["taylor is my star" - "she brightens my day"] - (generate-prhymes) - (repeatedly) - (take 10))) - - (frp/phrase->word frp/popular "homer") - (frp/phrase->word frp/popular "") - (apply map vector (->> ["mister sandman" - "give me a dream" - "make him the cutest" - "that i've ever seen"] - (generate-prhymes) - (repeatedly) - (take 10))) - - (def adj (comp (adjust-for-markov dr/darkov 0.9) - (adjust-for-tail-rimes words-map 0.9))) - - (let [r (generate-rhyme-for-phrase - frp/popular - adj - "mister sandman")] - (take 3 r)) - - (def r (partial generate-rhyme-for-phrase frp/popular adj)) - (take - 10 - (repeatedly - (fn [] - (->> ["mister sandman" - "give me a dream" - "make him the cutest" - "that i've ever seen"] - (map (fn [phrase] - (let [target (phrase->word frp/popular phrase)] - (first - (filter - #(= (:syllable-count target) - (apply + (map :syllable-count %))) - (r phrase)))))) - (map (fn [line] (map #(:norm-word %) line))) - (map #(string/join " " %)))))) - - (map #(take 1 %) (map r ["mister sandman" - "give me a dream" - "make him the cutest" - "that i've ever seen"])) - (take 3 frp/words) - (phrase->word frp/popular "well-off") - (map (fn [line] (phrase->word frp/popular line)) - ["mister sandman" - "give me dream" - "make him the cutest" - "that i've ever seen"]) - - (defonce lovecraft-markov (read-string (slurp "lovecraft.edn"))) - - (->> (gen-prhymes frp/popular - adj - ["mister sandman" - "give me dream" - "make him the cutest" - "that i've ever seen"])) - - (take 5 (filter #(= 7 (phrase-syllable-count (first %))) - (repeatedly #(gen-prhymes frp/popular adj ["taylor is my beautiful"])))) - - (let [target (frp/phrase->word frp/words "i solemnly swear i am up to no good") - words (map #(assoc % :weight 1) frp/popular) - weights-adjuster (comp (adjust-for-markov lovecraft-markov 0.9) - (adjust-for-rimes target words-map 0.9)) - stop (sentence-stop target) - r (prhymer words weights-adjuster target stop)] - (map (fn [p] (string/join " " (map #(:norm-word %) p))) (take 5 r)))) diff --git a/src/com/owoga/prhyme/lymeric.clj b/src/com/owoga/prhyme/lymeric.clj index bb2e238..1b2ab2a 100644 --- a/src/com/owoga/prhyme/lymeric.clj +++ b/src/com/owoga/prhyme/lymeric.clj @@ -26,9 +26,11 @@ comp (remove nil? - [(gen/adjust-for-markov-with-boundaries darklyrics/darkov-2 0.9) + [(weighted-selection/adjust-for-markov + darklyrics/darkov-2 + 0.99) (when (rhymes pattern) - (gen/adjust-for-tail-rimes util/words-map 0.9))])) + (weighted-selection/adjust-for-rhymes 0.99))])) rhyme (if (nil? (get rhymes pattern)) (gen/gen-sentence-with-syllable-count adj @@ -44,14 +46,17 @@ (assoc rhymes pattern rhyme) (conj result rhyme))))))) - (comment (rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8))) ) (comment - (rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7))) + (->> (repeatedly + (fn [] + (rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7))))) + (take 2)) + (apply map vector (list '(1 2 3) '(4 5 6))) (->> (gen/selection-seq (map #(assoc % :weight 1) frp/words) (weighted-selection/adjust-for-rhymes 0.99) @@ -83,18 +88,3 @@ "hate is my virtue" "my feelings are well overdue" "war we await the afterlife"]) -(->> (repeatedly - (fn [] - (gen/gen-target-by-syllable-count darklyrics/darkov-2 8 (map #(assoc % :weight 1) frp/popular)))) - (filter #(= 8 (apply + (map :syllable-count %)))) - (map #(map :norm-word %)) - (map #(string/join " " %)) - (filter nlp/valid-sentence?) - (take 5)) - -(take 3 frp/popular) -(defn genlymeric [] - (let [adj (comp (gen/adjust-for-markov darklyrics/darkov-2) - (gen/adjust-for-tail-rimes util/words-map))])) - -(map :syllable-count '())