Add 2-chain markov and darklyrics

main
Eric Ihli 4 years ago
parent 1228739fd2
commit f3274a8c2f

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,5 +1,8 @@
;; TODO: Filter out non-English lyrics.
(ns com.owoga.corpus.darklyrics (ns com.owoga.corpus.darklyrics
(:require [net.cgrand.enlive-html :as html] (:require [net.cgrand.enlive-html :as html]
[com.owoga.prhyme.util :as util]
[clojure.string :as string] [clojure.string :as string]
[clojure.java.io :as io])) [clojure.java.io :as io]))
@ -9,14 +12,6 @@
(defn fetch-url [url] (defn fetch-url [url]
(html/html-resource (java.net.URL. url))) (html/html-resource (java.net.URL. url)))
(defn pages-urls [index]
(-> index
(html/select [:div.listrow])
(first)
(html/select [:a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url %)))))
(defn parse-letters-urls [index] (defn parse-letters-urls [index]
(-> index (-> index
(html/select [:div.listrow]) (html/select [:div.listrow])
@ -25,36 +20,12 @@
((partial map #(get-in % [:attrs :href]))) ((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url %))))) ((partial map #(apply str root-url %)))))
(defn artists-urls [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url "/" %)))))
(defn parse-artists-urls [page] (defn parse-artists-urls [page]
(-> page (-> page
(html/select [:div.artists :a]) (html/select [:div.artists :a])
((partial map #(get-in % [:attrs :href]))) ((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url "/" %))))) ((partial map #(apply str root-url "/" %)))))
(defn artists-names [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:content])))))
(defn artists-albums [page]
(-> page
(html/select [:div.album])
((partial
map
(fn [album]
(cons
(first (map html/text (html/select album [:h2 :strong])))
(list
(map
#(str root-url (string/replace (get-in % [:attrs :href]) #"\.\." ""))
(html/select album [:a])))))))))
(defn parse-artists-albums [page] (defn parse-artists-albums [page]
(-> page (-> page
(html/select [:div.album]) (html/select [:div.album])
@ -66,17 +37,6 @@
#(str root-url (string/replace (get-in % [:attrs :href]) #"\.\." "")) #(str root-url (string/replace (get-in % [:attrs :href]) #"\.\." ""))
(html/select album [:a])))))))) (html/select album [:a]))))))))
(defn album-lyrics [page]
(-> page
(html/select [:div.lyrics])
first
:content
((partial partition-by #(and (map? %) (= :h3 (:tag %)))))
flatten
((partial filter string?))
((partial apply str))
(string/replace #"\s+" " ")))
(defn parse-album-lyrics [page] (defn parse-album-lyrics [page]
(-> page (-> page
(html/select [:div.lyrics]) (html/select [:div.lyrics])
@ -88,43 +48,11 @@
((partial apply str)) ((partial apply str))
(string/replace #"\s+" " "))) (string/replace #"\s+" " ")))
(defn lazy-artists (defn english? [text]
([urls] (let [words (string/split text #"\s+")
(lazy-artists urls '())) english-words
([urls artists] (->> words (filter #(util/words-map (string/lower-case %))))]
(cond (< 0.5 (/ (count english-words) (count words)))))
(empty? urls)
nil
(empty? artists)
(lazy-artists (rest urls)
(artists-urls (fetch-url (first urls))))
:else
(cons (fetch-url (first artists))
(lazy-seq (lazy-artists urls (rest artists)))))))
(defn lazy-lyrics
([page]
(let [album-urls (->> (artists-albums page)
(map #(vector (first %) (first (second %)))))]
(lazy-lyrics page album-urls)))
([page albums]
(cond
(empty? albums) nil
:else
(cons (album-lyrics (fetch-url (second (first albums))))
(lazy-seq (lazy-lyrics page (rest albums)))))))
(defn lazy-scrape
([base-url]
(let [response (fetch-url base-url)
alphabetical (pages-urls response)
artists (lazy-artists alphabetical)]))
([response artists albums]
(cond
(empty? artists) nil
)))
(defn scrape (defn scrape
([base-url] ([base-url]
@ -144,28 +72,36 @@
:else :else
nil))) nil)))
(defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
(defn make-markov [text n]
(let [tokens (reverse (string/split (clean-text text) #"\s+"))]
(reduce
(fn [a w]
(let [k (butlast w)
v (last w)]
(update-in a [k v] (fnil inc 0))))
{}
((util/window (inc n)) tokens))))
(def darkov-2
(into
{}
(map (fn [[k v]] (vector (list k) v))
(make-markov (slurp "darklyrics.txt") 2))))
(comment (comment
(parse-letters-urls (fetch-url base-url)) (def darkov
(into
{}
(map (fn [[k v]] (vector (list k) v))
(make-markov (slurp "darklyrics.txt")))))
(take 100 darkov)
(util/write-markov "darklyrics.edn" darkov)
(spit "test.txt" (pr-str {:foo "1"}))
(def lyrics (scrape base-url)) (def lyrics (scrape base-url))
(with-open [writer (io/writer "darklyrics.txt")] (with-open [writer (io/writer "darklyrics.txt")]
(run! (run!
#(.write writer %) #(.write writer %)
(take 20 lyrics))) (take 200 (filter english? lyrics)))))
(def response (fetch-url base-url))
(def a (fetch-url (first (pages-urls response))))
(artists-urls (fetch-url (second (pages-urls response))))
(def la (lazy-artists (pages-urls response)))
(first la)
(def first-artists-page (first la))
(def first-artists-album-url (first (second (first (artists-albums first-artists-page)))))
(album-lyrics (fetch-url first-artists-album-url))
(first (lazy-albums (first la)))
(def artist-1 (first (artists-urls a)))
(def artist-1-page (fetch-url artist-1))
(-> artist-1-page
(html/select [:div.album]))
(def artists-albums-1 (artists-albums artist-1-page))
(def artist-album (first (second (first artists-albums-1))))
artist-album
(def album (fetch-url artist-album))
)

@ -3,6 +3,7 @@
[com.owoga.prhyme.util :as util] [com.owoga.prhyme.util :as util]
[com.owoga.prhyme.util.weighted-rand :as weighted-rand] [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
[com.owoga.prhyme.util.nlp :as nlp] [com.owoga.prhyme.util.nlp :as nlp]
[com.owoga.corpus.darklyrics :as dr]
[com.owoga.prhyme.frp :as frp] [com.owoga.prhyme.frp :as frp]
[com.owoga.prhyme.core :as prhyme])) [com.owoga.prhyme.core :as prhyme]))
@ -53,8 +54,12 @@
(defn adjust-for-markov (defn adjust-for-markov
[markov percent] [markov percent]
(let [target-markov-n (count (first (first markov)))]
(fn [[words target result]] (fn [[words target result]]
(let [markov-options (markov (list (:norm-word (first result)))) (if (>= (count result) target-markov-n)
(let [markov-options (markov (->> result
(take target-markov-n)
(map :norm-word)))
markov-option-avg (/ (apply + (vals markov-options)) markov-option-avg (/ (apply + (vals markov-options))
(max 1 (count markov-options)))] (max 1 (count markov-options)))]
(if (nil? markov-options) (if (nil? markov-options)
@ -77,7 +82,8 @@
markovs) markovs)
non-markovs) non-markovs)
target target
result]))))) result])))
[words target result]))))
(defn adjust-for-rimes (defn adjust-for-rimes
[dictionary percent] [dictionary percent]
@ -108,6 +114,37 @@
target target
result]))) result])))
(defn adjust-for-tail-rimes
[dictionary percent]
(fn [[words target result]]
(if (empty? result)
(let [words-with-rime-count
(map
(fn [word]
(assoc word :num-matching (count (frp/consecutive-matching target word :rimes))))
words)
[rhyming non-rhyming]
((juxt filter remove)
#(< 0 (:num-matching %))
words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(map
(fn [word]
(as-> word word
(assoc word :weight (* adjustment-rhyming (:weight word)))
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming)
non-rhyming)
target
result])
[words target result])))
(defn prhyme (defn prhyme
"2020-10-21 iteration" "2020-10-21 iteration"
[words weights-adjuster target stop?] [words weights-adjuster target stop?]
@ -172,7 +209,149 @@
(defn filter-for-syllable-count [syllable-count coll] (defn filter-for-syllable-count [syllable-count coll]
(filter #(= syllable-count (phrase-syllable-count %)) coll)) (filter #(= syllable-count (phrase-syllable-count %)) coll))
(defn syllable-stop
[target]
(fn [inner-target result]
(<= (:syllable-count target)
(apply + (map :syllable-count result)))))
(defn generate-rhyme-for-phrase
[words adjust phrase]
(let [words (map #(assoc % :weight 1) words)
words-map (into {} (map #(vector (:norm-word %) %) words))
target (phrase->word words phrase)]
(prhymer words adjust target (syllable-stop target))))
(def adj (comp (adjust-for-markov dr/darkov 0.25)
(adjust-for-markov dr/darkov-2 0.9)
(adjust-for-tail-rimes words-map 0.9)))
(defn generate-prhymes [poem]
(let [r (partial generate-rhyme-for-phrase frp/popular adj)]
(fn []
(->> poem
(map (fn [phrase]
(let [target (phrase->word frp/popular phrase)]
(first
(filter
#(and
(or (< 0.9 (rand))
(nlp/valid-sentence? (string/join " " (map :norm-word %))))
(= (:syllable-count target)
(apply + (map :syllable-count %))))
(r phrase))))))
(map (fn [line] (map #(:norm-word %) line)))
(map #(string/join " " %))))))
(comment (comment
(->> ["mister sandman give me a dream"
"make him the cutest that i've ever seen"
"give him two lips like roses in clover"
"then tell him that his lonesome nights are over"]
(generate-prhymes)
(repeatedly)
(take 2))
(->> ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]
(generate-prhymes)
(repeatedly)
(take 10))
;; => (("it parts loran"
;; "some kind supreme"
;; "beaming idealist"
;; "just some more lair queen")
;; ("where do we ran"
;; "whole hold back steam"
;; "the true terrorist"
;; "murders rocked chlorine")
;; ("that cages span" "water the steam" "personnel stylist" "slavery marine")
;; ("from distant bran" "admissions ream" "and deaf elitist" "persuaded soybean")
;; ("auction merman"
;; "and fills my dream"
;; "to his bows poorest"
;; "disappearing wean")
;; ("get under an" "appetite seam" "we must ingest gist" "overboard caffeine")
;; ("moody madman"
;; "tableau downstream"
;; "enormously hissed"
;; "unzip hap file sheen")
;; ("mistakes merman"
;; "there is this dream"
;; "cherries publicist"
;; "name of my routine")
;; ("for the choir" "remote extreme" "olives internist" "too late to you mean")
;; ("the ghosts that tan"
;; "built on the dream"
;; "is this band is pissed"
;; "arts summon fifteen"))
(def adj (comp (adjust-for-markov dr/darkov 0.9)
(adjust-for-tail-rimes words-map 0.9)))
(let [r (generate-rhyme-for-phrase
frp/popular
adj
"mister sandman")]
(take 3 r))
(def r (partial generate-rhyme-for-phrase frp/popular adj))
(take
10
(repeatedly
(fn []
(->> ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]
(map (fn [phrase]
(let [target (phrase->word frp/popular phrase)]
(first
(filter
#(= (:syllable-count target)
(apply + (map :syllable-count %)))
(r phrase))))))
(map (fn [line] (map #(:norm-word %) line)))
(map #(string/join " " %))))))
;; ("farther caveman"
;; "pain primal scream"
;; "and this fucking pissed"
;; "all become true green")
;; ("guarding mailman"
;; "stand striving beam"
;; "in gothic earnest"
;; "chaos unforeseen")
;; ("face the sandman"
;; "push comes the steam"
;; "industrialist"
;; "well thought that thrives bean")
;; ("restore milkman"
;; "even first gleam"
;; "contract alchemist"
;; "slavery marine")
;; ("clouds nights the pan"
;; "blissful peace theme"
;; "treason guitarist"
;; "chaos unforeseen")
;; ("painter japan"
;; "from hell extreme"
;; "with me to resist"
;; "to your bet fifteen")
;; ("he trusts doorman"
;; "bang bang the dream"
;; "truth recruit fascist"
;; "to the wealth saline")
;; ("accounting bran"
;; "rainy clouds gleam"
;; "cardiologist"
;; "yang trader eighteen"))
(map #(take 1 %) (map r ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]))
(take 3 frp/words) (take 3 frp/words)
(phrase->word frp/popular "well-off") (phrase->word frp/popular "well-off")
(map (fn [line] (phrase->word frp/popular line)) (map (fn [line] (phrase->word frp/popular line))
@ -180,13 +359,15 @@
"give me dream" "give me dream"
"make him the cutest" "make him the cutest"
"that i've ever seen"]) "that i've ever seen"])
(defonce lovecraft-markov (read-string (slurp "lovecraft.edn"))) (defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
(def adj (comp (adjust-for-markov lovecraft-markov 0.9)
(adjust-for-rimes words-map 0.9))) (->> (gen-prhymes frp/popular
(repeatedly 10 #(gen-prhymes frp/popular
adj adj
["i'm testing rhyme software" ["mister sandman"
"what do you think"])) "give me dream"
"make him the cutest"
"that i've ever seen"]))
(take 5 (filter #(= 7 (phrase-syllable-count (first %))) (take 5 (filter #(= 7 (phrase-syllable-count (first %)))
(repeatedly #(gen-prhymes frp/popular adj ["taylor is my beautiful"])))) (repeatedly #(gen-prhymes frp/popular adj ["taylor is my beautiful"]))))

@ -72,6 +72,31 @@
(def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"}) (def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"})
(defn window [n]
(fn [coll]
(cond
(empty? coll) []
(< (count coll) n) []
:else (cons (take n coll)
(lazy-seq ((window n) (drop n coll)))))))
(defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
(defn make-markov [text]
(let [tokens (reverse (string/split (clean-text text) #"\s+"))]
(reduce
(fn [a [t1 t2]]
(update-in a [t1 t2] (fnil inc 0)))
{}
((window 2) tokens))))
(defn write-markov [filename markov]
(spit filename (pr-str markov)))
(defn read-markov [filename]
(read-string (slurp filename)))
(defn take-through (defn take-through
"(take-through even? [1 2 3 4 7 7 5 2 8 10]) "(take-through even? [1 2 3 4 7 7 5 2 8 10])
returns '((1 2 3 4) (7 7 5 2) (8) (10))" returns '((1 2 3 4) (7 7 5 2) (8) (10))"

Loading…
Cancel
Save