Add 2-chain markov and darklyrics

main
Eric Ihli
parent 1228739fd2
commit f3274a8c2f

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,5 +1,8 @@
;; TODO: Filter out non-English lyrics.
(ns com.owoga.corpus.darklyrics
(:require [net.cgrand.enlive-html :as html]
[com.owoga.prhyme.util :as util]
[clojure.string :as string]
[clojure.java.io :as io]))
@ -9,14 +12,6 @@
(defn fetch-url [url]
(html/html-resource (java.net.URL. url)))
(defn pages-urls [index]
(-> index
(html/select [:div.listrow])
(first)
(html/select [:a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url %)))))
(defn parse-letters-urls [index]
(-> index
(html/select [:div.listrow])
@ -25,36 +20,12 @@
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url %)))))
(defn artists-urls [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url "/" %)))))
(defn parse-artists-urls [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url "/" %)))))
(defn artists-names [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:content])))))
(defn artists-albums [page]
(-> page
(html/select [:div.album])
((partial
map
(fn [album]
(cons
(first (map html/text (html/select album [:h2 :strong])))
(list
(map
#(str root-url (string/replace (get-in % [:attrs :href]) #"\.\." ""))
(html/select album [:a])))))))))
(defn parse-artists-albums [page]
(-> page
(html/select [:div.album])
@ -66,17 +37,6 @@
#(str root-url (string/replace (get-in % [:attrs :href]) #"\.\." ""))
(html/select album [:a]))))))))
(defn album-lyrics [page]
(-> page
(html/select [:div.lyrics])
first
:content
((partial partition-by #(and (map? %) (= :h3 (:tag %)))))
flatten
((partial filter string?))
((partial apply str))
(string/replace #"\s+" " ")))
(defn parse-album-lyrics [page]
(-> page
(html/select [:div.lyrics])
@ -88,43 +48,11 @@
((partial apply str))
(string/replace #"\s+" " ")))
(defn lazy-artists
([urls]
(lazy-artists urls '()))
([urls artists]
(cond
(empty? urls)
nil
(empty? artists)
(lazy-artists (rest urls)
(artists-urls (fetch-url (first urls))))
:else
(cons (fetch-url (first artists))
(lazy-seq (lazy-artists urls (rest artists)))))))
(defn lazy-lyrics
([page]
(let [album-urls (->> (artists-albums page)
(map #(vector (first %) (first (second %)))))]
(lazy-lyrics page album-urls)))
([page albums]
(cond
(empty? albums) nil
:else
(cons (album-lyrics (fetch-url (second (first albums))))
(lazy-seq (lazy-lyrics page (rest albums)))))))
(defn lazy-scrape
([base-url]
(let [response (fetch-url base-url)
alphabetical (pages-urls response)
artists (lazy-artists alphabetical)]))
([response artists albums]
(cond
(empty? artists) nil
)))
(defn english? [text]
(let [words (string/split text #"\s+")
english-words
(->> words (filter #(util/words-map (string/lower-case %))))]
(< 0.5 (/ (count english-words) (count words)))))
(defn scrape
([base-url]
@ -144,28 +72,36 @@
:else
nil)))
(defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
(defn make-markov [text n]
(let [tokens (reverse (string/split (clean-text text) #"\s+"))]
(reduce
(fn [a w]
(let [k (butlast w)
v (last w)]
(update-in a [k v] (fnil inc 0))))
{}
((util/window (inc n)) tokens))))
(def darkov-2
(into
{}
(map (fn [[k v]] (vector (list k) v))
(make-markov (slurp "darklyrics.txt") 2))))
(comment
(parse-letters-urls (fetch-url base-url))
(def darkov
(into
{}
(map (fn [[k v]] (vector (list k) v))
(make-markov (slurp "darklyrics.txt")))))
(take 100 darkov)
(util/write-markov "darklyrics.edn" darkov)
(spit "test.txt" (pr-str {:foo "1"}))
(def lyrics (scrape base-url))
(with-open [writer (io/writer "darklyrics.txt")]
(run!
#(.write writer %)
(take 20 lyrics)))
(def response (fetch-url base-url))
(def a (fetch-url (first (pages-urls response))))
(artists-urls (fetch-url (second (pages-urls response))))
(def la (lazy-artists (pages-urls response)))
(first la)
(def first-artists-page (first la))
(def first-artists-album-url (first (second (first (artists-albums first-artists-page)))))
(album-lyrics (fetch-url first-artists-album-url))
(first (lazy-albums (first la)))
(def artist-1 (first (artists-urls a)))
(def artist-1-page (fetch-url artist-1))
(-> artist-1-page
(html/select [:div.album]))
(def artists-albums-1 (artists-albums artist-1-page))
(def artist-album (first (second (first artists-albums-1))))
artist-album
(def album (fetch-url artist-album))
)
(take 200 (filter english? lyrics)))))

@ -3,6 +3,7 @@
[com.owoga.prhyme.util :as util]
[com.owoga.prhyme.util.weighted-rand :as weighted-rand]
[com.owoga.prhyme.util.nlp :as nlp]
[com.owoga.corpus.darklyrics :as dr]
[com.owoga.prhyme.frp :as frp]
[com.owoga.prhyme.core :as prhyme]))
@ -53,8 +54,12 @@
(defn adjust-for-markov
[markov percent]
(let [target-markov-n (count (first (first markov)))]
(fn [[words target result]]
(let [markov-options (markov (list (:norm-word (first result))))
(if (>= (count result) target-markov-n)
(let [markov-options (markov (->> result
(take target-markov-n)
(map :norm-word)))
markov-option-avg (/ (apply + (vals markov-options))
(max 1 (count markov-options)))]
(if (nil? markov-options)
@ -77,7 +82,8 @@
markovs)
non-markovs)
target
result])))))
result])))
[words target result]))))
(defn adjust-for-rimes
[dictionary percent]
@ -108,6 +114,37 @@
target
result])))
(defn adjust-for-tail-rimes
[dictionary percent]
(fn [[words target result]]
(if (empty? result)
(let [words-with-rime-count
(map
(fn [word]
(assoc word :num-matching (count (frp/consecutive-matching target word :rimes))))
words)
[rhyming non-rhyming]
((juxt filter remove)
#(< 0 (:num-matching %))
words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(map
(fn [word]
(as-> word word
(assoc word :weight (* adjustment-rhyming (:weight word)))
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming)
non-rhyming)
target
result])
[words target result])))
(defn prhyme
"2020-10-21 iteration"
[words weights-adjuster target stop?]
@ -172,7 +209,149 @@
(defn filter-for-syllable-count [syllable-count coll]
(filter #(= syllable-count (phrase-syllable-count %)) coll))
(defn syllable-stop
[target]
(fn [inner-target result]
(<= (:syllable-count target)
(apply + (map :syllable-count result)))))
(defn generate-rhyme-for-phrase
[words adjust phrase]
(let [words (map #(assoc % :weight 1) words)
words-map (into {} (map #(vector (:norm-word %) %) words))
target (phrase->word words phrase)]
(prhymer words adjust target (syllable-stop target))))
(def adj (comp (adjust-for-markov dr/darkov 0.25)
(adjust-for-markov dr/darkov-2 0.9)
(adjust-for-tail-rimes words-map 0.9)))
(defn generate-prhymes [poem]
(let [r (partial generate-rhyme-for-phrase frp/popular adj)]
(fn []
(->> poem
(map (fn [phrase]
(let [target (phrase->word frp/popular phrase)]
(first
(filter
#(and
(or (< 0.9 (rand))
(nlp/valid-sentence? (string/join " " (map :norm-word %))))
(= (:syllable-count target)
(apply + (map :syllable-count %))))
(r phrase))))))
(map (fn [line] (map #(:norm-word %) line)))
(map #(string/join " " %))))))
(comment
(->> ["mister sandman give me a dream"
"make him the cutest that i've ever seen"
"give him two lips like roses in clover"
"then tell him that his lonesome nights are over"]
(generate-prhymes)
(repeatedly)
(take 2))
(->> ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]
(generate-prhymes)
(repeatedly)
(take 10))
;; => (("it parts loran"
;; "some kind supreme"
;; "beaming idealist"
;; "just some more lair queen")
;; ("where do we ran"
;; "whole hold back steam"
;; "the true terrorist"
;; "murders rocked chlorine")
;; ("that cages span" "water the steam" "personnel stylist" "slavery marine")
;; ("from distant bran" "admissions ream" "and deaf elitist" "persuaded soybean")
;; ("auction merman"
;; "and fills my dream"
;; "to his bows poorest"
;; "disappearing wean")
;; ("get under an" "appetite seam" "we must ingest gist" "overboard caffeine")
;; ("moody madman"
;; "tableau downstream"
;; "enormously hissed"
;; "unzip hap file sheen")
;; ("mistakes merman"
;; "there is this dream"
;; "cherries publicist"
;; "name of my routine")
;; ("for the choir" "remote extreme" "olives internist" "too late to you mean")
;; ("the ghosts that tan"
;; "built on the dream"
;; "is this band is pissed"
;; "arts summon fifteen"))
(def adj (comp (adjust-for-markov dr/darkov 0.9)
(adjust-for-tail-rimes words-map 0.9)))
(let [r (generate-rhyme-for-phrase
frp/popular
adj
"mister sandman")]
(take 3 r))
(def r (partial generate-rhyme-for-phrase frp/popular adj))
(take
10
(repeatedly
(fn []
(->> ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]
(map (fn [phrase]
(let [target (phrase->word frp/popular phrase)]
(first
(filter
#(= (:syllable-count target)
(apply + (map :syllable-count %)))
(r phrase))))))
(map (fn [line] (map #(:norm-word %) line)))
(map #(string/join " " %))))))
;; ("farther caveman"
;; "pain primal scream"
;; "and this fucking pissed"
;; "all become true green")
;; ("guarding mailman"
;; "stand striving beam"
;; "in gothic earnest"
;; "chaos unforeseen")
;; ("face the sandman"
;; "push comes the steam"
;; "industrialist"
;; "well thought that thrives bean")
;; ("restore milkman"
;; "even first gleam"
;; "contract alchemist"
;; "slavery marine")
;; ("clouds nights the pan"
;; "blissful peace theme"
;; "treason guitarist"
;; "chaos unforeseen")
;; ("painter japan"
;; "from hell extreme"
;; "with me to resist"
;; "to your bet fifteen")
;; ("he trusts doorman"
;; "bang bang the dream"
;; "truth recruit fascist"
;; "to the wealth saline")
;; ("accounting bran"
;; "rainy clouds gleam"
;; "cardiologist"
;; "yang trader eighteen"))
(map #(take 1 %) (map r ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]))
(take 3 frp/words)
(phrase->word frp/popular "well-off")
(map (fn [line] (phrase->word frp/popular line))
@ -180,13 +359,15 @@
"give me dream"
"make him the cutest"
"that i've ever seen"])
(defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
(def adj (comp (adjust-for-markov lovecraft-markov 0.9)
(adjust-for-rimes words-map 0.9)))
(repeatedly 10 #(gen-prhymes frp/popular
(->> (gen-prhymes frp/popular
adj
["i'm testing rhyme software"
"what do you think"]))
["mister sandman"
"give me dream"
"make him the cutest"
"that i've ever seen"]))
(take 5 (filter #(= 7 (phrase-syllable-count (first %)))
(repeatedly #(gen-prhymes frp/popular adj ["taylor is my beautiful"]))))

@ -72,6 +72,31 @@
(def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"})
(defn window [n]
(fn [coll]
(cond
(empty? coll) []
(< (count coll) n) []
:else (cons (take n coll)
(lazy-seq ((window n) (drop n coll)))))))
(defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
(defn make-markov [text]
(let [tokens (reverse (string/split (clean-text text) #"\s+"))]
(reduce
(fn [a [t1 t2]]
(update-in a [t1 t2] (fnil inc 0)))
{}
((window 2) tokens))))
(defn write-markov [filename markov]
(spit filename (pr-str markov)))
(defn read-markov [filename]
(read-string (slurp filename)))
(defn take-through
"(take-through even? [1 2 3 4 7 7 5 2 8 10])
returns '((1 2 3 4) (7 7 5 2) (8) (10))"

Loading…
Cancel
Save