Add 2-chain markov and darklyrics

main
Eric Ihli 4 years ago
parent 1228739fd2
commit f3274a8c2f

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -1,5 +1,8 @@
;; TODO: Filter out non-English lyrics.
(ns com.owoga.corpus.darklyrics
(:require [net.cgrand.enlive-html :as html]
[com.owoga.prhyme.util :as util]
[clojure.string :as string]
[clojure.java.io :as io]))
@ -9,14 +12,6 @@
(defn fetch-url [url]
(html/html-resource (java.net.URL. url)))
(defn pages-urls [index]
(-> index
(html/select [:div.listrow])
(first)
(html/select [:a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url %)))))
(defn parse-letters-urls [index]
(-> index
(html/select [:div.listrow])
@ -25,36 +20,12 @@
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url %)))))
(defn artists-urls [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url "/" %)))))
(defn parse-artists-urls [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:attrs :href])))
((partial map #(apply str root-url "/" %)))))
(defn artists-names [page]
(-> page
(html/select [:div.artists :a])
((partial map #(get-in % [:content])))))
(defn artists-albums [page]
(-> page
(html/select [:div.album])
((partial
map
(fn [album]
(cons
(first (map html/text (html/select album [:h2 :strong])))
(list
(map
#(str root-url (string/replace (get-in % [:attrs :href]) #"\.\." ""))
(html/select album [:a])))))))))
(defn parse-artists-albums [page]
(-> page
(html/select [:div.album])
@ -66,17 +37,6 @@
#(str root-url (string/replace (get-in % [:attrs :href]) #"\.\." ""))
(html/select album [:a]))))))))
(defn album-lyrics [page]
(-> page
(html/select [:div.lyrics])
first
:content
((partial partition-by #(and (map? %) (= :h3 (:tag %)))))
flatten
((partial filter string?))
((partial apply str))
(string/replace #"\s+" " ")))
(defn parse-album-lyrics [page]
(-> page
(html/select [:div.lyrics])
@ -88,43 +48,11 @@
((partial apply str))
(string/replace #"\s+" " ")))
(defn lazy-artists
([urls]
(lazy-artists urls '()))
([urls artists]
(cond
(empty? urls)
nil
(empty? artists)
(lazy-artists (rest urls)
(artists-urls (fetch-url (first urls))))
:else
(cons (fetch-url (first artists))
(lazy-seq (lazy-artists urls (rest artists)))))))
(defn lazy-lyrics
([page]
(let [album-urls (->> (artists-albums page)
(map #(vector (first %) (first (second %)))))]
(lazy-lyrics page album-urls)))
([page albums]
(cond
(empty? albums) nil
:else
(cons (album-lyrics (fetch-url (second (first albums))))
(lazy-seq (lazy-lyrics page (rest albums)))))))
(defn lazy-scrape
([base-url]
(let [response (fetch-url base-url)
alphabetical (pages-urls response)
artists (lazy-artists alphabetical)]))
([response artists albums]
(cond
(empty? artists) nil
)))
(defn english? [text]
(let [words (string/split text #"\s+")
english-words
(->> words (filter #(util/words-map (string/lower-case %))))]
(< 0.5 (/ (count english-words) (count words)))))
(defn scrape
([base-url]
@ -144,28 +72,36 @@
:else
nil)))
(defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
(defn make-markov [text n]
(let [tokens (reverse (string/split (clean-text text) #"\s+"))]
(reduce
(fn [a w]
(let [k (butlast w)
v (last w)]
(update-in a [k v] (fnil inc 0))))
{}
((util/window (inc n)) tokens))))
(def darkov-2
(into
{}
(map (fn [[k v]] (vector (list k) v))
(make-markov (slurp "darklyrics.txt") 2))))
(comment
(parse-letters-urls (fetch-url base-url))
(def darkov
(into
{}
(map (fn [[k v]] (vector (list k) v))
(make-markov (slurp "darklyrics.txt")))))
(take 100 darkov)
(util/write-markov "darklyrics.edn" darkov)
(spit "test.txt" (pr-str {:foo "1"}))
(def lyrics (scrape base-url))
(with-open [writer (io/writer "darklyrics.txt")]
(run!
#(.write writer %)
(take 20 lyrics)))
(def response (fetch-url base-url))
(def a (fetch-url (first (pages-urls response))))
(artists-urls (fetch-url (second (pages-urls response))))
(def la (lazy-artists (pages-urls response)))
(first la)
(def first-artists-page (first la))
(def first-artists-album-url (first (second (first (artists-albums first-artists-page)))))
(album-lyrics (fetch-url first-artists-album-url))
(first (lazy-albums (first la)))
(def artist-1 (first (artists-urls a)))
(def artist-1-page (fetch-url artist-1))
(-> artist-1-page
(html/select [:div.album]))
(def artists-albums-1 (artists-albums artist-1-page))
(def artist-album (first (second (first artists-albums-1))))
artist-album
(def album (fetch-url artist-album))
)
(take 200 (filter english? lyrics)))))

@ -3,6 +3,7 @@
[com.owoga.prhyme.util :as util]
[com.owoga.prhyme.util.weighted-rand :as weighted-rand]
[com.owoga.prhyme.util.nlp :as nlp]
[com.owoga.corpus.darklyrics :as dr]
[com.owoga.prhyme.frp :as frp]
[com.owoga.prhyme.core :as prhyme]))
@ -53,31 +54,36 @@
(defn adjust-for-markov
[markov percent]
(fn [[words target result]]
(let [markov-options (markov (list (:norm-word (first result))))
markov-option-avg (/ (apply + (vals markov-options))
(max 1 (count markov-options)))]
(if (nil? markov-options)
[words target result]
(let [[markovs non-markovs]
((juxt filter remove)
#(markov-options (:norm-word %))
words)
weight-non-markovs (apply + (map :weight non-markovs))
target-weight-markovs (* 100 percent weight-non-markovs)
count-markovs (count markovs)
adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
[(concat
(map
(fn [m]
(let [option (markov-options (:norm-word m))]
(as-> m m
(assoc m :weight (* (/ option markov-option-avg) adjustment-markovs (:weight m)))
(assoc m :adjustment-for-markov (* (/ option markov-option-avg) adjustment-markovs)))))
markovs)
non-markovs)
target
result])))))
(let [target-markov-n (count (first (first markov)))]
(fn [[words target result]]
(if (>= (count result) target-markov-n)
(let [markov-options (markov (->> result
(take target-markov-n)
(map :norm-word)))
markov-option-avg (/ (apply + (vals markov-options))
(max 1 (count markov-options)))]
(if (nil? markov-options)
[words target result]
(let [[markovs non-markovs]
((juxt filter remove)
#(markov-options (:norm-word %))
words)
weight-non-markovs (apply + (map :weight non-markovs))
target-weight-markovs (* 100 percent weight-non-markovs)
count-markovs (count markovs)
adjustment-markovs (if (= 0 count-markovs) 1 (/ target-weight-markovs count-markovs))]
[(concat
(map
(fn [m]
(let [option (markov-options (:norm-word m))]
(as-> m m
(assoc m :weight (* (/ option markov-option-avg) adjustment-markovs (:weight m)))
(assoc m :adjustment-for-markov (* (/ option markov-option-avg) adjustment-markovs)))))
markovs)
non-markovs)
target
result])))
[words target result]))))
(defn adjust-for-rimes
[dictionary percent]
@ -108,6 +114,37 @@
target
result])))
(defn adjust-for-tail-rimes
[dictionary percent]
(fn [[words target result]]
(if (empty? result)
(let [words-with-rime-count
(map
(fn [word]
(assoc word :num-matching (count (frp/consecutive-matching target word :rimes))))
words)
[rhyming non-rhyming]
((juxt filter remove)
#(< 0 (:num-matching %))
words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(map
(fn [word]
(as-> word word
(assoc word :weight (* adjustment-rhyming (:weight word)))
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming)
non-rhyming)
target
result])
[words target result])))
(defn prhyme
"2020-10-21 iteration"
[words weights-adjuster target stop?]
@ -172,7 +209,149 @@
(defn filter-for-syllable-count [syllable-count coll]
(filter #(= syllable-count (phrase-syllable-count %)) coll))
(defn syllable-stop
[target]
(fn [inner-target result]
(<= (:syllable-count target)
(apply + (map :syllable-count result)))))
(defn generate-rhyme-for-phrase
[words adjust phrase]
(let [words (map #(assoc % :weight 1) words)
words-map (into {} (map #(vector (:norm-word %) %) words))
target (phrase->word words phrase)]
(prhymer words adjust target (syllable-stop target))))
(def adj (comp (adjust-for-markov dr/darkov 0.25)
(adjust-for-markov dr/darkov-2 0.9)
(adjust-for-tail-rimes words-map 0.9)))
(defn generate-prhymes [poem]
(let [r (partial generate-rhyme-for-phrase frp/popular adj)]
(fn []
(->> poem
(map (fn [phrase]
(let [target (phrase->word frp/popular phrase)]
(first
(filter
#(and
(or (< 0.9 (rand))
(nlp/valid-sentence? (string/join " " (map :norm-word %))))
(= (:syllable-count target)
(apply + (map :syllable-count %))))
(r phrase))))))
(map (fn [line] (map #(:norm-word %) line)))
(map #(string/join " " %))))))
(comment
(->> ["mister sandman give me a dream"
"make him the cutest that i've ever seen"
"give him two lips like roses in clover"
"then tell him that his lonesome nights are over"]
(generate-prhymes)
(repeatedly)
(take 2))
(->> ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]
(generate-prhymes)
(repeatedly)
(take 10))
;; => (("it parts loran"
;; "some kind supreme"
;; "beaming idealist"
;; "just some more lair queen")
;; ("where do we ran"
;; "whole hold back steam"
;; "the true terrorist"
;; "murders rocked chlorine")
;; ("that cages span" "water the steam" "personnel stylist" "slavery marine")
;; ("from distant bran" "admissions ream" "and deaf elitist" "persuaded soybean")
;; ("auction merman"
;; "and fills my dream"
;; "to his bows poorest"
;; "disappearing wean")
;; ("get under an" "appetite seam" "we must ingest gist" "overboard caffeine")
;; ("moody madman"
;; "tableau downstream"
;; "enormously hissed"
;; "unzip hap file sheen")
;; ("mistakes merman"
;; "there is this dream"
;; "cherries publicist"
;; "name of my routine")
;; ("for the choir" "remote extreme" "olives internist" "too late to you mean")
;; ("the ghosts that tan"
;; "built on the dream"
;; "is this band is pissed"
;; "arts summon fifteen"))
(def adj (comp (adjust-for-markov dr/darkov 0.9)
(adjust-for-tail-rimes words-map 0.9)))
(let [r (generate-rhyme-for-phrase
frp/popular
adj
"mister sandman")]
(take 3 r))
(def r (partial generate-rhyme-for-phrase frp/popular adj))
(take
10
(repeatedly
(fn []
(->> ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]
(map (fn [phrase]
(let [target (phrase->word frp/popular phrase)]
(first
(filter
#(= (:syllable-count target)
(apply + (map :syllable-count %)))
(r phrase))))))
(map (fn [line] (map #(:norm-word %) line)))
(map #(string/join " " %))))))
;; ("farther caveman"
;; "pain primal scream"
;; "and this fucking pissed"
;; "all become true green")
;; ("guarding mailman"
;; "stand striving beam"
;; "in gothic earnest"
;; "chaos unforeseen")
;; ("face the sandman"
;; "push comes the steam"
;; "industrialist"
;; "well thought that thrives bean")
;; ("restore milkman"
;; "even first gleam"
;; "contract alchemist"
;; "slavery marine")
;; ("clouds nights the pan"
;; "blissful peace theme"
;; "treason guitarist"
;; "chaos unforeseen")
;; ("painter japan"
;; "from hell extreme"
;; "with me to resist"
;; "to your bet fifteen")
;; ("he trusts doorman"
;; "bang bang the dream"
;; "truth recruit fascist"
;; "to the wealth saline")
;; ("accounting bran"
;; "rainy clouds gleam"
;; "cardiologist"
;; "yang trader eighteen"))
(map #(take 1 %) (map r ["mister sandman"
"give me a dream"
"make him the cutest"
"that i've ever seen"]))
(take 3 frp/words)
(phrase->word frp/popular "well-off")
(map (fn [line] (phrase->word frp/popular line))
@ -180,13 +359,15 @@
"give me dream"
"make him the cutest"
"that i've ever seen"])
(defonce lovecraft-markov (read-string (slurp "lovecraft.edn")))
(def adj (comp (adjust-for-markov lovecraft-markov 0.9)
(adjust-for-rimes words-map 0.9)))
(repeatedly 10 #(gen-prhymes frp/popular
adj
["i'm testing rhyme software"
"what do you think"]))
(->> (gen-prhymes frp/popular
adj
["mister sandman"
"give me dream"
"make him the cutest"
"that i've ever seen"]))
(take 5 (filter #(= 7 (phrase-syllable-count (first %)))
(repeatedly #(gen-prhymes frp/popular adj ["taylor is my beautiful"]))))

@ -72,6 +72,31 @@
(def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"})
(defn window [n]
(fn [coll]
(cond
(empty? coll) []
(< (count coll) n) []
:else (cons (take n coll)
(lazy-seq ((window n) (drop n coll)))))))
(defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
(defn make-markov [text]
(let [tokens (reverse (string/split (clean-text text) #"\s+"))]
(reduce
(fn [a [t1 t2]]
(update-in a [t1 t2] (fnil inc 0)))
{}
((window 2) tokens))))
(defn write-markov [filename markov]
(spit filename (pr-str markov)))
(defn read-markov [filename]
(read-string (slurp filename)))
(defn take-through
"(take-through even? [1 2 3 4 7 7 5 2 8 10])
returns '((1 2 3 4) (7 7 5 2) (8) (10))"

Loading…
Cancel
Save