main
Eric Ihli 4 years ago
parent c2d3882854
commit 94f32d0a2f

@ -139,7 +139,8 @@
{} {}
((util/window (inc n)) tokens)))) ((util/window (inc n)) tokens))))
(def darkov-2 (util/read-markov "dark-corpus-2.edn")) (defn read-darkov-2 []
(util/read-markov (io/resource "dark-corpus-2.edn")))
(defn norm-filepath [text] (defn norm-filepath [text]
(-> text (-> text
@ -167,7 +168,8 @@
artist-album-texts))) artist-album-texts)))
(comment (comment
(def darkov-2 (util/read-markov "dark-corpus-2.edn")) (def darkov-2 (util/read-markov (io/resource "dark-corpus-2.edn")))
(take 10 darkov-2)
(get darkov-2 '(nil nil)) (get darkov-2 '(nil nil))
(take 3 (scrape base-url)) (take 3 (scrape base-url))
(-main) (-main)
@ -200,11 +202,4 @@
(make-markov (slurp "darklyrics.txt") 1)))) (make-markov (slurp "darklyrics.txt") 1))))
(run! write-scrape (take 4 (scrape base-url))) (run! write-scrape (take 4 (scrape base-url)))
(take 100 darkov) (def lyrics (scrape base-url)))
(util/write-markov "darklyrics.edn" darkov)
(spit "test.txt" (pr-str {:foo "1"}))
(def lyrics (scrape base-url))
(with-open [writer (io/writer "darklyrics.txt")]
(run!
#(.write writer %)
(take 200 (filter english? lyrics)))))

@ -15,25 +15,6 @@
{} {}
((util/window (inc n)) tokens))) ((util/window (inc n)) tokens)))
(defn -main [directory]
(let [directory (io/file "dark-corpus")
files (file-seq directory)
lines (->> files
(remove #(.isDirectory %))
(map #(slurp %))
(map #(string/split % #"\n+"))
#_(map #(util/extend-coll % nil 1)))
markovs (->> lines
(map #(make-markov % 1)))]
(take 1 markovs)))
(merge-with
(fn [a b]
(+ (if (nil? a) 0 a)
b))
{:foo 1}
{:bar 2 :foo 3})
(defn merge-markov [& maps] (defn merge-markov [& maps]
(apply (apply
merge-with merge-with
@ -56,8 +37,8 @@
"them" 50 "them" 50
"baz" 99}})) "baz" 99}}))
(defn gen-markov [] (defn gen-markov [directory]
(->> (file-seq (io/file "dark-corpus")) (->> (file-seq (io/file directory))
(remove #(.isDirectory %)) (remove #(.isDirectory %))
(map #(slurp %)) (map #(slurp %))
(map clean-text) (map clean-text)
@ -69,27 +50,4 @@
(map #(util/extend-coll % nil 2)) (map #(util/extend-coll % nil 2))
(map #(make-markov % 2)) (map #(make-markov % 2))
(apply merge-markov) (apply merge-markov)
(util/write-markov "dark-corpus-2.edn"))) (util/write-markov "resources/dark-corpus-2.edn")))
(comment
(gen-markov)
(->> (file-seq (io/file "dark-corpus"))
(remove #(.isDirectory %))
(map #(slurp %))
(map clean-text)
(filter util/english?)
(map #(string/split % #"\n+"))
(flatten)
(map #(string/split % #"\s+"))
(map reverse)
(map #(util/extend-coll % nil 2))
(map #(make-markov % 2))
(apply merge-markov)
(util/write-markov "dark-corpus-2.edn"))
(def darkov-2 (util/read-markov "dark-corpus-2.edn"))
(def darkov-1 (util/read-markov "dark-corpus-1.edn"))
(get darkov-2 '(nil nil))
(darkov-1 '("london"))
(-main "dark-lyrics"))

@ -25,6 +25,75 @@
(def nouns (def nouns
(set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt")))))) (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
(defrecord Word [word syllables syllable-count rimes onsets nuclei])
;;; Typical rhyme model (explanation of following 3 functions)
;;
;; In the typical theory of syllable structure, the general structure of a
;; syllable (σ) consists of three segments. These segments are grouped into two
;; components:
;;
;; Onset (ω)
;; a consonant or consonant cluster, obligatory in some languages,
;; optional or even restricted in others
;;
;; Rime (ρ)
;; right branch, contrasts with onset, splits into nucleus and coda
;;
;; Nucleus (ν)
;; a vowel or syllabic consonant, obligatory in most languages
;; Coda (κ)
;; consonant, optional in some languages, highly restricted or prohibited in others
(defn rimes [syllables]
(->> syllables
(map reverse)
(map #(first (u/take-through u/vowel %)))
(map reverse)))
(defn onset+nucleus [syllables]
(->> syllables
(map #(first (u/take-through u/vowel %)))))
(defn nucleus [syllables]
(map #(list (last (first (u/take-through u/vowel %)))) syllables))
(defn make-word [word]
(let [syllables (s/syllabify (rest word))
rimes (rimes syllables)
onsets (onset+nucleus syllables)
nuclei (nucleus syllables)]
(->> (->Word
(first word)
syllables
(count syllables)
rimes
onsets
nuclei)
(#(assoc % :norm-word (string/lower-case
(string/replace
(:word %)
#"\(\d+\)"
"")))))))
(defn make-word-1 [word phonemes]
(let [syllables (s/syllabify phonemes)
rimes (rimes syllables)
onsets (onset+nucleus syllables)
nuclei (nucleus syllables)]
(->> (->Word
(string/lower-case word)
syllables
(count syllables)
rimes
onsets
nuclei)
;; CMU dict has multiple pronounciations for some words.
;; foobar(1), foobar(2), etc...
;; it's useful to have the normalized word for situations
;; when you don't care how it's pronounced.
(#(assoc % :normalized-word (string/replace (:word %) #"\(\d+\)" ""))))))
(defn words-by-rime* [words] (defn words-by-rime* [words]
(let [words-with-rime (->> words (let [words-with-rime (->> words
(map rest) (map rest)
@ -129,35 +198,6 @@
(defn filter-to-syllable-count [n words] (defn filter-to-syllable-count [n words]
(filter (fn [word] (= n (count (s/syllabify (rest word))))) words)) (filter (fn [word] (= n (count (s/syllabify (rest word))))) words))
(defn rimes [syllables]
(->> syllables
(map reverse)
(map #(first (u/take-through u/vowel %)))
(map reverse)))
(defn rimes? [a b]
(cond
(and (= 1 (count (last (:rimes a))))
(= 1 (count (last (:rimes b))))
(or (= (last (:rimes a)) '("ER"))
(= (last (:rimes a)) '("AA"))
(= (last (:rimes a)) '("AE"))
(= (last (:rimes a)) '("AO"))
(= (last (:rimes a)) '("AW"))
(= (last (:rimes a)) '("EH"))
(= (last (:rimes a)) '("IH"))
(= (last (:rimes a)) '("UH"))
(= (last (:rimes a)) '("AH"))))
(= (list (first (take-last 2 (:nuclei a)))
(last (:onsets a)))
(list (first (take-last 2 (:nuclei b)))
(last (:onsets b))))
(and (= 1 (count (last (:rimes a))))
(= 1 (count (last (:rimes b)))))
(= (take-last 2 (:nuclei a)) (take-last 2 (:nuclei b)))
:else (= (last (:rimes a)) (last (:rimes b)))))
(defn rhymes? (defn rhymes?
"What does it mean for something to rhyme?" "What does it mean for something to rhyme?"
@ -185,13 +225,6 @@
:else (= (last (:rimes a)) (last (:rimes b))))) :else (= (last (:rimes a)) (last (:rimes b)))))
(defn onset+nucleus [syllables]
(->> syllables
(map #(first (u/take-through u/vowel %)))))
(defn nucleus [syllables]
(map #(list (last (first (u/take-through u/vowel %)))) syllables))
(defn rhyming-word (defn rhyming-word
"Simple lookup in data. "Simple lookup in data.
Data is a tree of syllables to words. Data is a tree of syllables to words.

@ -1,6 +1,5 @@
(ns com.owoga.prhyme.frp (ns com.owoga.prhyme.frp
(:require [clojure.java.io :as io] (:require [clojure.java.io :as io]
[clojure.pprint :as pprint]
[clojure.string :as string] [clojure.string :as string]
[clojure.set :as set] [clojure.set :as set]
[com.owoga.prhyme.core :as p] [com.owoga.prhyme.core :as p]

@ -81,30 +81,35 @@
the tail of a target." the tail of a target."
[percent] [percent]
(fn [[words target result]] (fn [[words target result]]
(if (empty? result) (let [words-with-rime-count
(let [words-with-rime-count
(map
(fn [word]
(assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
words)
[rhyming non-rhyming]
((juxt filter remove)
#(< 0 (:num-matching %))
words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(map (map
(fn [word] (fn [word]
(as-> word word (assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
(assoc word :weight (* adjustment-rhyming (:weight word))) words)
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming) [rhyming non-rhyming]
non-rhyming) ((juxt filter remove)
target #(< 0 (:num-matching %))
result]) words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(map
(fn [word]
(as-> word word
(assoc word :weight (* adjustment-rhyming (:weight word)))
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming)
non-rhyming)
target
result])))
(defn adjust-for-tail-rhyme
[percent]
(fn [[words target result]]
(if (empty? result)
((adjust-for-rhymes percent) [words target result])
[words target result]))) [words target result])))

@ -3,12 +3,13 @@
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection] [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
[clojure.string :as string] [clojure.string :as string]
[com.owoga.prhyme.frp :as frp] [com.owoga.prhyme.frp :as frp]
[com.owoga.corpus.darklyrics :as darklyrics])) [taoensso.nippy :as nippy]
[clojure.java.io :as io]))
(defn rhyme-from-scheme (defn rhyme-from-scheme
"scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]" "scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]"
[rhymer scheme] [rhymer markov scheme]
(let [base-words (map #(assoc % :weight 1) frp/popular)] (let [base-words (map #(assoc % :weight 1) frp/popular)]
(loop [scheme scheme (loop [scheme scheme
rhymes {} rhymes {}
@ -25,7 +26,7 @@
(remove (remove
nil? nil?
[(weighted-selection/adjust-for-markov [(weighted-selection/adjust-for-markov
darklyrics/darkov-2 markov
0.99) 0.99)
(when (rhymes pattern) (when (rhymes pattern)
(weighted-selection/adjust-for-tail-rhyme 0.99))])) (weighted-selection/adjust-for-tail-rhyme 0.99))]))
@ -45,16 +46,12 @@
(conj result rhyme))))))) (conj result rhyme)))))))
(comment (comment
(rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8))) (time (def darkov-2 (nippy/thaw-from-file (io/resource "darkov-2.bin"))))
(rhyme-from-scheme nil darkov-2 '((A 8) (A 8) (B 5) (B 5) (A 8)))
) )
(comment (comment
(->> (repeatedly
(fn []
(rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7)))))
(take 2))
(apply map vector (list '(1 2 3) '(4 5 6)))
(->> (gen/selection-seq (->> (gen/selection-seq
(map #(assoc % :weight 1) frp/words) (map #(assoc % :weight 1) frp/words)
(weighted-selection/adjust-for-rhymes 0.99) (weighted-selection/adjust-for-rhymes 0.99)
@ -86,3 +83,5 @@
"hate is my virtue" "hate is my virtue"
"my feelings are well overdue" "my feelings are well overdue"
"war we await the afterlife"]) "war we await the afterlife"])

@ -58,8 +58,7 @@
(empty? onset) (recur (rest phones) [(first phones)]) (empty? onset) (recur (rest phones) [(first phones)])
(not (>sonorous (first phones) (last onset))) [onset phones] (not (>sonorous (first phones) (last onset))) [onset phones]
:else (recur (rest phones) (conj onset (first phones))))))) :else (recur (rest phones) (conj onset (first phones)))))))
(comment
(slurp-onset (reverse ["B" "W"])))
(defn fix-lax (defn fix-lax
"https://www.reddit.com/r/phonetics/comments/i7hp5f/what_is_the_alaska_rule_in_reference_to/ "https://www.reddit.com/r/phonetics/comments/i7hp5f/what_is_the_alaska_rule_in_reference_to/
@ -103,22 +102,35 @@
(comment (comment
(syllabify ["AH" "L" "AE" "S" "K" "AH"]) (syllabify ["AH" "L" "AE" "S" "K" "AH"])
;; => (("AH") ("L" "AE" "S") ("K" "AH"))
(syllabify ["H" "ER" "AH" "L" "D"]) (syllabify ["H" "ER" "AH" "L" "D"])
;; => (("H" "ER") ("AH" "L" "D"))
(syllabify ["H" "EH" "R" "AH" "L" "D"]) (syllabify ["H" "EH" "R" "AH" "L" "D"])
;; => (("H" "EH") ("R" "AH" "L" "D"))
(syllabify ["B" "OY" "N" "K"]) (syllabify ["B" "OY" "N" "K"])
(syllabify ["H" "ER" "AH" "L" "D"]) ;; => (("B" "OY" "N" "K"))
(syllabify ["G" "L" "IH" "M" "P" "S" "T"]) (syllabify ["G" "L" "IH" "M" "P" "S" "T"])
;; => (("G" "L" "IH" "M" "P" "S" "T"))
(syllabify ["B" "IY" "G" "L" "IH" "M" "P" "S" "T"]) (syllabify ["B" "IY" "G" "L" "IH" "M" "P" "S" "T"])
;; => (("B" "IY") ("G" "L" "IH" "M" "P" "S" "T"))
(syllabify ["G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]) (syllabify ["G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"])
;; => (("G" "L" "IH" "M" "P" "S") ("T" "R" "EH" "D"))
(syllabify ["UH" "P" "R" "AY" "S" "IY" "NG"]) (syllabify ["UH" "P" "R" "AY" "S" "IY" "NG"])
;; => (("UH") ("P" "R" "AY") ("S" "IY" "NG"))
(syllabify ["UH" "L" "AE" "S" "K" "UH"]) (syllabify ["UH" "L" "AE" "S" "K" "UH"])
;; => (("UH") ("L" "AE" "S") ("K" "UH"))
(syllabify ["R" "OY" "AH" "L"]) (syllabify ["R" "OY" "AH" "L"])
;; => (("R" "OY") ("AH" "L"))
(syllabify ["R" "AY" "AH" "L"]) (syllabify ["R" "AY" "AH" "L"])
;; => (("R" "AY") ("AH" "L"))
;; TODO: Fix below wi-thcheeze
(syllabify ["R" "OY" "AH" "L" "W" "IH" "TH" "CH" "IY" "Z"]) (syllabify ["R" "OY" "AH" "L" "W" "IH" "TH" "CH" "IY" "Z"])
;; => (("R" "OY") ("AH" "L") ("W" "IH") ("TH" "CH" "IY" "Z"))
;;
;; ["GLIMPSED" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "IY" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "EH" "G" "L" "IH" "M" "P" "S" "T"]
;; ["GLIMSTEST" "G" "L" "IH" "M" "S" "T" "EH" "S" "T"]
;; ["GLIMPSTRED" "G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]
;; ["GLIMSTRED" "G" "L" "IH" "M" "S" "T" "R" "EH" "D"]
) )
;; ["GLIMPSED" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "IY" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "EH" "G" "L" "IH" "M" "P" "S" "T"]
;; ["GLIMSTEST" "G" "L" "IH" "M" "S" "T" "EH" "S" "T"]
;; ["GLIMPSTRED" "G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]
;; ["GLIMSTRED" "G" "L" "IH" "M" "S" "T" "R" "EH" "D"]

Loading…
Cancel
Save