main
Eric Ihli 4 years ago
parent c2d3882854
commit 94f32d0a2f

@ -139,7 +139,8 @@
{}
((util/window (inc n)) tokens))))
(def darkov-2 (util/read-markov "dark-corpus-2.edn"))
(defn read-darkov-2 []
(util/read-markov (io/resource "dark-corpus-2.edn")))
(defn norm-filepath [text]
(-> text
@ -167,7 +168,8 @@
artist-album-texts)))
(comment
(def darkov-2 (util/read-markov "dark-corpus-2.edn"))
(def darkov-2 (util/read-markov (io/resource "dark-corpus-2.edn")))
(take 10 darkov-2)
(get darkov-2 '(nil nil))
(take 3 (scrape base-url))
(-main)
@ -200,11 +202,4 @@
(make-markov (slurp "darklyrics.txt") 1))))
(run! write-scrape (take 4 (scrape base-url)))
(take 100 darkov)
(util/write-markov "darklyrics.edn" darkov)
(spit "test.txt" (pr-str {:foo "1"}))
(def lyrics (scrape base-url))
(with-open [writer (io/writer "darklyrics.txt")]
(run!
#(.write writer %)
(take 200 (filter english? lyrics)))))
(def lyrics (scrape base-url)))

@ -15,25 +15,6 @@
{}
((util/window (inc n)) tokens)))
(defn -main [directory]
(let [directory (io/file "dark-corpus")
files (file-seq directory)
lines (->> files
(remove #(.isDirectory %))
(map #(slurp %))
(map #(string/split % #"\n+"))
#_(map #(util/extend-coll % nil 1)))
markovs (->> lines
(map #(make-markov % 1)))]
(take 1 markovs)))
(merge-with
(fn [a b]
(+ (if (nil? a) 0 a)
b))
{:foo 1}
{:bar 2 :foo 3})
(defn merge-markov [& maps]
(apply
merge-with
@ -56,8 +37,8 @@
"them" 50
"baz" 99}}))
(defn gen-markov []
(->> (file-seq (io/file "dark-corpus"))
(defn gen-markov [directory]
(->> (file-seq (io/file directory))
(remove #(.isDirectory %))
(map #(slurp %))
(map clean-text)
@ -69,27 +50,4 @@
(map #(util/extend-coll % nil 2))
(map #(make-markov % 2))
(apply merge-markov)
(util/write-markov "dark-corpus-2.edn")))
(comment
(gen-markov)
(->> (file-seq (io/file "dark-corpus"))
(remove #(.isDirectory %))
(map #(slurp %))
(map clean-text)
(filter util/english?)
(map #(string/split % #"\n+"))
(flatten)
(map #(string/split % #"\s+"))
(map reverse)
(map #(util/extend-coll % nil 2))
(map #(make-markov % 2))
(apply merge-markov)
(util/write-markov "dark-corpus-2.edn"))
(def darkov-2 (util/read-markov "dark-corpus-2.edn"))
(def darkov-1 (util/read-markov "dark-corpus-1.edn"))
(get darkov-2 '(nil nil))
(darkov-1 '("london"))
(-main "dark-lyrics"))
(util/write-markov "resources/dark-corpus-2.edn")))

@ -25,6 +25,75 @@
(def nouns
(set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
(defrecord Word [word syllables syllable-count rimes onsets nuclei])
;;; Typical rhyme model (explanation of following 3 functions)
;;
;; In the typical theory of syllable structure, the general structure of a
;; syllable (σ) consists of three segments. These segments are grouped into two
;; components:
;;
;; Onset (ω)
;; a consonant or consonant cluster, obligatory in some languages,
;; optional or even restricted in others
;;
;; Rime (ρ)
;; right branch, contrasts with onset, splits into nucleus and coda
;;
;; Nucleus (ν)
;; a vowel or syllabic consonant, obligatory in most languages
;; Coda (κ)
;; consonant, optional in some languages, highly restricted or prohibited in others
(defn rimes [syllables]
(->> syllables
(map reverse)
(map #(first (u/take-through u/vowel %)))
(map reverse)))
(defn onset+nucleus [syllables]
(->> syllables
(map #(first (u/take-through u/vowel %)))))
(defn nucleus [syllables]
(map #(list (last (first (u/take-through u/vowel %)))) syllables))
(defn make-word [word]
(let [syllables (s/syllabify (rest word))
rimes (rimes syllables)
onsets (onset+nucleus syllables)
nuclei (nucleus syllables)]
(->> (->Word
(first word)
syllables
(count syllables)
rimes
onsets
nuclei)
(#(assoc % :norm-word (string/lower-case
(string/replace
(:word %)
#"\(\d+\)"
"")))))))
(defn make-word-1 [word phonemes]
(let [syllables (s/syllabify phonemes)
rimes (rimes syllables)
onsets (onset+nucleus syllables)
nuclei (nucleus syllables)]
(->> (->Word
(string/lower-case word)
syllables
(count syllables)
rimes
onsets
nuclei)
;; CMU dict has multiple pronounciations for some words.
;; foobar(1), foobar(2), etc...
;; it's useful to have the normalized word for situations
;; when you don't care how it's pronounced.
(#(assoc % :normalized-word (string/replace (:word %) #"\(\d+\)" ""))))))
(defn words-by-rime* [words]
(let [words-with-rime (->> words
(map rest)
@ -129,35 +198,6 @@
(defn filter-to-syllable-count [n words]
(filter (fn [word] (= n (count (s/syllabify (rest word))))) words))
(defn rimes [syllables]
(->> syllables
(map reverse)
(map #(first (u/take-through u/vowel %)))
(map reverse)))
(defn rimes? [a b]
(cond
(and (= 1 (count (last (:rimes a))))
(= 1 (count (last (:rimes b))))
(or (= (last (:rimes a)) '("ER"))
(= (last (:rimes a)) '("AA"))
(= (last (:rimes a)) '("AE"))
(= (last (:rimes a)) '("AO"))
(= (last (:rimes a)) '("AW"))
(= (last (:rimes a)) '("EH"))
(= (last (:rimes a)) '("IH"))
(= (last (:rimes a)) '("UH"))
(= (last (:rimes a)) '("AH"))))
(= (list (first (take-last 2 (:nuclei a)))
(last (:onsets a)))
(list (first (take-last 2 (:nuclei b)))
(last (:onsets b))))
(and (= 1 (count (last (:rimes a))))
(= 1 (count (last (:rimes b)))))
(= (take-last 2 (:nuclei a)) (take-last 2 (:nuclei b)))
:else (= (last (:rimes a)) (last (:rimes b)))))
(defn rhymes?
"What does it mean for something to rhyme?"
@ -185,13 +225,6 @@
:else (= (last (:rimes a)) (last (:rimes b)))))
(defn onset+nucleus [syllables]
(->> syllables
(map #(first (u/take-through u/vowel %)))))
(defn nucleus [syllables]
(map #(list (last (first (u/take-through u/vowel %)))) syllables))
(defn rhyming-word
"Simple lookup in data.
Data is a tree of syllables to words.

@ -1,6 +1,5 @@
(ns com.owoga.prhyme.frp
(:require [clojure.java.io :as io]
[clojure.pprint :as pprint]
[clojure.string :as string]
[clojure.set :as set]
[com.owoga.prhyme.core :as p]

@ -81,30 +81,35 @@
the tail of a target."
[percent]
(fn [[words target result]]
(if (empty? result)
(let [words-with-rime-count
(map
(fn [word]
(assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
words)
[rhyming non-rhyming]
((juxt filter remove)
#(< 0 (:num-matching %))
words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(let [words-with-rime-count
(map
(fn [word]
(as-> word word
(assoc word :weight (* adjustment-rhyming (:weight word)))
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming)
non-rhyming)
target
result])
(assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
words)
[rhyming non-rhyming]
((juxt filter remove)
#(< 0 (:num-matching %))
words-with-rime-count)
weight-non-rhyming (apply + (map :weight non-rhyming))
target-weight-rhyming (* 100 percent weight-non-rhyming)
count-rhyming (count rhyming)
adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
[(concat
(map
(fn [word]
(as-> word word
(assoc word :weight (* adjustment-rhyming (:weight word)))
(assoc word :adjustment-for-rimes adjustment-rhyming)))
rhyming)
non-rhyming)
target
result])))
(defn adjust-for-tail-rhyme
[percent]
(fn [[words target result]]
(if (empty? result)
((adjust-for-rhymes percent) [words target result])
[words target result])))

@ -3,12 +3,13 @@
[com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
[clojure.string :as string]
[com.owoga.prhyme.frp :as frp]
[com.owoga.corpus.darklyrics :as darklyrics]))
[taoensso.nippy :as nippy]
[clojure.java.io :as io]))
(defn rhyme-from-scheme
"scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]"
[rhymer scheme]
[rhymer markov scheme]
(let [base-words (map #(assoc % :weight 1) frp/popular)]
(loop [scheme scheme
rhymes {}
@ -25,7 +26,7 @@
(remove
nil?
[(weighted-selection/adjust-for-markov
darklyrics/darkov-2
markov
0.99)
(when (rhymes pattern)
(weighted-selection/adjust-for-tail-rhyme 0.99))]))
@ -45,16 +46,12 @@
(conj result rhyme)))))))
(comment
(rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8)))
(time (def darkov-2 (nippy/thaw-from-file (io/resource "darkov-2.bin"))))
(rhyme-from-scheme nil darkov-2 '((A 8) (A 8) (B 5) (B 5) (A 8)))
)
(comment
(->> (repeatedly
(fn []
(rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7)))))
(take 2))
(apply map vector (list '(1 2 3) '(4 5 6)))
(->> (gen/selection-seq
(map #(assoc % :weight 1) frp/words)
(weighted-selection/adjust-for-rhymes 0.99)
@ -86,3 +83,5 @@
"hate is my virtue"
"my feelings are well overdue"
"war we await the afterlife"])

@ -58,8 +58,7 @@
(empty? onset) (recur (rest phones) [(first phones)])
(not (>sonorous (first phones) (last onset))) [onset phones]
:else (recur (rest phones) (conj onset (first phones)))))))
(comment
(slurp-onset (reverse ["B" "W"])))
(defn fix-lax
"https://www.reddit.com/r/phonetics/comments/i7hp5f/what_is_the_alaska_rule_in_reference_to/
@ -103,22 +102,35 @@
(comment
(syllabify ["AH" "L" "AE" "S" "K" "AH"])
;; => (("AH") ("L" "AE" "S") ("K" "AH"))
(syllabify ["H" "ER" "AH" "L" "D"])
;; => (("H" "ER") ("AH" "L" "D"))
(syllabify ["H" "EH" "R" "AH" "L" "D"])
;; => (("H" "EH") ("R" "AH" "L" "D"))
(syllabify ["B" "OY" "N" "K"])
(syllabify ["H" "ER" "AH" "L" "D"])
;; => (("B" "OY" "N" "K"))
(syllabify ["G" "L" "IH" "M" "P" "S" "T"])
;; => (("G" "L" "IH" "M" "P" "S" "T"))
(syllabify ["B" "IY" "G" "L" "IH" "M" "P" "S" "T"])
;; => (("B" "IY") ("G" "L" "IH" "M" "P" "S" "T"))
(syllabify ["G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"])
;; => (("G" "L" "IH" "M" "P" "S") ("T" "R" "EH" "D"))
(syllabify ["UH" "P" "R" "AY" "S" "IY" "NG"])
;; => (("UH") ("P" "R" "AY") ("S" "IY" "NG"))
(syllabify ["UH" "L" "AE" "S" "K" "UH"])
;; => (("UH") ("L" "AE" "S") ("K" "UH"))
(syllabify ["R" "OY" "AH" "L"])
;; => (("R" "OY") ("AH" "L"))
(syllabify ["R" "AY" "AH" "L"])
;; => (("R" "AY") ("AH" "L"))
;; TODO: Fix below wi-thcheeze
(syllabify ["R" "OY" "AH" "L" "W" "IH" "TH" "CH" "IY" "Z"])
;; => (("R" "OY") ("AH" "L") ("W" "IH") ("TH" "CH" "IY" "Z"))
;;
;; ["GLIMPSED" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "IY" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "EH" "G" "L" "IH" "M" "P" "S" "T"]
;; ["GLIMSTEST" "G" "L" "IH" "M" "S" "T" "EH" "S" "T"]
;; ["GLIMPSTRED" "G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]
;; ["GLIMSTRED" "G" "L" "IH" "M" "S" "T" "R" "EH" "D"]
)
;; ["GLIMPSED" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "IY" "G" "L" "IH" "M" "P" "S" "T"]
;; ["BEGLIMPSED" "B" "EH" "G" "L" "IH" "M" "P" "S" "T"]
;; ["GLIMSTEST" "G" "L" "IH" "M" "S" "T" "EH" "S" "T"]
;; ["GLIMPSTRED" "G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]
;; ["GLIMSTRED" "G" "L" "IH" "M" "S" "T" "R" "EH" "D"]

Loading…
Cancel
Save