Cleanup

5 years ago · 94f32d0a2f
parent c2d3882854
commit 94f32d0a2f
7 changed files with 136 additions and 135 deletions
--- a/src/com/owoga/corpus/darklyrics.clj
+++ b/src/com/owoga/corpus/darklyrics.clj
@ -139,7 +139,8 @@
     {}
     ((util/window (inc n)) tokens))))
-(def darkov-2 (util/read-markov "dark-corpus-2.edn"))
+(defn read-darkov-2 []
  (util/read-markov (io/resource "dark-corpus-2.edn")))
 (defn norm-filepath [text]
  (-> text
@ -167,7 +168,8 @@
     artist-album-texts)))
 (comment
-  (def darkov-2 (util/read-markov "dark-corpus-2.edn"))
+  (def darkov-2 (util/read-markov (io/resource "dark-corpus-2.edn")))
  (take 10 darkov-2)
  (get darkov-2 '(nil nil))
  (take 3 (scrape base-url))
  (-main)
@ -200,11 +202,4 @@
          (make-markov (slurp "darklyrics.txt") 1))))
  (run! write-scrape (take 4 (scrape base-url)))
-  (take 100 darkov)
+  (def lyrics (scrape base-url)))
  (util/write-markov "darklyrics.edn" darkov)
  (spit "test.txt" (pr-str {:foo "1"}))
  (def lyrics (scrape base-url))
  (with-open [writer (io/writer "darklyrics.txt")]
    (run!
     #(.write writer %)
     (take 200 (filter english? lyrics)))))
--- a/src/com/owoga/corpus/markov.clj
+++ b/src/com/owoga/corpus/markov.clj
@ -15,25 +15,6 @@
   {}
   ((util/window (inc n)) tokens)))
 (defn -main [directory]
  (let [directory (io/file "dark-corpus")
        files (file-seq directory)
        lines (->> files
                   (remove #(.isDirectory %))
                   (map #(slurp %))
                   (map #(string/split % #"\n+"))
                   #_(map #(util/extend-coll % nil 1)))
        markovs (->> lines
                     (map #(make-markov % 1)))]
    (take 1 markovs)))
 (merge-with
 (fn [a b]
   (+ (if (nil? a) 0 a)
      b))
 {:foo 1}
 {:bar 2 :foo 3})
 (defn merge-markov [& maps]
  (apply
   merge-with
@ -56,8 +37,8 @@
               "them" 50
               "baz" 99}}))
-(defn gen-markov []
+(defn gen-markov [directory]
-  (->> (file-seq (io/file "dark-corpus"))
+  (->> (file-seq (io/file directory))
       (remove #(.isDirectory %))
       (map #(slurp %))
       (map clean-text)
@ -69,27 +50,4 @@
       (map #(util/extend-coll % nil 2))
       (map #(make-markov % 2))
       (apply merge-markov)
-       (util/write-markov "dark-corpus-2.edn")))
+       (util/write-markov "resources/dark-corpus-2.edn")))
 (comment
  (gen-markov)
  (->> (file-seq (io/file "dark-corpus"))
       (remove #(.isDirectory %))
       (map #(slurp %))
       (map clean-text)
       (filter util/english?)
       (map #(string/split % #"\n+"))
       (flatten)
       (map #(string/split % #"\s+"))
       (map reverse)
       (map #(util/extend-coll % nil 2))
       (map #(make-markov % 2))
       (apply merge-markov)
       (util/write-markov "dark-corpus-2.edn"))
  (def darkov-2 (util/read-markov "dark-corpus-2.edn"))
  (def darkov-1 (util/read-markov "dark-corpus-1.edn"))
  (get darkov-2 '(nil nil))
  (darkov-1 '("london"))
  (-main "dark-lyrics"))
--- a/src/com/owoga/prhyme/core.clj
+++ b/src/com/owoga/prhyme/core.clj
@ -25,6 +25,75 @@
 (def nouns
  (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt"))))))
 (defrecord Word [word syllables syllable-count rimes onsets nuclei])
 ;;; Typical rhyme model (explanation of following 3 functions)
 ;;
 ;; In the typical theory of syllable structure, the general structure of a
 ;; syllable (σ) consists of three segments. These segments are grouped into two
 ;; components:
 ;;
 ;; Onset (ω)
 ;;     a consonant or consonant cluster, obligatory in some languages,
 ;;     optional or even restricted in others
 ;;    
 ;; Rime (ρ)
 ;;     right branch, contrasts with onset, splits into nucleus and coda
 ;;
 ;;     Nucleus (ν)
 ;;         a vowel or syllabic consonant, obligatory in most languages
 ;;     Coda (κ)
 ;;         consonant, optional in some languages, highly restricted or prohibited in others
 (defn rimes [syllables]
  (->> syllables
       (map reverse)
       (map #(first (u/take-through u/vowel %)))
       (map reverse)))
 (defn onset+nucleus [syllables]
  (->> syllables
       (map #(first (u/take-through u/vowel %)))))
 (defn nucleus [syllables]
  (map #(list (last (first (u/take-through u/vowel %)))) syllables))
 (defn make-word [word]
  (let [syllables (s/syllabify (rest word))
        rimes     (rimes syllables)
        onsets    (onset+nucleus syllables)
        nuclei    (nucleus syllables)]
    (->> (->Word
          (first word)
          syllables
          (count syllables)
          rimes
          onsets
          nuclei)
         (#(assoc % :norm-word (string/lower-case
                                (string/replace
                                 (:word %)
                                 #"\(\d+\)"
                                 "")))))))
 (defn make-word-1 [word phonemes]
  (let [syllables (s/syllabify phonemes)
        rimes     (rimes syllables)
        onsets    (onset+nucleus syllables)
        nuclei    (nucleus syllables)]
    (->> (->Word
          (string/lower-case word)
          syllables
          (count syllables)
          rimes
          onsets
          nuclei)
         ;; CMU dict has multiple pronounciations for some words.
         ;; foobar(1), foobar(2), etc...
         ;; it's useful to have the normalized word for situations
         ;; when you don't care how it's pronounced.
         (#(assoc % :normalized-word (string/replace (:word %) #"\(\d+\)" ""))))))
 (defn words-by-rime* [words]
  (let [words-with-rime (->> words
                             (map rest)
@ -129,35 +198,6 @@
 (defn filter-to-syllable-count [n words]
  (filter (fn [word] (= n (count (s/syllabify (rest word))))) words))
 (defn rimes [syllables]
  (->> syllables
       (map reverse)
       (map #(first (u/take-through u/vowel %)))
       (map reverse)))
 (defn rimes? [a b]
  (cond
    (and (= 1 (count (last (:rimes a))))
         (= 1 (count (last (:rimes b))))
         (or (= (last (:rimes a)) '("ER"))
             (= (last (:rimes a)) '("AA"))
             (= (last (:rimes a)) '("AE"))
             (= (last (:rimes a)) '("AO"))
             (= (last (:rimes a)) '("AW"))
             (= (last (:rimes a)) '("EH"))
             (= (last (:rimes a)) '("IH"))
             (= (last (:rimes a)) '("UH"))
             (= (last (:rimes a)) '("AH"))))
    (= (list (first (take-last 2 (:nuclei a)))
             (last (:onsets a)))
       (list (first (take-last 2 (:nuclei b)))
             (last (:onsets b))))
    (and (= 1 (count (last (:rimes a))))
         (= 1 (count (last (:rimes b)))))
    (= (take-last 2 (:nuclei a)) (take-last 2 (:nuclei b)))
    :else (= (last (:rimes a)) (last (:rimes b)))))
 (defn rhymes?
  "What does it mean for something to rhyme?"
@ -185,13 +225,6 @@
    :else (= (last (:rimes a)) (last (:rimes b)))))
 (defn onset+nucleus [syllables]
  (->> syllables
       (map #(first (u/take-through u/vowel %)))))
 (defn nucleus [syllables]
  (map #(list (last (first (u/take-through u/vowel %)))) syllables))
 (defn rhyming-word
  "Simple lookup in data.
  Data is a tree of syllables to words.
--- a/src/com/owoga/prhyme/frp.clj
+++ b/src/com/owoga/prhyme/frp.clj
@ -1,6 +1,5 @@
 (ns com.owoga.prhyme.frp
  (:require [clojure.java.io :as io]
            [clojure.pprint :as pprint]
            [clojure.string :as string]
            [clojure.set :as set]
            [com.owoga.prhyme.core :as p]
--- a/src/com/owoga/prhyme/generation/weighted_selection.clj
+++ b/src/com/owoga/prhyme/generation/weighted_selection.clj
@ -81,30 +81,35 @@
  the tail of a target."
  [percent]
  (fn [[words target result]]
-    (if (empty? result)
+    (let [words-with-rime-count
      (let [words-with-rime-count
            (map
             (fn [word]
               (assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
             words)
            [rhyming non-rhyming]
            ((juxt filter remove)
             #(< 0 (:num-matching %))
             words-with-rime-count)
            weight-non-rhyming (apply + (map :weight non-rhyming))
            target-weight-rhyming (* 100 percent weight-non-rhyming)
            count-rhyming (count rhyming)
            adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
        [(concat
          (map
           (fn [word]
-             (as-> word word
+             (assoc word :num-matching (if (prhyme/rhymes? target word) 1 0)))
-               (assoc word :weight (* adjustment-rhyming (:weight word)))
+           words)
-               (assoc word :adjustment-for-rimes adjustment-rhyming)))
+
-           rhyming)
+          [rhyming non-rhyming]
-          non-rhyming)
+          ((juxt filter remove)
-         target
+           #(< 0 (:num-matching %))
-         result])
+           words-with-rime-count)
          weight-non-rhyming (apply + (map :weight non-rhyming))
          target-weight-rhyming (* 100 percent weight-non-rhyming)
          count-rhyming (count rhyming)
          adjustment-rhyming (if (= 0 count-rhyming) 1 (/ target-weight-rhyming count-rhyming))]
      [(concat
        (map
         (fn [word]
           (as-> word word
             (assoc word :weight (* adjustment-rhyming (:weight word)))
             (assoc word :adjustment-for-rimes adjustment-rhyming)))
         rhyming)
        non-rhyming)
       target
       result])))
 (defn adjust-for-tail-rhyme
  [percent]
  (fn [[words target result]]
    (if (empty? result)
      ((adjust-for-rhymes percent) [words target result])
      [words target result])))
--- a/src/com/owoga/prhyme/lymeric.clj
+++ b/src/com/owoga/prhyme/lymeric.clj
@ -3,12 +3,13 @@
            [com.owoga.prhyme.generation.weighted-selection :as weighted-selection]
            [clojure.string :as string]
            [com.owoga.prhyme.frp :as frp]
-            [com.owoga.corpus.darklyrics :as darklyrics]))
+            [taoensso.nippy :as nippy]
            [clojure.java.io :as io]))
 (defn rhyme-from-scheme
  "scheme of format [[A 9] [A 9] [B 5] [B 5] [A 9]]"
-  [rhymer scheme]
+  [rhymer markov scheme]
  (let [base-words (map #(assoc % :weight 1) frp/popular)]
    (loop [scheme scheme
           rhymes {}
@ -25,7 +26,7 @@
                   (remove
                    nil?
                    [(weighted-selection/adjust-for-markov
-                      darklyrics/darkov-2
+                      markov
                      0.99)
                     (when (rhymes pattern)
                       (weighted-selection/adjust-for-tail-rhyme 0.99))]))
@ -45,16 +46,12 @@
                 (conj result rhyme)))))))
 (comment
-  (rhyme-from-scheme nil '((A 8) (A 8) (B 5) (B 5) (A 8)))
+  (time (def darkov-2 (nippy/thaw-from-file (io/resource "darkov-2.bin"))))
  (rhyme-from-scheme nil darkov-2 '((A 8) (A 8) (B 5) (B 5) (A 8)))
  )
 (comment
  (->> (repeatedly
        (fn []
          (rhyme-from-scheme nil '((A 7) (A 7) (B 5) (B 5) (A 7)))))
       (take 2))
  (apply map vector (list '(1 2 3) '(4 5 6)))
  (->> (gen/selection-seq
        (map #(assoc % :weight 1) frp/words)
        (weighted-selection/adjust-for-rhymes 0.99)
@ -86,3 +83,5 @@
   "hate is my virtue"
   "my feelings are well overdue"
   "war we await the afterlife"])
--- a/src/com/owoga/prhyme/syllabify.clj
+++ b/src/com/owoga/prhyme/syllabify.clj
@ -58,8 +58,7 @@
       (empty? onset) (recur (rest phones) [(first phones)])
       (not (>sonorous (first phones) (last onset))) [onset phones]
       :else (recur (rest phones) (conj onset (first phones)))))))
-(comment
+
  (slurp-onset (reverse ["B" "W"])))
 (defn fix-lax
  "https://www.reddit.com/r/phonetics/comments/i7hp5f/what_is_the_alaska_rule_in_reference_to/
@ -103,22 +102,35 @@
 (comment
  (syllabify ["AH" "L" "AE" "S" "K" "AH"])
  ;; => (("AH") ("L" "AE" "S") ("K" "AH"))
  (syllabify ["H" "ER" "AH" "L" "D"])
  ;; => (("H" "ER") ("AH" "L" "D"))
  (syllabify ["H" "EH" "R" "AH" "L" "D"])
  ;; => (("H" "EH") ("R" "AH" "L" "D"))
  (syllabify ["B" "OY" "N" "K"])
-  (syllabify ["H" "ER" "AH" "L" "D"])
+  ;; => (("B" "OY" "N" "K"))
  (syllabify ["G" "L" "IH" "M" "P" "S" "T"])
  ;; => (("G" "L" "IH" "M" "P" "S" "T"))
  (syllabify ["B" "IY" "G" "L" "IH" "M" "P" "S" "T"])
  ;; => (("B" "IY") ("G" "L" "IH" "M" "P" "S" "T"))
  (syllabify ["G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"])
  ;; => (("G" "L" "IH" "M" "P" "S") ("T" "R" "EH" "D"))
  (syllabify ["UH" "P" "R" "AY" "S" "IY" "NG"])
  ;; => (("UH") ("P" "R" "AY") ("S" "IY" "NG"))
  (syllabify ["UH" "L" "AE" "S" "K" "UH"])
  ;; => (("UH") ("L" "AE" "S") ("K" "UH"))
  (syllabify ["R" "OY" "AH" "L"])
  ;; => (("R" "OY") ("AH" "L"))
  (syllabify ["R" "AY" "AH" "L"])
  ;; => (("R" "AY") ("AH" "L"))
  ;; TODO: Fix below wi-thcheeze
  (syllabify ["R" "OY" "AH" "L" "W" "IH" "TH" "CH" "IY" "Z"])
  ;; => (("R" "OY") ("AH" "L") ("W" "IH") ("TH" "CH" "IY" "Z"))
  ;;
  ;; ["GLIMPSED" "G" "L" "IH" "M" "P" "S" "T"]
  ;; ["BEGLIMPSED" "B" "IY" "G" "L" "IH" "M" "P" "S" "T"]
  ;; ["BEGLIMPSED" "B" "EH" "G" "L" "IH" "M" "P" "S" "T"]
  ;; ["GLIMSTEST" "G" "L" "IH" "M" "S" "T" "EH" "S" "T"]
  ;; ["GLIMPSTRED" "G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]
  ;; ["GLIMSTRED" "G" "L" "IH" "M" "S" "T" "R" "EH" "D"]
  )
 ;; ["GLIMPSED" "G" "L" "IH" "M" "P" "S" "T"]
 ;; ["BEGLIMPSED" "B" "IY" "G" "L" "IH" "M" "P" "S" "T"]
 ;; ["BEGLIMPSED" "B" "EH" "G" "L" "IH" "M" "P" "S" "T"]
 ;; ["GLIMSTEST" "G" "L" "IH" "M" "S" "T" "EH" "S" "T"]
 ;; ["GLIMPSTRED" "G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]
 ;; ["GLIMSTRED" "G" "L" "IH" "M" "S" "T" "R" "EH" "D"]