prhyme/dev/examples/core.clj

(ns examples.core
  (:require [clojure.string :as string]
            [clojure.set]
            [clojure.java.io :as io]
            [taoensso.nippy :as nippy]
            [taoensso.timbre :as timbre]
            [com.owoga.prhyme.frp :as frp]
            [com.owoga.prhyme.util :as util]
            [com.owoga.prhyme.core :as prhyme]
            [com.owoga.prhyme.data.bigrams :as bigrams]
            [com.owoga.prhyme.gen :as gen]
            [com.owoga.prhyme.nlp.core :as nlp]
            [com.owoga.prhyme.nlg.core :as nlg]
            [com.owoga.prhyme.data.dictionary :as dict]
            [com.owoga.prhyme.data.thesaurus :as thesaurus]
            [com.owoga.prhyme.data.darklyrics :as darklyrics]
            [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
            [com.owoga.prhyme.generation.weighted-selection :as weighted]
            [clojure.set :as set]
            [clojure.zip :as zip]
            [clojure.walk :as walk]))

(defn weight-fn
  "Weight of `word` against `target`: the summed counts of what
  `frp/consecutive-matching` returns for the :rimes, :nuclei and :onsets
  keys. `result` is accepted so the signature lines up with the other
  weight/predicate functions in this file, but it is not used."
  [word target result]
  (->> [:rimes :nuclei :onsets]
       (map (fn [part]
              (count (frp/consecutive-matching word target part))))
       (reduce +)))

(defn pred-fn
  "True when `weight-fn` gives `word` a weight greater than zero against
  `target`."
  [word target result]
  (pos? (weight-fn word target result)))

(defn weight-popular
  "Weight 10 when the word's :normalized-word is found in `dict/popular`,
  otherwise 1. `target` and `result` are ignored."
  [word target result]
  (let [popular? (dict/popular (:normalized-word word))]
    (if popular? 10 1)))

(defn pred-popular
  "True when `weight-popular` returns more than its baseline weight of 1."
  [word target result]
  (> (weight-popular word target result) 1))

;; Index built by `prhyme/words-by-rime*` from only those `dict/cmu-dict`
;; entries whose first element, lower-cased, is found in `dict/popular`.
(def words-by-rime
  (let [popular-entry? (fn [entry]
                         (get dict/popular
                              (string/lower-case (first entry))))]
    (->> dict/cmu-dict
         (filter popular-entry?)
         prhyme/words-by-rime*)))

(defn rime-1
  "Returns a predicate that is true for an `x` whose last rime equals the
  last rime of `target`."
  [target]
  (let [final-rime (-> target :rimes last)]
    (fn [x]
      (= final-rime (-> x :rimes last)))))

(defn rime-2
  "Returns a predicate that is true for an `x` whose second-to-last rime
  equals the second-to-last rime of `target`. With fewer than two rimes the
  compared value is nil."
  [target]
  (let [penultimate (fn [word] (-> word :rimes butlast last))
        wanted (penultimate target)]
    (fn [x]
      (= wanted (penultimate x)))))

(defn rime-member?
  "Returns a function that looks up `x`'s :normalized-word in the set built
  from `coll`. The result is the matching element itself (or nil when it is
  absent), not a strict boolean."
  [coll]
  (let [members (set coll)]
    (fn [x]
      (get members (:normalized-word x)))))

(defn rime-compare
  "Builds a two-argument comparator out of one or more key functions.
  Each argument is turned into the vector of every key function's result
  (via `juxt`) and the two vectors are handed to `compare`."
  [& comparators]
  (let [sort-key (apply juxt comparators)]
    (fn [a b]
      (compare (sort-key a) (sort-key b)))))
;; Example comparator against a fixed target whose rimes are (1 2).
;; The arguments are swapped before comparing, so sorting with `c` yields
;; descending order of [last-rime match, second-to-last-rime match,
;; membership in #{"foo" "bar"}].
(def c
  (let [ascending (rime-compare
                   (rime-1 {:rimes '(1 2)})
                   (rime-2 {:rimes '(1 2)})
                   (rime-member? ["foo" "bar"]))]
    (fn [a b]
      (ascending b a))))

(comment
  ;; REPL scratch for the rime comparators defined above.

  ;; Sort hand-made word maps with the fixed comparator `c`. `c` swaps its
  ;; arguments, so entries matching the (1 2) target come first.
  (let [coll [{:rimes '(3 2) :normalized-word "foo"}
              {:rimes '(1 2) :normalized-word "foo"}
              {:rimes '(4 5) :normalized-word "foo"}
              {:rimes '(1 2) :normalized-word "buzz"}]]
    (sort c coll))

  ;; Same idea against real data: rank a list of words by how well they
  ;; rhyme with "devotion", using membership in the "love"/"heart" synonyms
  ;; as the final sort key.
  (let [coll '("woman"
               "union"
               "passion"
               "infatuation"
               "emotion"
               "disposition"
               "communion"
               "attraction"
               "affection"
               "adoration"
               "admiration")
        coll (map #(prhyme/phrase->word dict/popular %) coll)
        target (prhyme/phrase->word dict/popular "devotion")
        synonyms (thesaurus/synonyms "love" "heart")
        ;; Builds a descending comparator for a given target (note `b a`).
        comparisons (fn [target]
                      (fn [a b]
                        ((rime-compare
                          (rime-1 target)
                          (rime-2 target)
                          (rime-member? synonyms))
                         b a)))]
    (sort (comparisons target) coll))

  )

(defn rhymestorm
  "Returns a map of synonym -> seq of other synonyms that rhyme with it.

  The candidate pool is whatever `thesaurus/synonyms` returns for `words`,
  restricted to entries found in `dict/popular`. For each candidate, the
  words sharing its last rime are looked up in `words-by-rime`; only those
  that are themselves candidates (excluding the candidate itself) are kept.
  Candidates with no rhyming partner are dropped.

  Each rhyme list is sorted best-first: last-rime match, then
  second-to-last-rime match, then the `rime-member?` value."
  [& words]
  (let [synonyms (->> (apply thesaurus/synonyms words)
                      (filter #(get dict/popular %))
                      (into #{}))
        ;; Comparator factory; the `b a` swap makes the sort descending.
        comparisons (fn [target]
                      (fn [a b]
                        ((rime-compare
                          (rime-1 target)
                          (rime-2 target)
                          (rime-member? synonyms))
                         b a)))]
    (->> synonyms
         (map
          (fn [synonym]
            (let [word (prhyme/phrase->word dict/prhyme-dict synonym)
                  rhymes (get words-by-rime (last (:rimes word)))]
              ;; Yields nil when the rime is not indexed; the `remove`
              ;; below discards those because (empty? nil) is true.
              (when rhymes
                (let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
                      rhyming-synonyms (remove #{(:normalized-word word)} (filter synonyms rhyming-words))]
                  [(:normalized-word word) rhyming-synonyms])))))
         (remove (fn [[_ rhymes]]
                   (empty? rhymes)))
         (map (fn [[target rhymes]]
                [target (->> rhymes
                             ;; Convert each rhyme on its own. Threading
                             ;; `rhymes` into (map f dict/popular) would
                             ;; instead pair dictionary elements with
                             ;; rhymes as a two-collection map.
                             (map #(prhyme/phrase->word dict/popular %))
                             (sort (comparisons (prhyme/phrase->word dict/popular target)))
                             (map :normalized-word))]))
         (into {}))))

(comment
  ;; REPL scratch: rhyme lookup, synonym intersection and weighted
  ;; selection experiments.

  (rhymestorm "love")
  ;; Peek at the shape of the dictionaries.
  (take 3 (drop 500 dict/prhyme-dict))
  (take 3 dict/cmu-dict)
  (take 3 dict/popular)

  ;; All words stored under the ("AH" "V") rime.
  (let [node (get-in words-by-rime ['("AH" "V")])]
    (->> (prhyme/flatten-node node)))

  ;; Popular words that the thesaurus lists for both "love" and "heart".
  (let  [love-synonyms (thesaurus/thesaurus "love")
         heart-synonyms (thesaurus/thesaurus "heart")]
    (->> (clojure.set/intersection
          (into #{} love-synonyms)
          (into #{} heart-synonyms))
         (map string/lower-case)
         (filter #(dict/popular %))))

  (let [synonyms (thesaurus/synonyms "love" "heart")]
    synonyms)

  ;; Earlier, unsorted form of `rhymestorm`: synonym -> rhyming synonyms.
  ;; Unlike `rhymestorm`, a word is not removed from its own rhyme list.
  (def love-rhymes
    (let [synonyms (->> (thesaurus/synonyms "love" "heart")
                        (filter #(get dict/popular %))
                        (into #{}))]
      (->>
       (map
        (fn [synonym]
          (let [word (prhyme/phrase->word dict/prhyme-dict synonym)
                rhymes (get words-by-rime (last (:rimes word)))]
            (when rhymes
              (let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))
                    rhyming-synonyms (filter synonyms rhyming-words)]
                [(:normalized-word word) rhyming-synonyms]))))
        synonyms)
       (into {}))))

  (count love-rhymes)
  (get-in words-by-rime ['("AH" "V")])

  ;; Weight of one dictionary word against the phrase "bye bye".
  (weight-fn
   (first (filter #(= (:normalized-word %) "gotshal's") dict/prhyme-dict))
   (prhyme/phrase->word dict/prhyme-dict "bye bye")
   nil)

  (take 10 darklyrics/darklyrics-markov-2)
  (get darklyrics/darklyrics-markov-2 '("memory" "my"))
  ;; Five runs of five selected words each for the target phrase, using the
  ;; composed weight adjusters (two of them are disabled with #_).
  (repeatedly
   5
   (fn []
     (let [rhymes (gen/selection-seq
                   dict/prhyme-dict
                   (comp (weighted/adjust-for-tail-rhyme 0.90)
                         #_(weighted/adjust-for-rhymes 0.50)
                         #_(weighted/adjust-for-fn :adj-rimes 0.80 pred-fn weight-fn)
                         (weighted/adjust-for-fn :adj-popular 0.95 pred-popular weight-popular)
                         (weighted/adjust-for-markov darklyrics/darklyrics-markov-2 0.99))
                   (prhyme/phrase->word dict/prhyme-dict "happy birthday taylor my love"))]
       (->> rhymes
            (take 5)
            (map :normalized-word)))))
  )

(defn remove-sentences-with-words-not-in-dictionary
  "Returns a function of a seq of sentence strings. Each sentence is split
  on single spaces and kept only when every token is a member of
  `dictionary` and no token is blank. Kept sentences are re-joined with
  single spaces."
  [dictionary]
  (let [known? (set dictionary)
        acceptable? (fn [tokens]
                      (and (every? known? tokens)
                           (not-any? string/blank? tokens)))]
    (fn [sentences]
      (for [sentence sentences
            :let [tokens (string/split sentence #" ")]
            :when (acceptable? tokens)]
        (string/join " " tokens)))))

(defn pathed-part-of-speech-word-frequencies
  "One map per usable document, from part-of-speech path to word
  frequencies:

  {(TOP NP NN) {'test' 2 'sample' 4 ,,,}
   (TOP VP VBZ) {'is' 5 'runs' 2 ,,,}
   ,,,}

  Each document is slurped, cleaned with `util/clean-text`, kept only if
  `dict/english?` accepts it, split into lines, and stripped of lines that
  contain words outside `dict/popular`. Documents left with no lines, or
  with an empty line, are skipped.

  Combine the per-document maps by deep merging with +."
  [documents]
  (let [keep-popular-sentences (remove-sentences-with-words-not-in-dictionary dict/popular)
        texts (->> documents
                   (map slurp)
                   (map util/clean-text)
                   (filter dict/english?))
        sentence-lists (->> texts
                            (map #(string/split % #"\n+"))
                            (map keep-popular-sentences)
                            (remove empty?)
                            (remove #(some empty? %)))]
    (->> sentence-lists
         (map nlp/treebank-zipper)
         (map nlp/leaf-pos-path-word-freqs))))

(defn grammar-tree-frequencies
  "One map per usable document, from grammar tree to the number of times
  that tree occurs in the document:

  {(TOP (NP (NN)) (VP (VBZ))) 23
   (TOP (NP (DT) (NN)) (VP (VBZ))) 18
   ,,,}

  Documents go through the same slurp / clean / english? / popular-words
  filtering as `pathed-part-of-speech-word-frequencies` before being parsed
  with `nlp/parse-to-simple-tree` and `nlp/parse-tree-sans-leaf-words`.

  Combine the per-document maps by merging with +."
  [documents]
  (let [keep-popular-sentences (remove-sentences-with-words-not-in-dictionary dict/popular)
        ;; Count occurrences by merging one {tree 1} map per tree.
        tally (fn [trees]
                (apply merge-with + (map #(hash-map % 1) trees)))
        texts (->> documents
                   (map slurp)
                   (map util/clean-text)
                   (filter dict/english?))
        sentence-lists (->> texts
                            (map #(string/split % #"\n+"))
                            (map keep-popular-sentences)
                            (remove empty?)
                            (remove #(some empty? %)))]
    (->> sentence-lists
         (map nlp/parse-to-simple-tree)
         (map nlp/parse-tree-sans-leaf-words)
         (map tally))))

(defn weighted-selection-from-map
  "Hands the entries of `m` to `weighted-rand/weighted-selection`, using
  `second` (the entry's value) as the weight accessor, and returns the
  first element of what comes back. Presumably that is the key of the
  selected entry; confirm against `weighted-rand/weighted-selection`."
  [m]
  (->> (seq m)
       (weighted-rand/weighted-selection second)
       first))

(defn chunked-writing-pos-path-freqs
  "Processes `documents` in batches of `chunk-size`. For each batch the
  per-document part-of-speech path frequencies are deep-merged with + and
  frozen with nippy to resources/pos-freqs/<offset>.nip, where <offset> is
  the index of the batch's first document. Returns nil."
  [documents chunk-size]
  (doseq [offset (range 0 (count documents) chunk-size)]
    (let [batch (take chunk-size (drop offset documents))
          merged (reduce (fn [acc freqs]
                           (nlp/deep-merge-with + acc freqs))
                         {}
                         (pathed-part-of-speech-word-frequencies batch))
          filepath (format "resources/pos-freqs/%s.nip" offset)]
      (timbre/info (format "Writing to %s." filepath))
      (nippy/freeze-to-file filepath merged))))

(defn chunked-writing-structure-freqs
  "Processes `documents` in batches of `chunk-size`. For each batch the
  per-document grammar tree frequencies are merged with + and frozen with
  nippy to resources/structure-freqs/<offset>.nip, where <offset> is the
  index of the batch's first document. Returns nil."
  [documents chunk-size]
  (let [chunks (range 0 (count documents) chunk-size)]
    (run!
     (fn [chunk]
       (let [structure (->> documents
                            (drop chunk)
                            (take chunk-size)
                            ;; Was `structures`, a name that is not defined
                            ;; in this namespace; the per-document
                            ;; structure counts come from
                            ;; `grammar-tree-frequencies`.
                            grammar-tree-frequencies
                            (reduce
                             (fn [a v]
                               (nlp/deep-merge-with + a v))
                             {}))
             filepath (format "resources/structure-freqs/%s.nip" chunk)]
         (timbre/info (format "Writing to %s." filepath))
         (nippy/freeze-to-file filepath structure)))
     chunks)))

(defn pos-paths->pos-freqs
  "Collapse pos paths, like {(TOP S NP NN) {'test' 5 'car' 3 ,,,}},
  into a top-level pos freq map like {NN {'test' 25 'car' 8 ,,,}}.

  Every path is reduced to its last element and the word frequencies of
  paths that end in the same element are deep-merged with +."
  [pos-paths]
  (reduce
   (fn [acc [path word-freqs]]
     (nlp/deep-merge-with + acc (hash-map (last path) word-freqs)))
   {}
   pos-paths))

(comment
  ;; REPL scratch for building, merging and sampling the part-of-speech and
  ;; grammar-structure frequency data. Forms are meant to be evaluated one
  ;; at a time; several depend on files under resources/ and dark-corpus/.

  (take 5 darklyrics/darklyrics-markov-2)
  (darklyrics/darklyrics-markov-2 '("time" "is"))
  (def darkov-2 darklyrics/darklyrics-markov-2)
  ;; => ([("profanity" "unholy") {"its" 2}]
  ;;     [("ants" "triumph") {nil 1}]
  ;;     [("hiding" "our") {"of" 1, "expose" 3, "above" 1}]
  ;;     [("won't" "intervention") {"divine" 1, "an" 1}]
  ;;     [("pines" "weeping") {"the" 1}])


  ;; Merge pos paths
  (def pos-freqs-data
    (let [documents (->> "resources/pos-freqs"
                         io/file
                         file-seq
                         (remove #(.isDirectory %)))]
      (reduce
       (fn [accum document]
         (let [data (nippy/thaw-from-file document)]
           (nlp/deep-merge-with + accum data)))
       {}
       documents)))

  (nippy/freeze-to-file "resources/corpus/darklyrics/pos-word-freqs.nippy" pos-freqs-data)
  (count pos-freqs-data)
  (take 20 pos-freqs-data)

  ;; Re-key by only the last three elements of each pos path.
  (time
   (def pos-freqs-data-3
     (reduce
      (fn [acc [k v]]
        (let [new-map (hash-map (take-last 3 k) v)]
          (nlp/deep-merge-with + acc new-map)))
      {}
      pos-freqs-data)))
  (count pos-freqs-data-3)
  (take 2 (reverse (sort-by #(count (second %)) pos-freqs-data-3)))

  ;; Re-key by only the last two elements of each pos path.
  (time
   (def
     pos-freqs-data-2
     (reduce
      (fn [acc [k v]]
        (let [new-map (hash-map (take-last 2 k) v)]
          (nlp/deep-merge-with + acc new-map)))
      {}
      pos-freqs-data-3)))

  ;; Merge the chunked structure frequency files.
  (def structure-freq-data
    (let [documents (->> "resources/structure-freqs"
                         io/file
                         file-seq
                         (remove #(.isDirectory %)))]
      (reduce
       (fn [accum document]
         (let [data (nippy/thaw-from-file document)]
           (nlp/deep-merge-with + accum data)))
       {}
       documents)))
  (def popular-structure-freq-data (into {} (take 500 (reverse (sort-by #(second %) structure-freq-data)))))
  (take 100 popular-structure-freq-data)
  (nippy/freeze-to-file "resources/corpus/darklyrics/grammar-tree-freqs.nippy" structure-freq-data)

  (def t1 (nippy/thaw-from-file "resources/structure-freqs/0.nip"))
  ;; NOTE(review): `structures` is not defined in this file. The next two
  ;; forms look like leftovers from an earlier REPL var (possibly the data
  ;; now bound to `t1` or `structure-freq-data`); confirm before evaluating.
  structures
  (take 100 (reverse (sort-by second structures)))
  ;; Write the chunked frequency files. The differing `drop` offsets skip
  ;; documents at the start of the corpus.
  (do
    (let [documents (->> "dark-corpus"
                         io/file
                         file-seq
                         (remove #(.isDirectory %))
                         (drop 5000))
          chunk-size 5000]
      (chunked-writing-pos-path-freqs
       documents
       chunk-size))
    (let [documents (->> "dark-corpus"
                         io/file
                         file-seq
                         (remove #(.isDirectory %))
                         (drop 50000))
          chunk-size 5000]
      (chunked-writing-structure-freqs
       documents
       chunk-size)))

  (def t1 (nippy/thaw-from-file "resources/pos-freqs/0.nip"))
  (take 10 t1)
  (let [path-freqs (pos-paths->pos-freqs t1)]
    (take 10 path-freqs))

  (take 5 t1)
  (take 10 (reverse (sort-by #(count (second %)) t1)))
  (def t3 (nippy/thaw-from-file "resources/pos-freqs/400.nip"))
  (def t2 (nippy/thaw-from-file "resources/pos-freqs/800.nip"))
  (count (merge-with + t1 t2 t3))
  ;; => 2353
  (count t3)
  ;; => 1013
  (count t1)
  ;; => 871
  (count t2)
  ;; => 676

  ;; `(def corpus` had been swallowed into the result comment above, which
  ;; left `corpus` undefined for the forms below.
  (def corpus
    (->> "dark-corpus"
         io/file
         file-seq
         (remove #(.isDirectory %))))

  ;; Uses the current name of the per-document pos-path frequency function
  ;; (was the stale `pos-path-freqs`).
  (time
   (def example-pos-freqs
     (->> corpus
          (take 100)
          pathed-part-of-speech-word-frequencies
          (reduce
           (fn [a v]
             (nlp/deep-merge-with + a v))
           {}))))

  ;; Uses the current name of the per-document grammar tree frequency
  ;; function (was the stale `structures`).
  (time
   (def example-structures
     (->> corpus
          (take 100)
          grammar-tree-frequencies
          (reduce
           (fn [a v]
             (merge-with + a v))
           {}))))

  ;; Pick one structure by weight, then generate ten sentences from it.
  (let [structure (weighted-selection-from-map example-structures)]
    (repeatedly
     10
     (fn []
       (->> (nlp/generate-from-structure-and-pos-freqs
             structure
             example-pos-freqs)
            nlp/leaf-nodes
            (string/join " ")))))

  ;; => ("then get your life"
  ;;     "sometimes lie my hand"
  ;;     "still become your chapter"
  ;;     "alright fade our surfing"
  ;;     "far care my band"
  ;;     "all fake my fallow"
  ;;     "here gimme our head"
  ;;     "long back my guide"
  ;;     "never stop their seed"
  ;;     "never consume our tomorrow")

  ;; => ("now scarred towards the future"
  ;;     "never gone among the side"
  ;;     "ill removed with the end"
  ;;     "well filled in the life"
  ;;     "again torn towards the world"
  ;;     "desperately matched in the love"
  ;;     "nowadays matched in the ark"
  ;;     "awhile needed through all night"
  ;;     "so torn in the darkness"
  ;;     "first erased on the land")

  ;; => ("pictures of the destiny"
  ;;     "tears on the pain"
  ;;     "lights in the disaster"
  ;;     "corpses on the fire"
  ;;     "castles on the universe"
  ;;     "efforts for the king"
  ;;     "visions of the night"
  ;;     "retreats into the darker"
  ;;     "tales into the attack"
  ;;     "pictures into the play")

  (get-in {:a 1} '())
  (let [zipper (zip/seq-zip '(TOP (S (NP) (VB))))]
    (-> zipper
        zip/down
        zip/right
        zip/node)))
Add example code, more nlp options 4 years ago			`(ns examples.core`
			`(:require [clojure.string :as string]`
			`[clojure.set]`
Add comments to nlp/g code 4 years ago			`[clojure.java.io :as io]`
Add nippy for pos and grammar freqs 4 years ago			`[taoensso.nippy :as nippy]`
			`[taoensso.timbre :as timbre]`
Add example code, more nlp options 4 years ago			`[com.owoga.prhyme.frp :as frp]`
Add comments to nlp/g code 4 years ago			`[com.owoga.prhyme.util :as util]`
Add example code, more nlp options 4 years ago			`[com.owoga.prhyme.core :as prhyme]`
			`[com.owoga.prhyme.data.bigrams :as bigrams]`
			`[com.owoga.prhyme.gen :as gen]`
Add comments to nlp/g code 4 years ago			`[com.owoga.prhyme.nlp.core :as nlp]`
			`[com.owoga.prhyme.nlg.core :as nlg]`
Add example code, more nlp options 4 years ago			`[com.owoga.prhyme.data.dictionary :as dict]`
			`[com.owoga.prhyme.data.thesaurus :as thesaurus]`
			`[com.owoga.prhyme.data.darklyrics :as darklyrics]`
Add function to parse top-k 4 years ago			`[com.owoga.prhyme.util.weighted-rand :as weighted-rand]`
Add example code, more nlp options 4 years ago			`[com.owoga.prhyme.generation.weighted-selection :as weighted]`
			`[clojure.set :as set]`
Add comments to nlp/g code 4 years ago			`[clojure.zip :as zip]`
			`[clojure.walk :as walk]))`
Add example code, more nlp options 4 years ago
			`(defn weight-fn [word target result]`
			`(let [rimes (frp/consecutive-matching word target :rimes)`
			`nuclei (frp/consecutive-matching word target :nuclei)`
			`onsets (frp/consecutive-matching word target :onsets)`
			`total (apply + (map count [rimes nuclei onsets]))]`
			`total))`

			`(defn pred-fn [word target result]`
			`(< 0 (weight-fn word target result)))`

			`(defn weight-popular [word target result]`
			`(if (dict/popular (:normalized-word word))`
			`10`
			`1))`

			`(defn pred-popular [word target result]`
			`(< 1 (weight-popular word target result)))`

			`(def words-by-rime (prhyme/words-by-rime*`
			`(filter`
			`(fn [[word & _]]`
			`(get`
			`dict/popular`
			`(string/lower-case word)))`
			`dict/cmu-dict)))`

			`(defn rime-1 [target]`
			`(let [rime (last (:rimes target))]`
			`(fn [x]`
			`(= rime (last (:rimes x))))))`

			`(defn rime-2 [target]`
			`(let [rime (last (butlast (:rimes target)))]`
			`(fn [x]`
			`(= rime (last (butlast (:rimes x)))))))`

			`(defn rime-member? [coll]`
			`(let [coll (into #{} coll)]`
			`(fn [x]`
			`(coll (:normalized-word x)))))`

			`(defn rime-compare [& comparators]`
			`(let [juxtcomp (apply juxt comparators)]`
			`(fn [a b]`
			`(let [a (juxtcomp a)`
			`b (juxtcomp b)]`
			`(compare a b)))))`
			`(def c`
			`(fn [a b]`
			`((rime-compare`
			`(rime-1 {:rimes '(1 2)})`
			`(rime-2 {:rimes '(1 2)})`
			`(rime-member? ["foo" "bar"]))`
			`b a)))`

			`(comment`
			`(let [coll [{:rimes '(3 2) :normalized-word "foo"}`
			`{:rimes '(1 2) :normalized-word "foo"}`
			`{:rimes '(4 5) :normalized-word "foo"}`
			`{:rimes '(1 2) :normalized-word "buzz"}]]`
			`(sort c coll))`

			`(let [coll '("woman"`
			`"union"`
			`"passion"`
			`"infatuation"`
			`"emotion"`
			`"disposition"`
			`"communion"`
			`"attraction"`
			`"affection"`
			`"adoration"`
			`"admiration")`
			`coll (map #(prhyme/phrase->word dict/popular %) coll)`
			`target (prhyme/phrase->word dict/popular "devotion")`
			`synonyms (thesaurus/synonyms "love" "heart")`
			`comparisons (fn [target]`
			`(fn [a b]`
			`((rime-compare`
			`(rime-1 target)`
			`(rime-2 target)`
			`(rime-member? synonyms))`
			`b a)))]`
			`(sort (comparisons target) coll))`

			`)`

			`(defn rhymestorm [& words]`
			`(let [synonyms (->> (apply thesaurus/synonyms words)`
			`(filter #(get dict/popular %))`
			`(into #{}))`
			`comparisons (fn [target]`
			`(fn [a b]`
			`((rime-compare`
			`(rime-1 target)`
			`(rime-2 target)`
			`(rime-member? synonyms))`
			`b a)))]`
			`(->> synonyms`
			`(map`
			`(fn [synonym]`
			`(let [word (prhyme/phrase->word dict/prhyme-dict synonym)`
			`rhymes (get words-by-rime (last (:rimes word)))]`
			`(when rhymes`
			`(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))`
			`rhyming-synonyms (remove #{(:normalized-word word)} (filter synonyms rhyming-words))]`
			`[(:normalized-word word) rhyming-synonyms])))))`
			`(remove (fn [[_ rhymes]]`
			`(empty? rhymes)))`
			`(map (fn [[target rhymes]]`
			`[target (->> rhymes`
			`(map prhyme/phrase->word dict/popular)`
			`(sort (comparisons (prhyme/phrase->word dict/popular target)))`
			`(map :normalized-word))]))`
			`(into {}))))`

			`(comment`
			`(rhymestorm "love")`
			`(take 3 (drop 500 dict/prhyme-dict))`
			`(take 3 dict/cmu-dict)`
			`(take 3 dict/popular)`

			`(let [node (get-in words-by-rime ['("AH" "V")])]`
			`(->> (prhyme/flatten-node node)))`

			`(let [love-synonyms (thesaurus/thesaurus "love")`
			`heart-synonyms (thesaurus/thesaurus "heart")]`
			`(->> (clojure.set/intersection`
			`(into #{} love-synonyms)`
			`(into #{} heart-synonyms))`
			`(map string/lower-case)`
			`(filter #(dict/popular %))))`

			`(let [synonyms (thesaurus/synonyms "love" "heart")]`
			`synonyms)`

			`(def love-rhymes`
			`(let [synonyms (->> (thesaurus/synonyms "love" "heart")`
			`(filter #(get dict/popular %))`
			`(into #{}))]`
			`(->>`
			`(map`
			`(fn [synonym]`
			`(let [word (prhyme/phrase->word dict/prhyme-dict synonym)`
			`rhymes (get words-by-rime (last (:rimes word)))]`
			`(when rhymes`
			`(let [rhyming-words (map string/lower-case (prhyme/flatten-node rhymes))`
			`rhyming-synonyms (filter synonyms rhyming-words)]`
			`[(:normalized-word word) rhyming-synonyms]))))`
			`synonyms)`
			`(into {}))))`

			`(count love-rhymes)`
			`(get-in words-by-rime ['("AH" "V")])`

			`(weight-fn`
			`(first (filter #(= (:normalized-word %) "gotshal's") dict/prhyme-dict))`
			`(prhyme/phrase->word dict/prhyme-dict "bye bye")`
			`nil)`

			`(take 10 darklyrics/darklyrics-markov-2)`
			`(get darklyrics/darklyrics-markov-2 '("memory" "my"))`
			`(repeatedly`
			`5`
			`(fn []`
			`(let [rhymes (gen/selection-seq`
			`dict/prhyme-dict`
			`(comp (weighted/adjust-for-tail-rhyme 0.90)`
			`#_(weighted/adjust-for-rhymes 0.50)`
			`#_(weighted/adjust-for-fn :adj-rimes 0.80 pred-fn weight-fn)`
			`(weighted/adjust-for-fn :adj-popular 0.95 pred-popular weight-popular)`
			`(weighted/adjust-for-markov darklyrics/darklyrics-markov-2 0.99))`
			`(prhyme/phrase->word dict/prhyme-dict "happy birthday taylor my love"))]`
			`(->> rhymes`
			`(take 5)`
			`(map :normalized-word)))))`
			`)`
Add comments to nlp/g code 4 years ago
			`(defn remove-sentences-with-words-not-in-dictionary [dictionary]`
			`(let [dictionary (into #{} dictionary)]`
			`(fn [sentences]`
			`(->> sentences`
			`(map #(string/split % #" "))`
			`(remove #(some (complement dictionary) %))`
			`(remove #(some string/blank? %))`
			`(map #(string/join " " %))))))`

Scratch code related to grammar nlg 4 years ago			`(defn pathed-part-of-speech-word-frequencies`
			`"Seq of pathed part-of-speech to word frequencies of each document.`

			`{(TOP NP NN) {'test' 2 'sample' 4 ,,,}`
			`(TOP VP VBZ) {'is' 5 'runs' 2 ,,,}`
			`,,,}`

Add nippy for pos and grammar freqs 4 years ago			`To reduce, deep merge with +."`
			`[documents]`
			`(->> documents`
			`(map slurp)`
			`(map util/clean-text)`
			`(filter dict/english?)`
			`(map #(string/split % #"\n+"))`
			`(map (remove-sentences-with-words-not-in-dictionary dict/popular))`
			`(remove empty?)`
			`(remove #(some empty? %))`
			`(map nlp/treebank-zipper)`
			`(map nlp/leaf-pos-path-word-freqs)))`

Scratch code related to grammar nlg 4 years ago			`(defn grammar-tree-frequencies`
			`"Seq of grammar tree frequencies of each document.`

			`{(TOP (NP (NN)) (VP (VBZ))) 23`
			`(TOP (NP (DT) (NN)) (VP (VBZ))) 18`
			`,,,}`

Add nippy for pos and grammar freqs 4 years ago			`To reduce, merge with +."`
			`[documents]`
			`(->> documents`
			`(map slurp)`
			`(map util/clean-text)`
			`(filter dict/english?)`
			`(map #(string/split % #"\n+"))`
			`(map (remove-sentences-with-words-not-in-dictionary dict/popular))`
			`(remove empty?)`
			`(remove #(some empty? %))`
			`(map nlp/parse-to-simple-tree)`
			`(map nlp/parse-tree-sans-leaf-words)`
			`(map`
			`(fn [lines]`
			`(map #(hash-map % 1) lines)))`
			`(map (partial apply merge-with +))))`
Add comments to nlp/g code 4 years ago
Add function to parse top-k 4 years ago			`(defn weighted-selection-from-map [m]`
			`(first (weighted-rand/weighted-selection second (seq m))))`

Add nippy for pos and grammar freqs 4 years ago			`(defn chunked-writing-pos-path-freqs`
			`[documents chunk-size]`
			`(let [chunks (range 0 (count documents) chunk-size)]`
			`(run!`
			`(fn [chunk]`
			`(let [structure (->> documents`
			`(drop chunk)`
			`(take chunk-size)`
Scratch code related to grammar nlg 4 years ago			`pathed-part-of-speech-word-frequencies`
Add nippy for pos and grammar freqs 4 years ago			`(reduce`
			`(fn [a v]`
			`(nlp/deep-merge-with + a v))`
			`{}))`
			`filepath (format "resources/pos-freqs/%s.nip" chunk)]`
			`(timbre/info (format "Writing to %s." filepath))`
			`(nippy/freeze-to-file filepath structure)))`
			`chunks)))`

			`(defn chunked-writing-structure-freqs`
			`[documents chunk-size]`
			`(let [chunks (range 0 (count documents) chunk-size)]`
			`(run!`
			`(fn [chunk]`
			`(let [structure (->> documents`
			`(drop chunk)`
			`(take chunk-size)`
			`structures`
			`(reduce`
			`(fn [a v]`
			`(nlp/deep-merge-with + a v))`
			`{}))`
			`filepath (format "resources/structure-freqs/%s.nip" chunk)]`
			`(timbre/info (format "Writing to %s." filepath))`
			`(nippy/freeze-to-file filepath structure)))`
			`chunks)))`

			`(defn pos-paths->pos-freqs`
			`"Convert pos paths, like {(TOP S NP NN) {'test' 5 'car' 3 ,,,}}`
			`into a top-level pos freq map like {NN {'test' 25 'car' 8 ,,,}}.is"`
			`[pos-paths]`
			`(->> pos-paths`
			`(map`
			`(fn [[k v]]`
			`(hash-map (last k) v)))`
			`(reduce`
			`(fn [a v]`
			`(nlp/deep-merge-with + a v))`
			`{})))`

Add comments to nlp/g code 4 years ago			`(comment`
Add nippy for pos and grammar freqs 4 years ago			`(take 5 darklyrics/darklyrics-markov-2)`
			`(darklyrics/darklyrics-markov-2 '("time" "is"))`
			`(def darkov-2 darklyrics/darklyrics-markov-2)`
			`;; => ([("profanity" "unholy") {"its" 2}]`
			`;; [("ants" "triumph") {nil 1}]`
			`;; [("hiding" "our") {"of" 1, "expose" 3, "above" 1}]`
			`;; [("won't" "intervention") {"divine" 1, "an" 1}]`
			`;; [("pines" "weeping") {"the" 1}])`

Scratch code related to grammar nlg 4 years ago
			`;; Merge pos paths`
			`(def pos-freqs-data`
			`(let [documents (->> "resources/pos-freqs"`
			`io/file`
			`file-seq`
			`(remove #(.isDirectory %)))]`
			`(reduce`
			`(fn [accum document]`
			`(let [data (nippy/thaw-from-file document)]`
			`(nlp/deep-merge-with + accum data)))`
			`{}`
			`documents)))`

			`(nippy/freeze-to-file "resources/corpus/darklyrics/pos-word-freqs.nippy" pos-freqs-data)`
			`(count pos-freqs-data)`
			`(take 20 pos-freqs-data)`

			`(time`
			`(def pos-freqs-data-3`
			`(reduce`
			`(fn [acc [k v]]`
			`(let [new-map (hash-map (take-last 3 k) v)]`
			`(nlp/deep-merge-with + acc new-map)))`
			`{}`
			`pos-freqs-data)))`
			`(count pos-freqs-data-3)`
			`(take 2 (reverse (sort-by #(count (second %)) pos-freqs-data-3)))`

			`(time`
			`(def`
			`pos-freqs-data-2`
			`(reduce`
			`(fn [acc [k v]]`
			`(let [new-map (hash-map (take-last 2 k) v)]`
			`(nlp/deep-merge-with + acc new-map)))`
			`{}`
			`pos-freqs-data-3)))`

			`(def structure-freq-data`
			`(let [documents (->> "resources/structure-freqs"`
			`io/file`
			`file-seq`
			`(remove #(.isDirectory %)))]`
			`(reduce`
			`(fn [accum document]`
			`(let [data (nippy/thaw-from-file document)]`
			`(nlp/deep-merge-with + accum data)))`
			`{}`
			`documents)))`
			`(def popular-structure-freq-data (into {} (take 500 (reverse (sort-by #(second %) structure-freq-data)))))`
			`(take 100 popular-structure-freq-data)`
			`(nippy/freeze-to-file "resources/corpus/darklyrics/grammar-tree-freqs.nippy" structure-freq-data)`

			`(def t1 (nippy/thaw-from-file "resources/structure-freqs/0.nip"))`
			`structures`
Add nippy for pos and grammar freqs 4 years ago			`(take 100 (reverse (sort-by second structures)))`
Scratch code related to grammar nlg 4 years ago			`(do`
			`(let [documents (->> "dark-corpus"`
			`io/file`
			`file-seq`
			`(remove #(.isDirectory %))`
			`(drop 5000))`
			`chunk-size 5000]`
			`(chunked-writing-pos-path-freqs`
			`documents`
			`chunk-size))`
			`(let [documents (->> "dark-corpus"`
			`io/file`
			`file-seq`
			`(remove #(.isDirectory %))`
			`(drop 50000))`
			`chunk-size 5000]`
			`(chunked-writing-structure-freqs`
			`documents`
			`chunk-size)))`
Add nippy for pos and grammar freqs 4 years ago
			`(def t1 (nippy/thaw-from-file "resources/pos-freqs/0.nip"))`
			`(take 10 t1)`
			`(let [path-freqs (pos-paths->pos-freqs t1)]`
			`(take 10 path-freqs))`
Add comments to nlp/g code 4 years ago
Add nippy for pos and grammar freqs 4 years ago			`(take 5 t1)`
			`(take 10 (reverse (sort-by #(count (second %)) t1)))`
			`(def t3 (nippy/thaw-from-file "resources/pos-freqs/400.nip"))`
			`(def t2 (nippy/thaw-from-file "resources/pos-freqs/800.nip"))`
			`(count (merge-with + t1 t2 t3))`
			`;; => 2353`
			`(count t3)`
			`;; => 1013`
			`(count t1)`
			`;; => 871`
			`(count t2)`
			`;; => 676 (def corpus`
			`(->> "dark-corpus"`
			`io/file`
			`file-seq`
Scratch code related to grammar nlg 4 years ago			`(remove #(.isDirectory %)))`
Add nippy for pos and grammar freqs 4 years ago
			`(time`
			`(def example-pos-freqs`
			`(->> corpus`
			`(take 100)`
			`pos-path-freqs`
			`(reduce`
			`(fn [a v]`
			`(nlp/deep-merge-with + a v))`
			`{}))))`

			`(time`
			`(def example-structures`
			`(->> corpus`
			`(take 100)`
			`structures`
			`(reduce`
			`(fn [a v]`
			`(merge-with + a v))`
			`{}))))`
Add comments to nlp/g code 4 years ago
Add function to parse top-k 4 years ago			`(let [structure (weighted-selection-from-map example-structures)]`
			`(repeatedly`
			`10`
			`(fn []`
			`(->> (nlp/generate-from-structure-and-pos-freqs`
			`structure`
			`example-pos-freqs)`
			`nlp/leaf-nodes`
			`(string/join " ")))))`
Add nippy for pos and grammar freqs 4 years ago
Add function to parse top-k 4 years ago			`;; => ("then get your life"`
			`;; "sometimes lie my hand"`
			`;; "still become your chapter"`
			`;; "alright fade our surfing"`
			`;; "far care my band"`
			`;; "all fake my fallow"`
			`;; "here gimme our head"`
			`;; "long back my guide"`
			`;; "never stop their seed"`
			`;; "never consume our tomorrow")`

			`;; => ("now scarred towards the future"`
			`;; "never gone among the side"`
			`;; "ill removed with the end"`
			`;; "well filled in the life"`
			`;; "again torn towards the world"`
			`;; "desperately matched in the love"`
			`;; "nowadays matched in the ark"`
			`;; "awhile needed through all night"`
			`;; "so torn in the darkness"`
			`;; "first erased on the land")`

			`;; => ("pictures of the destiny"`
			`;; "tears on the pain"`
			`;; "lights in the disaster"`
			`;; "corpses on the fire"`
			`;; "castles on the universe"`
			`;; "efforts for the king"`
			`;; "visions of the night"`
			`;; "retreats into the darker"`
			`;; "tales into the attack"`
			`;; "pictures into the play")`
Add comments to nlp/g code 4 years ago
			`(get-in {:a 1} '())`
			`(let [zipper (zip/seq-zip '(TOP (S (NP) (VB))))]`
			`(-> zipper`
			`zip/down`
			`zip/right`
Scratch code related to grammar nlg 4 years ago			`zip/node)))`