#+TITLE: Capstone Documentation
* C
Design and develop a fully functional data product that addresses your identified business problem or organizational need. Include each of the following attributes as they are the minimum required elements for the product:
** one descriptive method and one non-descriptive (predictive or prescriptive) method
*** Descriptive Method
**** Most common sentence structures
Here is the code to generate a report on the most common sentence structures given a directory of lyrics files.
#+begin_src clojure :results value
;; Report the most common sentence structures found in a directory of lyrics files.
;; NOTE(review): the namespace for the `io` alias is missing from the last
;; require vector; `io/file` below suggests clojure.java.io — confirm.
(require '[com.owoga.corpus.markov :as markov]
'[com.owoga.prhyme.nlp.core :as nlp]
'[clojure.string :as string]
'[ :as io])
;; NOTE(review): `transduce` accepts a single xform, so the four `map` steps
;; below (slurp each file -> split on newlines -> drop empty lines -> apply
;; nlp/structure-freqs) were presumably wrapped in `comp` and followed by a
;; reducing function that is not visible here — confirm against the original.
(let [lines (transduce
(map slurp)
(map #(string/split % #"\n"))
(map (partial remove empty?))
(map nlp/structure-freqs))
(eduction (markov/xf-file-seq 0 10) (file-seq (io/file "/home/eihli/src/prhyme/dark-corpus"))))]
;; Sort by the second element of each entry, largest first, and keep five entries.
(take 5 (sort-by (comp - second) lines)))
#+end_src

#+RESULTS:
| (TOP (NP (NNP) (.))) | 6 |
| (TOP (S (NP (PRP)) (VP (VBP) (ADJP (JJ))) (.))) | 6 |
| (INC (NP (JJ) (NN)) nil (IN) (NP (DT)) (NP (PRP)) (VBP)) | 4 |
| (TOP (NP (NP (JJ) (NN)) nil (NP (NN) (CC) (NN)))) | 4 |
| (TOP (S (NP (JJ) (NN)) nil (VP (VBG) (ADJP (JJ))))) | 4 |
*** Prescriptive Method
**** Most likely next words
#+begin_src clojure
(require '[com.darklimericks.server.models :as models]
         '[com.owoga.trie :as trie])

;; List the ten highest-count children of the trie node reached from the seed
;; phrase. Seed words are translated through `models/database` and the key is
;; reversed before the lookup (presumably because the trie was trained
;; backwards — confirm). Each child's root entry is read with `(get child [])`,
;; sorted by its second element in descending order, and its first element is
;; translated back through `models/database`.
(let [seed-words   ["bother" "me"]
      word-ids     (map models/database seed-words)
      trie-key     (reverse word-ids)
      node         (trie/lookup models/markov-trie trie-key)
      successors   (trie/children node)
      negate-count (fn [entry] (- (second entry)))
      id->word     (fn [entry] (update entry 0 models/database))]
  (->> successors
       (map (fn [child] (get child [])))
       (sort-by negate-count)
       (map id->word)
       (take 10)))
#+end_src

#+RESULTS:
| don't | 36 |
| doesn't | 21 |
| to | 14 |
| won't | 9 |
| really | 5 |
| not | 4 |
| you | 4 |
| it | 3 |
| even | 3 |
| shouldn't | 3 |
** collected or available datasets
The dataset currently in use is in ~/dark-corpus~. Further datasets will need to be provided by the end user.
** Decision support functionality
*** Choosing words for a lyric based on markov likelihood
*** Choosing words to complete a lyric based on rhyme quality
#+begin_src clojure :results value table :colnames yes
;; Build a table of rhyme suggestions for a phrase: a header row followed by ten result rows.
(require '[com.darklimericks.linguistics.core :as linguistics])
;; NOTE(review): the call (presumably into `linguistics`) that should produce
;; `results` from "bother me" is not visible here — confirm against the original.
(let [results
"bother me"
(->> results
;; NOTE(review): this `fn` is missing the rest of its destructuring vector and
;; the call that applies it (presumably a `map`); each row it yields is
;; [rhyming-word frequency-count-of-rhyming-word rhyme-quality].
(fn [[rhyming-word
[rhyming-word frequency-count-of-rhyming-word rhyme-quality]))
;; Keep ten rows and append them after the header row.
(take 10)
(into [["rhyme" "frequency count" "rhyme quality"]])))
#+end_src

#+RESULTS:
| rhyme | frequency count | rhyme quality |
| honoree | 2 | 7 |
| referee | 3 | 6 |
| repartee | 2 | 6 |
| nominee | 2 | 6 |
| undersea | 1 | 6 |
| oversea | 1 | 6 |
| rosemarie | 0 | 6 |
| disagree | 180 | 5 |
| poverty | 175 | 5 |
| mockery | 122 | 5 |
** Ability to support featurizing, parsing, cleaning, and wrangling datasets
The data processing code is in ~prhyme~.
Each line is tokenized with a regular expression that splits the string into tokens.
#+begin_src clojure
;; NOTE(review): only the docstring of this definition is visible; the regex
;; literal and the closing paren of the `def` are missing — restore them from
;; the original source.
(def re-word
"Regex for tokenizing a string into words
(including contractions and hyphenations),
commas, periods, and newlines."
#+end_src

Along with tokenization, each line is stripped of surrounding whitespace and converted to lowercase. Lowercasing lets words be compared regardless of case: "Foo" is treated the same as "foo".
#+begin_src clojure
(def xf-tokenize
  "Transducer that turns each input line into a vector of lowercase tokens.

  Steps, applied in order to every line:
    1. trim leading and trailing whitespace;
    2. find every match of `re-word` with `re-seq`;
    3. take the second element of each match (assumes `re-word` contains a
       capturing group, so each match is a vector — confirm against `re-word`);
    4. lowercase every token and collect the tokens into a vector."
  ;; The individual steps must be composed into one transducer: `def` accepts
  ;; only a single init expression.
  (comp
   (map string/trim)
   (map (partial re-seq re-word))
   (map (partial map second))
   (map (partial mapv string/lower-case))))
#+end_src

** methods and algorithms supporting data exploration and preparation
The primary data structure and algorithms supporting exploration of the data are built around a Markov Trie.
The Trie data structure supports a ~lookup~ function that returns the child trie at a given lookup key and a ~children~ function that returns all of the immediate children of a particular Trie.
#+begin_src clojure
(defprotocol ITrie
  "Navigation operations for a trie: descend to the node stored under a key
  sequence, or list the nodes directly beneath a node."
  (lookup [self ^clojure.lang.PersistentList ks] "Return node at key.")
  (children [self] "Immediate children of a node."))
;; Trie node type with three fields: `key`, `value`, and `children-`
;; (type-hinted as clojure.lang.PersistentTreeMap).
;; NOTE(review): the protocol name that normally precedes the method bodies
;; (presumably ITrie) is not visible here — confirm.
(deftype Trie [key value ^clojure.lang.PersistentTreeMap children-]
;; children: for a [k child] entry, builds a new Trie keyed by `k` that carries
;; the child's value and the child's own children.
;; NOTE(review): the call that applies this fn over `children-` (e.g. `map`) is
;; not visible here — confirm.
(children [trie]
(fn [[k ^Trie child]]
(Trie. k
(.value child)
(.children- child)))
;; lookup: walks the key sequence `k` one element at a time, descending through
;; `children-` at each step.
(lookup [trie k]
(loop [k k
trie trie]
;; Allows `update` to work the same as with maps... can use `fnil`.
;; (nil? trie') (throw (Exception. (format "Key not found: %s" k)))
;; NOTE(review): the clauses below read as the branches of a `cond` whose
;; opening line is not visible: a missing node yields nil, an exhausted key
;; yields a copy of the current node, otherwise descend by the first key element.
(nil? trie) nil
(empty? k)
(Trie. (.key trie)
(.value trie)
(.children- trie))
:else (recur
(rest k)
(get (.children- trie) (first k))))))
#+end_src

** data visualization functionalities for data exploration and inspection
** implementation of interactive queries
Interactive query capability at [[]].
** implementation of machine-learning methods and algorithms
Functions for training both the forwards and the backwards Markov tries.
#+begin_src clojure
(defn file-seq->markov-trie
"For forwards markov."
[database files n m]
;; NOTE(review): the steps below read as a transducer pipeline, but the form
;; that composes and runs them (e.g. `transduce` with `comp`), the initial trie,
;; and the use of `files` are not visible here — confirm against the original.
;; Read each file and split its text on newline, `+`, `?` and `.` characters.
(map slurp)
(map #(string/split % #"[\n+\?\.]"))
;; Run each piece through xf-tokenize, then xf-filter-english, then drop empty results.
(map (partial transduce data-transform/xf-tokenize conj))
(map (partial transduce data-transform/xf-filter-english conj))
(map (partial remove empty?))
;; Pad with "<s>" and "</s>" markers; the padding amounts are (dec m) and 1
;; as interpreted by xf-pad-tokens.
(map (partial into [] (data-transform/xf-pad-tokens (dec m) "<s>" 1 "</s>")))
;; Expand into partitions via n-to-m-partitions called with n and (inc m).
(map (partial mapcat (partial data-transform/n-to-m-partitions n (inc m))))
;; Pass each partition through the processor built from `database`.
(mapcat (partial mapv (data-transform/make-database-processor database))))
;; Reducing step: increment the count at index 1 of the entry stored under
;; `lookup`, starting from [lookup 0] when no entry exists yet.
(fn [trie lookup]
(update trie lookup (fnil #(update % 1 inc) [lookup 0]))))
;; Example: build a trie with n=1 and m=3 and inspect samples of the result.
;; NOTE(review): the steps that turn "dark-corpus" into a sequence of files
;; before the `eduction` are not visible here — confirm against the original.
(let [files (->> "dark-corpus"
(eduction (xf-file-seq 501 2)))
database (atom {:next-id 1})
trie (file-seq->markov-trie database files 1 3)]
;; Returns three samples: the first 20 trie entries, the keys of entries
;; 106-125 mapped through the database, and 10 database entries.
[(take 20 trie)
(map (comp (partial map @database) first) (take 20 (drop 105 trie)))
(take 10 @database)])
;; [([(1 1 2) [[1 1 2] 1]]
;; [(1 1 3) [[1 1 3] 1]]
;; [(1 1 7) [[1 1 7] 2]]
;; [(1 1 9) [[1 1 9] 3]]
;; [(1 1 16) [[1 1 16] 4]])
;; (("<s>" "call" "me")
;; ("<s>" "call")
;; ("<s>" "right" "</s>")
;; ("<s>" "right")
;; ("<s>" "that's" "proportional")
;; ("<s>" "that's")
;; ("<s>" "don't" "</s>")
;; ("<s>" "don't")
;; ("<s>" "yourself" "in")
;; ("<s>" "yourself")
;; ("<s>" "transformation" "</s>")
;; ("<s>" "transformation")
;; ("<s>")
;; ("them" "from" "their")
;; ("them" "from")
;; ("them")
;; ("from" "their" "pain")
;; ("from" "their")
;; ("from" "your" "side")
;; ("from" "your"))
;; (["come" 92]
;; ["summer" 17]
;; ["more" 101]
;; [121 "that's"]
;; [65 "by"]
;; ["dust" 133]
;; [70 "said"]
;; ["misery" 128]
;; [62 "get"]
;; [74 "gone"])]
#+end_src

#+begin_src clojure
(defn train-backwards
"For building lines backwards so they can be seeded with a target rhyme."
[files n m trie-filepath database-filepath tightly-packed-trie-filepath]
;; Build the backwards trie, using a fresh database atom whose :next-id starts at 1.
(let [database (atom {:next-id 1})
trie (file-seq->backwards-markov-trie database files n m)]
;; Persist the trie (as a seq of its entries) and the database snapshot with nippy.
(nippy/freeze-to-file trie-filepath (seq trie))
(println "Froze" trie-filepath)
(nippy/freeze-to-file database-filepath @database)
(println "Froze" database-filepath)
(save-tightly-packed-trie trie database tightly-packed-trie-filepath)
;; Load everything back and print a sample of each as a sanity check.
;; NOTE(review): the calls that read `trie-filepath` and `database-filepath`
;; back in, and the file argument to load-tightly-packed-trie-from-file, are
;; not visible in the bindings below — confirm against the original.
(let [loaded-trie (->> trie-filepath
(into (trie/make-trie)))
loaded-db (->> database-filepath
loaded-tightly-packed-trie (tpt/load-tightly-packed-trie-from-file
(decode-fn loaded-db))]
(println "Loaded trie:" (take 5 loaded-trie))
(println "Loaded database:" (take 5 loaded-db))
(println "Loaded tightly-packed-trie:" (take 5 loaded-tightly-packed-trie))
(println "Successfully loaded trie and database."))))
;; Example session: train the backwards model on the corpus, then load the
;; saved trie and database into vars and sample the tightly packed trie.
;; NOTE(review): several forms below are cut short — the file-seq steps before
;; `eduction`, the arguments to `train-backwards`, and the loader call inside
;; `markov-tight-trie` are not visible. Confirm against the original.
(let [files (->> "dark-corpus"
(eduction (xf-file-seq 0 250000)))
[trie database] (train-backwards
;; Rebuild the trie from the thawed entries and thaw the database.
(def markov-trie (into (trie/make-trie) (nippy/thaw-from-file "/home/eihli/.models/markov-trie-4-gram-backwards.bin"))))
(def database (nippy/thaw-from-file "/home/eihli/.models/markov-database-4-gram-backwards.bin")))
(def markov-tight-trie
(decode-fn database))))
(take 20 markov-tight-trie)
#+end_src

** functionalities to evaluate the accuracy of the data product
** industry-appropriate security features
** tools to monitor and maintain the product
** a user-friendly, functional dashboard that includes at least three visualization types
* Documentation
D. Create each of the following forms of documentation for the product you have developed:
** Business Vision
Provide rhyming lyric suggestions optionally constrained by syllable count.
** Data Sets
See ~resources/darklyrics-markov.tpt~
** Data Analysis
See ~src/com/owoga/darklyrics/core.clj~
** Assessment
See visualization of rhyme suggestion in action.
See perplexity?
** Visualizations
See visualization of smoothing technique.
See wordcloud
** Accuracy
• assessment of the product's accuracy
** Testing
• the results from the data product testing, revisions, and optimization based on the provided plans, including screenshots
** Source
• source code and executable file(s)
** Quick Start
• a quick start guide summarizing the steps necessary to install and use the product
* Notes
http-kit doesn't support https so no need to bother with keystore stuff like you would with jetty. Just proxy from haproxy.

css :css
:or {title "DarkLimericks"
css ["/assets/tachyons.css"]
js ["/assets/wgu-main.js"]}
js ["/assets/wgu/main.js"]}
:as opts} :opts}
& body]
(println (keys request))
{:class "ml2"}
"Show rhyme suggestions"))])
"Show rhyme suggestions"))
[:canvas#myChart {:width 400 :height 400}]]])
(defn show-rhyme-suggestion
[request suggestions]

"integrity": "sha1-hZgoeOIbmOHGZCXgPQF0eI9Wnug=",
"dev": true
"chart.js": {
"version": "3.4.0",
"resolved": "",
"integrity": "sha512-mJsRm2apQm5mwz2OgYqGNG4erZh/qljcRZkWSa0kLkFr3UC3e1wKRMgnIh6WdhUrNu0w/JT9PkjLyylqEqHXEQ=="
"cipher-base": {
"version": "1.0.4",
"resolved": "",

"devDependencies": {
"shadow-cljs": "2.14.5"
"dependencies": {}
"dependencies": {
"chart.js": "^3.4.0"

{:target :browser
:output-dir "/home/eihli/src/darklimericks/web/resources/public/wgu/"
:assets-path "/assets/"
:modules {:main {:init-fn}}}}}

(:require ["chart.js/auto" :as chart]))
;; NOTE(review): `init` is defined again further down in this file; when both
;; definitions are loaded, the later one replaces this one.
(defn init
  "Entry point: prints a greeting."
  []
  (println "Hello world"))
(defn init-chart
  "Render a sample bar chart into the element with id \"myChart\".

  Looks up the element, builds a Chart.js configuration as a Clojure map,
  converts it with `clj->js`, constructs a `chart/Chart` with it, and logs
  the resulting chart object to the browser console."
  []
  (let [ctx (. js/document getElementById "myChart")
        config {:type "bar"
                :data {:labels ["Red" "Blue" "Yellow" "Green" "Purple" "Orange"]
                       :datasets [{:label "# of Votes"
                                   :data [12 19 3 5 2 3]
                                   ;; One fill colour per label, in label order.
                                   :backgroundColor ["rgba(255, 99, 132, 0.2)"
                                                     "rgba(54, 162, 235, 0.2)"
                                                     "rgba(255, 206, 86, 0.2)"
                                                     "rgba(75, 192, 192, 0.2)"
                                                     "rgba(153, 102, 255, 0.2)"
                                                     "rgba(255, 159, 64, 0.2)"]
                                   ;; One border colour per label, in label order.
                                   :borderColor ["rgba(255, 99, 132, 1)"
                                                 "rgba(54, 162, 235, 1)"
                                                 "rgba(255, 206, 86, 1)"
                                                 "rgba(75, 192, 192, 1)"
                                                 "rgba(153, 102, 255, 1)"
                                                 "rgba(255, 159, 64, 1)"]
                                   :borderWidth 1}]}
                :options {:scales {:y {:beginAtZero true}}}}
        ;; Named `bar-chart` so the local does not shadow the `chart` namespace alias.
        bar-chart (new chart/Chart ctx (clj->js config))]
    (.log js/console bar-chart)))
(defn init
  "Entry point: prints a greeting and registers `init-chart` to run when the
  window fires DOMContentLoaded."
  []
  (println "Hello world")
  (. js/window addEventListener "DOMContentLoaded" init-chart))
