diff --git a/dev/examples/core.clj b/dev/examples/core.clj
index 3438f80..4d3323a 100644
--- a/dev/examples/core.clj
+++ b/dev/examples/core.clj
@@ -280,7 +280,7 @@
(let [structure (->> documents
(drop chunk)
(take chunk-size)
- structures
+ grammar-tree-frequencies
(reduce
(fn [a v]
(nlp/deep-merge-with + a v))
@@ -363,13 +363,17 @@
(nlp/deep-merge-with + accum data)))
{}
documents)))
+
+ (def dark-lyrics-structure-frequencies
+ (nippy/thaw-from-file "resources/corpus/darklyrics/grammar-tree-freqs.nippy"))
+
(def popular-structure-freq-data (into {} (take 500 (reverse (sort-by #(second %) structure-freq-data)))))
+
(take 100 popular-structure-freq-data)
(nippy/freeze-to-file "resources/corpus/darklyrics/grammar-tree-freqs.nippy" structure-freq-data)
(def t1 (nippy/thaw-from-file "resources/structure-freqs/0.nip"))
- structures
- (take 100 (reverse (sort-by second structures)))
+
(do
(let [documents (->> "dark-corpus"
io/file
@@ -426,7 +430,7 @@
(def example-structures
(->> corpus
(take 100)
- structures
+ grammar-tree-frequencies
(reduce
(fn [a v]
(merge-with + a v))
diff --git a/dev/examples/tpt.clj b/dev/examples/tpt.clj
index bd382a6..6039183 100644
--- a/dev/examples/tpt.clj
+++ b/dev/examples/tpt.clj
@@ -1,6 +1,7 @@
(ns examples.tpt
(:require [clojure.string :as string]
[clojure.java.io :as io]
+ [com.owoga.prhyme.nlp.core :as nlp]
[taoensso.tufte :as tufte :refer (defnp p profiled profile)]
[com.owoga.trie :as trie]
[com.owoga.tightly-packed-trie :as tpt]
@@ -427,7 +428,10 @@
(@context :database)))
nil)
-(defn find-rhymes [trie word]
+(defn find-rhymes
+ "Takes a rhyme-trie (perfect or vowel only, for example)
+ and a word. Returns list of rhyming words."
+ [trie word]
(->> (perfect-rhymes trie (or (dict/cmu-with-stress-map word)
(util/get-phones-with-stress word)))
(map (comp first second))
@@ -442,7 +446,10 @@
freq]))
(remove #(= word (first %)))))
-(defn choose-next-word [{:keys [database trie] :as context} n-gram]
+(defn choose-next-word
+ "Given an n-gram of [[word1 freq1] [word2 freq2]] chooses
+ the next word based on markove data in trie."
+ [{:keys [database trie] :as context} n-gram]
(let [n-gram-ids (->> n-gram (map first) (map database))
node (trie/lookup trie n-gram-ids)]
(cond
@@ -467,24 +474,83 @@
:else
(choose-next-word context (butlast n-gram)))))
+(defn remove-sentence-markers [phrase]
+ (remove (fn [[word _]] (#{"" ""} word)) phrase))
+
+(defn valid-sentence? [phrase]
+ (->> phrase
+ (map first)
+ (string/join " ")
+ (#(string/replace % #"(|)" ""))
+ (nlp/valid-sentence?)))
+
+(defn valid-sentences? [phrase]
+ (let [sentences (->> (util/take-through
+ #(= (first %) "")
+ phrase)
+ (map remove-sentence-markers))]
+ sentences))
+
(defn generate-phrase [{:keys [database trie] :as context} phrase]
- (loop [phrase phrase]
- (if (< 10 (count phrase))
- phrase
- (recur (cons (choose-next-word context (take 3 phrase))
- phrase)))))
-
-(defn generate-rhyme [{:keys [perfect-rhyme-trie] :as context} phrase]
- (let [phrase1 (generate-phrase context phrase)
- rhyme (first (find-rhymes perfect-rhyme-trie (first (last phrase1))))
- phrase2 (generate-phrase context (list (last phrase1)))]
- [phrase1 phrase2]))
+ (loop [phrase' (loop [phrase phrase]
+ (if (< 5 (count phrase))
+ phrase
+ (recur (cons (choose-next-word context (take 3 phrase))
+ phrase))))]
+ (if (valid-sentence? phrase')
+ phrase'
+ (recur (loop [phrase phrase]
+ (if (< 5 (count phrase))
+ phrase
+ (recur (cons (choose-next-word context (take 3 phrase))
+ phrase))))))))
+
+(defn generate-sentence-backwards
+ "Given a phrase of [w1 w2 w3] generates a sentence
+ using a backwards markov."
+ ([{:keys [database trie] :as context} phrase]
+ (let [phrase (map (fn [w]
+ (let [id (database w)]
+ [w (second (get trie [id]))]))
+ phrase)]
+ (loop [phrase' (loop [phrase phrase]
+ (if (= "" (first (first phrase)))
+ phrase
+ (recur (cons (choose-next-word context (take 3 phrase))
+ phrase))))]
+ (if (valid-sentence? phrase')
+ phrase'
+ (recur (loop [phrase phrase]
+ (if (= "" (first (first phrase)))
+ phrase
+ (recur (cons (choose-next-word context (take 3 phrase))
+ phrase)))))))))
+ )
+
+(defn generate-rhyme
+ ([context]
+ (generate-rhyme context [""]))
+ ([{:keys [perfect-rhyme-trie] :as context} phrase]
+ (let [phrase1 (generate-sentence-backwards context phrase)
+ rhyme (second (find-rhymes perfect-rhyme-trie (first (first (take-last 2 phrase1)))))
+ phrase2 (generate-sentence-backwards context [(first rhyme) ""])]
+ [phrase1 phrase2])))
(comment
(initialize)
+ (generate-rhyme @context)
+
+ (let [{:keys [database trie rhyme-trie]} @context
+ phrase [""]
+ ids (map database phrase)]
+ (get trie ids))
+ (choose-next-word @context (take 3 [["" 509]]))
+
+ (generate-sentence-backwards @context [""])
+
+ (valid-sentences? (generate-phrase @context '(["bitter" 41])))
+
- (generate-phrase @context '(["bitter" 41]))
- (generate-rhyme @context '(["bitter" 41]))
(choose-next-word @context (take 3 [["theology" 41]]))
@@ -616,7 +682,7 @@
(if (> i 20)
generated-text
(let [children (loop [i 4]
- (let [node (p :lookup
+ (let [node (p :lookup
(trie/lookup
loaded-tightly-packed-trie
(vec (take-last i generated-text))))
diff --git a/src/com/owoga/prhyme/util/math.clj b/src/com/owoga/prhyme/util/math.clj
index dde5b41..f530552 100644
--- a/src/com/owoga/prhyme/util/math.clj
+++ b/src/com/owoga/prhyme/util/math.clj
@@ -390,4 +390,3 @@
[r (/ nr* nr)])
(map vector xs ys (vals sgt))))))
-(discount-coefficient-map )