Generation misc

main
Eric Ihli 4 years ago
parent 2c06413a93
commit f408778c9b

@ -533,7 +533,6 @@
(transduce (transduce
(comp (comp
(map (fn [[k v]] (map (fn [[k v]]
(println (string/join " " (map (@context :database) k)))
[(string/join " " (map (@context :database) k)) [(string/join " " (map (@context :database) k))
[k v]])) [k v]]))
(map (fn [[phrase [k v]]] (map (fn [[phrase [k v]]]
@ -543,11 +542,10 @@
(fn [trie [k v]] (fn [trie [k v]]
(update trie k (fnil conj [v]) v))) (update trie k (fnil conj [v]) v)))
(trie/make-trie) (trie/make-trie)
(->> (trie/children-at-depth (@context :trie) 1 2) (->> (trie/children-at-depth (@context :trie) 0 1)))))
(drop 500050)
(take 20)))))
nil) nil)
(take 5 (@context :flex-rhyme-trie))
) )
(comment (comment
@ -755,10 +753,17 @@
(exclude-non-english-phrases-from-choices context))] (exclude-non-english-phrases-from-choices context))]
(if (empty? choices) (if (empty? choices)
(recur (if (string? target-rhyme) (recur (if (string? target-rhyme)
(rest (phrase->flex-rhyme-phones context target-rhyme)) (butlast (phrase->flex-rhyme-phones target-rhyme))
(rest target-rhyme))) (butlast target-rhyme)))
choices)))) choices))))
(comment
(->> (rhyming-n-gram-choices @context "fall")
(map (comp (@context :database) first first)))
)
(defn generate-n-syllable-sentence-rhyming-with (defn generate-n-syllable-sentence-rhyming-with
[context target-phrase n-gram-rank target-rhyme-syllable-count target-sentence-syllable-count] [context target-phrase n-gram-rank target-rhyme-syllable-count target-sentence-syllable-count]
(if (string? target-phrase) (if (string? target-phrase)
@ -824,14 +829,15 @@
(map last) (map last)
(apply distinct?)))) (apply distinct?))))
(->> (generate-haiku "technology") (->> (generate-haiku "football")
(filter valid-haiku) (filter valid-haiku)
(map (partial string/join "\n")) (map (partial string/join "\n"))
(map #(vector % (sha256 %))) (map #(vector % (sha256 %)))
(map (fn [[haiku sha]] (map (fn [[haiku sha]]
(println haiku) (println haiku)
(println sha) (println sha)
(println)))) (println)))
(take 10))
) )
@ -865,19 +871,6 @@ witness sky is blackened now
" "
(defn amul8
([sentence]
(->> (amulate (string/split sentence #" "))
(map reverse)
(map (partial string/join " "))))
([sentence n]
(loop [result [sentence]
n n]
(if (zero? n)
result
(recur (conj result (amul8 (peek result)))
(dec n))))))
(defn amulate? (defn amulate?
[text] [text]
(let [digest (sha256 text)] (let [digest (sha256 text)]

@ -3,16 +3,20 @@
[clojure.string :as string] [clojure.string :as string]
[taoensso.timbre :as timbre] [taoensso.timbre :as timbre]
[com.owoga.prhyme.util.math :as math] [com.owoga.prhyme.util.math :as math]
[com.owoga.phonetics.syllabify :as owoga.syllabify]
[com.owoga.phonetics :as owoga.phonetics]
[com.owoga.tightly-packed-trie.encoding :as encoding] [com.owoga.tightly-packed-trie.encoding :as encoding]
[examples.core :as examples] [examples.core :as examples]
[taoensso.nippy :as nippy] [taoensso.nippy :as nippy]
[com.owoga.prhyme.nlp.core :as nlp] [com.owoga.prhyme.nlp.core :as nlp]
[examples.tpt :as examples.tpt]
[clojure.java.io :as io] [clojure.java.io :as io]
[com.owoga.prhyme.data-transform :as df] [com.owoga.prhyme.data-transform :as df]
[com.owoga.trie :as trie] [com.owoga.trie :as trie]
[com.owoga.prhyme.util.weighted-rand :as weighted-rand] [com.owoga.prhyme.util.weighted-rand :as weighted-rand]
[clojure.set :as set] [clojure.set :as set]
[com.owoga.tightly-packed-trie :as tpt])) [com.owoga.tightly-packed-trie :as tpt]
[com.owoga.prhyme.nlp.tag-sets.treebank-ii :as tb2]))
(defn update-values [m f & args] (defn update-values [m f & args]
(reduce (reduce
@ -438,8 +442,11 @@
(update trie k (fnil inc 0))) (update trie k (fnil inc 0)))
trie trie
entries))) entries)))
(trie/make-trie) #_(trie/make-trie)
(take 300 texts)))) test-trie
(->> texts
(drop 4000)
(take 4000)))))
) )
@ -520,8 +527,61 @@
(map zip/node) (map zip/node)
()) ())
(nippy/freeze-to-file "resources/grammar-trie-take-8000.bin" (seq test-trie))
(nippy/freeze-to-file "resources/grammar-database-take-8000.bin" @test-database)
) )
(defn phrase->flex-rhyme-phones
"Takes a space-seperated string of words
and returns the concatenation of the words
vowel phones.
Returns them in reversed order so they
are ready to be used in a lookup of a rhyme trie.
"
[phrase]
(->> phrase
(#(string/split % #" "))
(map (comp owoga.syllabify/syllabify first owoga.phonetics/get-phones))
(map (partial reduce into []))
(map #(filter (partial re-find #"\d") %))
(flatten)
(map #(string/replace % #"\d" ""))
(reverse)))
(defn markov-generate-grammar-with-rhyming-tail
[grammar-trie grammar-database rhyme-trie rhyme-database rhyme-target zipper]
(let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target)
rhyme-options (examples.tpt/rhyming-n-gram-choices
{:database rhyme-database
:flex-rhyme-trie rhyme-trie}
rhyme-target)
rhyme-option-words (map (comp rhyme-database first first) rhyme-options)
grammar (markov-generate-grammar
grammar-trie
grammar-database
zipper)
tail [(grammar-database (zip/node (zipper-last (zip/vector-zip grammar))))]
choices (map (comp grammar-database first)
(children grammar-trie grammar-database tail))
intersection (set/intersection
(into #{} rhyme-option-words)
(into #{} choices))]
(if (empty? intersection)
(do
(println (take 5 choices)
(take 5 rhyme-option-words))
(markov-generate-grammar-with-rhyming-tail
grammar-trie
grammar-database
rhyme-trie
rhyme-database
rhyme-target
zipper))
intersection)))
(defn markov-generate-sentence (defn markov-generate-sentence
[trie database zipper] [trie database zipper]
(cond (cond
@ -580,7 +640,10 @@
(zip/vector-zip))))) (zip/vector-zip)))))
(comment (comment
(generate test-trie @test-database (zip/vector-zip [1])) (markov-generate-sentence
test-trie
@test-database
(zip/vector-zip [1]))
(repeatedly (repeatedly
20 20
@ -626,6 +689,7 @@
(map zip/node))) (map zip/node)))
(defn choose-with-n-gram-markov (defn choose-with-n-gram-markov
"Hard-coded to work with 4-gram. That's the </s> at the end."
[zipper [zipper
grammar-trie grammar-trie
grammar-database grammar-database
@ -633,7 +697,7 @@
n-gram-database] n-gram-database]
(let [prev-pos (previous-leaf-part-of-speech zipper) (let [prev-pos (previous-leaf-part-of-speech zipper)
prev-pos' (map grammar-database prev-pos) prev-pos' (map grammar-database prev-pos)
n-gram (filter string? (rest-leafs zipper)) n-gram (filter string? (concat (rest-leafs zipper) ["</s>" "</s>" "</s>"]))
n-gram' (mapv tpt-db n-gram) n-gram' (mapv tpt-db n-gram)
part-of-speech-children (->> (children grammar-trie grammar-database (take-last 1 prev-pos')) part-of-speech-children (->> (children grammar-trie grammar-database (take-last 1 prev-pos'))
(map #(vector (grammar-database (first %)) (map #(vector (grammar-database (first %))
@ -641,15 +705,14 @@
grammar-children (->> (children grammar-trie grammar-database prev-pos') grammar-children (->> (children grammar-trie grammar-database prev-pos')
(map #(vector (grammar-database (first %)) (map #(vector (grammar-database (first %))
(second %)))) (second %))))
n-gram-children (->> n-gram' n-gram-children (loop [n-gram' (reverse (remove nil? (take 4 n-gram')))]
(take 2) (if-let [node (trie/lookup n-gram-trie n-gram')]
(reverse) (->> (trie/children node)
(trie/lookup n-gram-trie)
(trie/children)
(map #(vector (n-gram-database (.key %)) (get % [])))) (map #(vector (n-gram-database (.key %)) (get % []))))
(recur (rest n-gram'))))
combined-choices (reduce combined-choices (reduce
(fn [acc [k v]] (fn [acc [k v]]
(update acc k (fnil * v))) (update acc k (fnil + v)))
(into {} grammar-children) (into {} grammar-children)
n-gram-children) n-gram-children)
intersection (set/intersection intersection (set/intersection
@ -670,9 +733,9 @@
n-gram-children n-gram-children
combined-choices combined-choices
choice] choice]
)) (first choice)))
(defn previous-leaf-part-of-speech (defn previous-leaf-loc
[zipper] [zipper]
(->> zipper (->> zipper
(iterate zip/prev) (iterate zip/prev)
@ -680,11 +743,32 @@
(filter #(and (symbol? (zip/node %)) (filter #(and (symbol? (zip/node %))
(zip/up %) (zip/up %)
(= 1 (count (zip/node (zip/up %)))))) (= 1 (count (zip/node (zip/up %))))))
(first) (first)))
(defn previous-leaf-part-of-speech
[zipper]
(->> zipper
previous-leaf-loc
(zip/path) (zip/path)
(map first) (map first)
(filter symbol?))) (filter symbol?)))
(defn nearest-ancestor-phrase
[loc]
(->> loc
(iterate zip/prev)
(take-while (complement nil?))
(filter (comp tb2/phrases zip/node))
(first)))
(comment
(nearest-ancestor-phrase
(->> (zip/vector-zip
'[NP [NN]])
zip/down
zip/right
zip/down)))
(comment (comment
;; Working backwards from a completed grammar tree that has ;; Working backwards from a completed grammar tree that has
;; been partially filled in with words, choose the next likely word ;; been partially filled in with words, choose the next likely word
@ -742,13 +826,17 @@
n-gram' n-gram'
prev-pos prev-pos
prev-pos' prev-pos'
part-of-speech-children
grammar-children grammar-children
n-gram-children n-gram-children
combined-choices combined-choices
choice] choice]
(choose-with-n-gram-markov (choose-with-n-gram-markov
loc test-trie @test-database tpt tpt-db)] loc test-trie @test-database tpt tpt-db)]
[ [prev-pos
(take 5 grammar-children)
(take 5 n-gram-children)
(take 5 combined-choices)
choice])) choice]))
(trie/lookup test-trie [1 59 3 5 5 17]) (trie/lookup test-trie [1 59 3 5 5 17])
@ -761,6 +849,30 @@
(def tpt-db (nippy/thaw-from-file (io/resource "dark-corpus-4-gram-backwards-db.bin"))) (def tpt-db (nippy/thaw-from-file (io/resource "dark-corpus-4-gram-backwards-db.bin")))
(markov-generate-grammar test-trie @test-database (zip/vector-zip [1])) (markov-generate-grammar test-trie @test-database (zip/vector-zip [1]))
(-> (markov-generate-grammar test-trie @test-database (zip/vector-zip [1]))
(zip/vector-zip)
(zipper-last)
(visitor
zip/prev
(fn [loc]
(let [k (filter symbol? (map first (zip/path loc)))]
(if (and (symbol? (zip/node loc))
(zip/up loc)
(= 1 (count (zip/node (zip/up loc))))
(not-empty k))
(let [k' (map @test-database k)
choice (choose-with-n-gram-markov
loc
test-trie
@test-database
tpt
tpt-db)]
(zip/replace
loc
[(zip/node loc)
[choice]]))
loc)))))
(-> (markov-generate-grammar test-trie @test-database (zip/vector-zip [1])) (-> (markov-generate-grammar test-trie @test-database (zip/vector-zip [1]))
zip/vector-zip zip/vector-zip
(zipper-last) (zipper-last)
@ -773,8 +885,8 @@
(= 1 (count (zip/node (zip/up loc)))) (= 1 (count (zip/node (zip/up loc))))
(not-empty k)) (not-empty k))
(let [k' (map @test-database k) (let [k' (map @test-database k)
choice (@test-database (first (choose test-trie @test-database k')))] choice (@test-database (first (choose
(println k') test-trie @test-database k')))]
(zip/replace (zip/replace
loc loc
[(zip/node loc) [(zip/node loc)
@ -783,3 +895,156 @@
(@test-database 497) (@test-database 497)
) )
(defn markov-choose-words-for-grammar
[grammar-trie
grammar-database
n-gram-trie
n-gram-database
grammar]
(-> grammar
(visitor
zip/prev
(fn [loc]
(println (zip/node loc))
(if (and (tb2/words (zip/node loc))
(nil? (zip/right loc)))
(do
(println "inserting right" (zip/node loc))
(zip/insert-right
loc
[(choose-with-n-gram-markov
loc
grammar-trie
grammar-database
n-gram-trie
n-gram-database)]))
loc)))))
(comment
(let [grammar '[[TOP [[S [[NP [[NNS]]] [VP [[VBP]]] [NP [[NNS ["taylor"]]]]]]]]]
sentence (markov-choose-words-for-grammar
test-trie
@test-database
tpt
tpt-db
(->> (zip/vector-zip grammar)
zipper-last
previous-leaf-loc))
grammar2 `[[TOP [[S [[NP [[NNS]] [VP [[VBP]]] [NP [[NNS]]]]]]]]]
rhyme (random-sample
(markov-generate-grammar-with-rhyming-tail
grammar2
grammar-database
rhyme-trie
rhyme-database
"taylor"
))]
sentence)
(markov-generate-grammar
test-trie
@test-database
(zip/vector-zip [1]))
(map @test-database '[TOP S NP NNS VP VBP]) ;; => (1 3 5 74 7 53)
(markov-generate-grammar-with-rhyming-tail
test-trie
@test-database
rhyme-trie
rhyme-database
"taylor"
(zip/vector-zip [1]))
)
(defn markov-complete-grammar-with-rhyming-tail
[grammar-trie grammar-database rhyme-trie rhyme-database grammar rhyme-target]
(let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target)
rhyme-options (examples.tpt/rhyming-n-gram-choices
{:database rhyme-database
:flex-rhyme-trie rhyme-trie}
rhyme-target)
rhyme-option-words (map (comp rhyme-database first first) rhyme-options)
tail [(grammar-database (zip/node (zipper-last (zip/vector-zip grammar))))]
choices (map (comp grammar-database first)
(children grammar-trie grammar-database tail))
intersection (set/intersection
(into #{} rhyme-option-words)
(into #{} choices))]
(if (empty? intersection)
(markov-complete-grammar-with-rhyming-tail
grammar-trie
grammar-database
rhyme-trie
rhyme-database
rhyme-target)
intersection)))
(defn markov-generate-sentence
[trie database zipper]
(cond
(zip/end? zipper)
(zip/root zipper)
(seqable? (zip/node zipper))
(recur trie database (zip/next zipper))
(symbol? (zip/node zipper))
(recur trie database (zip/next zipper))
(symbol? (database (zip/node zipper)))
(let [sym (database (zip/node zipper))
sym-path (->> (map first (zip/path zipper))
butlast
(filter symbol?)
(#(concat % (list sym))))
path (map database sym-path)
choice (first (choose trie database path))]
(recur
trie
database
(-> zipper
(zip/replace
[sym choice])
(zip/root)
(zip/vector-zip))))
(string? (database (zip/node zipper)))
(let [terminal (database (zip/node zipper))
path (->> (map first (zip/path zipper))
butlast
(filter symbol?))]
(recur
trie
database
(-> zipper
(zip/replace
terminal)
(zip/next)
(zip/root)
(zip/vector-zip))))
:else
(recur
trie
database
(-> zipper
(zip/replace
(mapv
database
(database (zip/node zipper))))
(zip/next)
(zip/root)
(zip/vector-zip)))))
(comment
(generate-grammar-from [[NN ["taylor"]]])
(map (comp (partial map @test-database) first) (take 5 test-trie))
(@test-database 1)
)

@ -68,7 +68,7 @@
(ParserFactory/create (ParserFactory/create
(ParserModel. (ParserModel.
(io/input-stream (io/resource "models/en-parser-chunking.bin"))) (io/input-stream (io/resource "models/en-parser-chunking.bin")))
3 5
0.95)) 0.95))
(defn parse-probs [parses] (defn parse-probs [parses]
@ -83,13 +83,20 @@
(string/split results #"\n"))) (string/split results #"\n")))
(comment (comment
(- (Math/log 0.001) (Math/log 0.01))
(Math/E)
(tokenize "Eric's testing.") (tokenize "Eric's testing.")
(let [results (StringBuffer.) (let [results (StringBuffer.)
parses (ParserTool/parseLine "Eric 's testing ." custom-parser 3)] parses (ParserTool/parseLine "The dog ran fast ." custom-parser 1)]
((juxt parse-probs parse-strs) parses)) ((juxt parse-probs parse-strs) parses))
(let [results (StringBuffer.)
parses (ParserTool/parseLine "Eric 's testing ." custom-parser 1)]
(meta parses))
) )
(Math/log (Math/pow Math/E -4.1))
(defn parse-top-n [tokenized n] (defn parse-top-n [tokenized n]
(let [results (StringBuffer.) (let [results (StringBuffer.)
parses (ParserTool/parseLine tokenized custom-parser n)] parses (ParserTool/parseLine tokenized custom-parser n)]
@ -1276,6 +1283,7 @@
(map string/trim) (map string/trim)
(filter english?) (filter english?)
(remove empty?) (remove empty?)
(filter valid-sentence?)
(mapv treebank-zipper) (mapv treebank-zipper)
(remove nil?) (remove nil?)
(map parts-of-speech-trie-entries) (map parts-of-speech-trie-entries)
@ -1287,7 +1295,7 @@
(clojure.lang.MapEntry. (into (vec k) [v]) v))))) (clojure.lang.MapEntry. (into (vec k) [v]) v)))))
(comment (comment
(let [text "Hi my name. Is Eric? \n What is yours? Fooaba brosaet"] (let [text "Hi my name. Is Eric? \n What is yours? Fooaba brosaet. The run dog go for the."]
(text->grammar-trie-map-entry text) (text->grammar-trie-map-entry text)
#_(->> text #_(->> text
-split-text-into-sentences -split-text-into-sentences

Loading…
Cancel
Save