Generation misc

main
Eric Ihli 4 years ago
parent 2c06413a93
commit f408778c9b

@ -533,7 +533,6 @@
(transduce
(comp
(map (fn [[k v]]
(println (string/join " " (map (@context :database) k)))
[(string/join " " (map (@context :database) k))
[k v]]))
(map (fn [[phrase [k v]]]
@ -543,11 +542,10 @@
(fn [trie [k v]]
(update trie k (fnil conj [v]) v)))
(trie/make-trie)
(->> (trie/children-at-depth (@context :trie) 1 2)
(drop 500050)
(take 20)))))
(->> (trie/children-at-depth (@context :trie) 0 1)))))
nil)
(take 5 (@context :flex-rhyme-trie))
)
(comment
@ -755,10 +753,17 @@
(exclude-non-english-phrases-from-choices context))]
(if (empty? choices)
(recur (if (string? target-rhyme)
(rest (phrase->flex-rhyme-phones context target-rhyme))
(rest target-rhyme)))
(butlast (phrase->flex-rhyme-phones target-rhyme))
(butlast target-rhyme)))
choices))))
(comment
(->> (rhyming-n-gram-choices @context "fall")
(map (comp (@context :database) first first)))
)
(defn generate-n-syllable-sentence-rhyming-with
[context target-phrase n-gram-rank target-rhyme-syllable-count target-sentence-syllable-count]
(if (string? target-phrase)
@ -824,14 +829,15 @@
(map last)
(apply distinct?))))
(->> (generate-haiku "technology")
(->> (generate-haiku "football")
(filter valid-haiku)
(map (partial string/join "\n"))
(map #(vector % (sha256 %)))
(map (fn [[haiku sha]]
(println haiku)
(println sha)
(println))))
(println)))
(take 10))
)
@ -865,19 +871,6 @@ witness sky is blackened now
"
(defn amul8
([sentence]
(->> (amulate (string/split sentence #" "))
(map reverse)
(map (partial string/join " "))))
([sentence n]
(loop [result [sentence]
n n]
(if (zero? n)
result
(recur (conj result (amul8 (peek result)))
(dec n))))))
(defn amulate?
[text]
(let [digest (sha256 text)]

@ -3,16 +3,20 @@
[clojure.string :as string]
[taoensso.timbre :as timbre]
[com.owoga.prhyme.util.math :as math]
[com.owoga.phonetics.syllabify :as owoga.syllabify]
[com.owoga.phonetics :as owoga.phonetics]
[com.owoga.tightly-packed-trie.encoding :as encoding]
[examples.core :as examples]
[taoensso.nippy :as nippy]
[com.owoga.prhyme.nlp.core :as nlp]
[examples.tpt :as examples.tpt]
[clojure.java.io :as io]
[com.owoga.prhyme.data-transform :as df]
[com.owoga.trie :as trie]
[com.owoga.prhyme.util.weighted-rand :as weighted-rand]
[clojure.set :as set]
[com.owoga.tightly-packed-trie :as tpt]))
[com.owoga.tightly-packed-trie :as tpt]
[com.owoga.prhyme.nlp.tag-sets.treebank-ii :as tb2]))
(defn update-values [m f & args]
(reduce
@ -438,8 +442,11 @@
(update trie k (fnil inc 0)))
trie
entries)))
(trie/make-trie)
(take 300 texts))))
#_(trie/make-trie)
test-trie
(->> texts
(drop 4000)
(take 4000)))))
)
@ -520,8 +527,61 @@
(map zip/node)
())
(nippy/freeze-to-file "resources/grammar-trie-take-8000.bin" (seq test-trie))
(nippy/freeze-to-file "resources/grammar-database-take-8000.bin" @test-database)
)
(defn phrase->flex-rhyme-phones
"Takes a space-seperated string of words
and returns the concatenation of the words
vowel phones.
Returns them in reversed order so they
are ready to be used in a lookup of a rhyme trie.
"
[phrase]
(->> phrase
(#(string/split % #" "))
(map (comp owoga.syllabify/syllabify first owoga.phonetics/get-phones))
(map (partial reduce into []))
(map #(filter (partial re-find #"\d") %))
(flatten)
(map #(string/replace % #"\d" ""))
(reverse)))
(defn markov-generate-grammar-with-rhyming-tail
[grammar-trie grammar-database rhyme-trie rhyme-database rhyme-target zipper]
(let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target)
rhyme-options (examples.tpt/rhyming-n-gram-choices
{:database rhyme-database
:flex-rhyme-trie rhyme-trie}
rhyme-target)
rhyme-option-words (map (comp rhyme-database first first) rhyme-options)
grammar (markov-generate-grammar
grammar-trie
grammar-database
zipper)
tail [(grammar-database (zip/node (zipper-last (zip/vector-zip grammar))))]
choices (map (comp grammar-database first)
(children grammar-trie grammar-database tail))
intersection (set/intersection
(into #{} rhyme-option-words)
(into #{} choices))]
(if (empty? intersection)
(do
(println (take 5 choices)
(take 5 rhyme-option-words))
(markov-generate-grammar-with-rhyming-tail
grammar-trie
grammar-database
rhyme-trie
rhyme-database
rhyme-target
zipper))
intersection)))
(defn markov-generate-sentence
[trie database zipper]
(cond
@ -580,7 +640,10 @@
(zip/vector-zip)))))
(comment
(generate test-trie @test-database (zip/vector-zip [1]))
(markov-generate-sentence
test-trie
@test-database
(zip/vector-zip [1]))
(repeatedly
20
@ -626,6 +689,7 @@
(map zip/node)))
(defn choose-with-n-gram-markov
"Hard-coded to work with 4-gram. That's the </s> at the end."
[zipper
grammar-trie
grammar-database
@ -633,7 +697,7 @@
n-gram-database]
(let [prev-pos (previous-leaf-part-of-speech zipper)
prev-pos' (map grammar-database prev-pos)
n-gram (filter string? (rest-leafs zipper))
n-gram (filter string? (concat (rest-leafs zipper) ["</s>" "</s>" "</s>"]))
n-gram' (mapv tpt-db n-gram)
part-of-speech-children (->> (children grammar-trie grammar-database (take-last 1 prev-pos'))
(map #(vector (grammar-database (first %))
@ -641,15 +705,14 @@
grammar-children (->> (children grammar-trie grammar-database prev-pos')
(map #(vector (grammar-database (first %))
(second %))))
n-gram-children (->> n-gram'
(take 2)
(reverse)
(trie/lookup n-gram-trie)
(trie/children)
(map #(vector (n-gram-database (.key %)) (get % []))))
n-gram-children (loop [n-gram' (reverse (remove nil? (take 4 n-gram')))]
(if-let [node (trie/lookup n-gram-trie n-gram')]
(->> (trie/children node)
(map #(vector (n-gram-database (.key %)) (get % []))))
(recur (rest n-gram'))))
combined-choices (reduce
(fn [acc [k v]]
(update acc k (fnil * v)))
(update acc k (fnil + v)))
(into {} grammar-children)
n-gram-children)
intersection (set/intersection
@ -670,9 +733,9 @@
n-gram-children
combined-choices
choice]
))
(first choice)))
(defn previous-leaf-part-of-speech
(defn previous-leaf-loc
[zipper]
(->> zipper
(iterate zip/prev)
@ -680,11 +743,32 @@
(filter #(and (symbol? (zip/node %))
(zip/up %)
(= 1 (count (zip/node (zip/up %))))))
(first)
(first)))
(defn previous-leaf-part-of-speech
[zipper]
(->> zipper
previous-leaf-loc
(zip/path)
(map first)
(filter symbol?)))
(defn nearest-ancestor-phrase
[loc]
(->> loc
(iterate zip/prev)
(take-while (complement nil?))
(filter (comp tb2/phrases zip/node))
(first)))
(comment
(nearest-ancestor-phrase
(->> (zip/vector-zip
'[NP [NN]])
zip/down
zip/right
zip/down)))
(comment
;; Working backwards from a completed grammar tree that has
;; been partially filled in with words, choose the next likely word
@ -742,13 +826,17 @@
n-gram'
prev-pos
prev-pos'
part-of-speech-children
grammar-children
n-gram-children
combined-choices
choice]
(choose-with-n-gram-markov
loc test-trie @test-database tpt tpt-db)]
[
[prev-pos
(take 5 grammar-children)
(take 5 n-gram-children)
(take 5 combined-choices)
choice]))
(trie/lookup test-trie [1 59 3 5 5 17])
@ -761,6 +849,30 @@
(def tpt-db (nippy/thaw-from-file (io/resource "dark-corpus-4-gram-backwards-db.bin")))
(markov-generate-grammar test-trie @test-database (zip/vector-zip [1]))
(-> (markov-generate-grammar test-trie @test-database (zip/vector-zip [1]))
(zip/vector-zip)
(zipper-last)
(visitor
zip/prev
(fn [loc]
(let [k (filter symbol? (map first (zip/path loc)))]
(if (and (symbol? (zip/node loc))
(zip/up loc)
(= 1 (count (zip/node (zip/up loc))))
(not-empty k))
(let [k' (map @test-database k)
choice (choose-with-n-gram-markov
loc
test-trie
@test-database
tpt
tpt-db)]
(zip/replace
loc
[(zip/node loc)
[choice]]))
loc)))))
(-> (markov-generate-grammar test-trie @test-database (zip/vector-zip [1]))
zip/vector-zip
(zipper-last)
@ -773,8 +885,8 @@
(= 1 (count (zip/node (zip/up loc))))
(not-empty k))
(let [k' (map @test-database k)
choice (@test-database (first (choose test-trie @test-database k')))]
(println k')
choice (@test-database (first (choose
test-trie @test-database k')))]
(zip/replace
loc
[(zip/node loc)
@ -783,3 +895,156 @@
(@test-database 497)
)
(defn markov-choose-words-for-grammar
[grammar-trie
grammar-database
n-gram-trie
n-gram-database
grammar]
(-> grammar
(visitor
zip/prev
(fn [loc]
(println (zip/node loc))
(if (and (tb2/words (zip/node loc))
(nil? (zip/right loc)))
(do
(println "inserting right" (zip/node loc))
(zip/insert-right
loc
[(choose-with-n-gram-markov
loc
grammar-trie
grammar-database
n-gram-trie
n-gram-database)]))
loc)))))
(comment
(let [grammar '[[TOP [[S [[NP [[NNS]]] [VP [[VBP]]] [NP [[NNS ["taylor"]]]]]]]]]
sentence (markov-choose-words-for-grammar
test-trie
@test-database
tpt
tpt-db
(->> (zip/vector-zip grammar)
zipper-last
previous-leaf-loc))
grammar2 `[[TOP [[S [[NP [[NNS]] [VP [[VBP]]] [NP [[NNS]]]]]]]]]
rhyme (random-sample
(markov-generate-grammar-with-rhyming-tail
grammar2
grammar-database
rhyme-trie
rhyme-database
"taylor"
))]
sentence)
(markov-generate-grammar
test-trie
@test-database
(zip/vector-zip [1]))
(map @test-database '[TOP S NP NNS VP VBP]) ;; => (1 3 5 74 7 53)
(markov-generate-grammar-with-rhyming-tail
test-trie
@test-database
rhyme-trie
rhyme-database
"taylor"
(zip/vector-zip [1]))
)
(defn markov-complete-grammar-with-rhyming-tail
[grammar-trie grammar-database rhyme-trie rhyme-database grammar rhyme-target]
(let [rhyme-phones (phrase->flex-rhyme-phones rhyme-target)
rhyme-options (examples.tpt/rhyming-n-gram-choices
{:database rhyme-database
:flex-rhyme-trie rhyme-trie}
rhyme-target)
rhyme-option-words (map (comp rhyme-database first first) rhyme-options)
tail [(grammar-database (zip/node (zipper-last (zip/vector-zip grammar))))]
choices (map (comp grammar-database first)
(children grammar-trie grammar-database tail))
intersection (set/intersection
(into #{} rhyme-option-words)
(into #{} choices))]
(if (empty? intersection)
(markov-complete-grammar-with-rhyming-tail
grammar-trie
grammar-database
rhyme-trie
rhyme-database
rhyme-target)
intersection)))
(defn markov-generate-sentence
[trie database zipper]
(cond
(zip/end? zipper)
(zip/root zipper)
(seqable? (zip/node zipper))
(recur trie database (zip/next zipper))
(symbol? (zip/node zipper))
(recur trie database (zip/next zipper))
(symbol? (database (zip/node zipper)))
(let [sym (database (zip/node zipper))
sym-path (->> (map first (zip/path zipper))
butlast
(filter symbol?)
(#(concat % (list sym))))
path (map database sym-path)
choice (first (choose trie database path))]
(recur
trie
database
(-> zipper
(zip/replace
[sym choice])
(zip/root)
(zip/vector-zip))))
(string? (database (zip/node zipper)))
(let [terminal (database (zip/node zipper))
path (->> (map first (zip/path zipper))
butlast
(filter symbol?))]
(recur
trie
database
(-> zipper
(zip/replace
terminal)
(zip/next)
(zip/root)
(zip/vector-zip))))
:else
(recur
trie
database
(-> zipper
(zip/replace
(mapv
database
(database (zip/node zipper))))
(zip/next)
(zip/root)
(zip/vector-zip)))))
(comment
(generate-grammar-from [[NN ["taylor"]]])
(map (comp (partial map @test-database) first) (take 5 test-trie))
(@test-database 1)
)

@ -68,7 +68,7 @@
(ParserFactory/create
(ParserModel.
(io/input-stream (io/resource "models/en-parser-chunking.bin")))
3
5
0.95))
(defn parse-probs [parses]
@ -83,13 +83,20 @@
(string/split results #"\n")))
(comment
(- (Math/log 0.001) (Math/log 0.01))
(Math/E)
(tokenize "Eric's testing.")
(let [results (StringBuffer.)
parses (ParserTool/parseLine "Eric 's testing ." custom-parser 3)]
parses (ParserTool/parseLine "The dog ran fast ." custom-parser 1)]
((juxt parse-probs parse-strs) parses))
(let [results (StringBuffer.)
parses (ParserTool/parseLine "Eric 's testing ." custom-parser 1)]
(meta parses))
)
(Math/log (Math/pow Math/E -4.1))
(defn parse-top-n [tokenized n]
(let [results (StringBuffer.)
parses (ParserTool/parseLine tokenized custom-parser n)]
@ -1276,6 +1283,7 @@
(map string/trim)
(filter english?)
(remove empty?)
(filter valid-sentence?)
(mapv treebank-zipper)
(remove nil?)
(map parts-of-speech-trie-entries)
@ -1287,7 +1295,7 @@
(clojure.lang.MapEntry. (into (vec k) [v]) v)))))
(comment
(let [text "Hi my name. Is Eric? \n What is yours? Fooaba brosaet"]
(let [text "Hi my name. Is Eric? \n What is yours? Fooaba brosaet. The run dog go for the."]
(text->grammar-trie-map-entry text)
#_(->> text
-split-text-into-sentences

Loading…
Cancel
Save