Generate with some grammar rules
parent
dc5340be7f
commit
dd51e0fdca
@ -0,0 +1,544 @@
|
|||||||
|
(ns com.owoga.prhyme.generation.markov-example
|
||||||
|
(:require [clojure.string :as string]
|
||||||
|
[clojure.java.io :as io]
|
||||||
|
[com.owoga.prhyme.util.math :as math]
|
||||||
|
[com.owoga.phonetics :as phonetics]
|
||||||
|
[com.owoga.phonetics.syllabify :as syllabify]
|
||||||
|
[cljol.dig9 :as d]
|
||||||
|
[clojure.zip :as zip]
|
||||||
|
[com.owoga.tightly-packed-trie.bit-manip :as bm]
|
||||||
|
[com.owoga.trie :as trie]
|
||||||
|
[com.owoga.tightly-packed-trie.encoding :as encoding]
|
||||||
|
[com.owoga.tightly-packed-trie :as tpt]
|
||||||
|
[taoensso.nippy :as nippy]
|
||||||
|
[com.owoga.prhyme.nlp.tag-sets.treebank-ii :as tb2]
|
||||||
|
[com.owoga.prhyme.nlp.core :as nlp]))
|
||||||
|
|
||||||
|
(def corpus (slurp (io/resource "cask_of_amontillado.txt")))
|
||||||
|
|
||||||
|
;; For better generation of text, you'll probably want to pad the starts
|
||||||
|
;; of sentences with n-1 "start-of-sentence" tokens.
|
||||||
|
(defn prep-punctuation-for-tokenization
|
||||||
|
"Puts spaces around punctuation so that they aren't
|
||||||
|
tokenized with the words they are attached to.
|
||||||
|
|
||||||
|
Might add extraneous whitespace, but presumedly that will be ignored/removed
|
||||||
|
during tokenization."
|
||||||
|
[text]
|
||||||
|
(string/replace text #"([\.,!?])" " $1 "))
|
||||||
|
|
||||||
|
(defn remove-quotes
|
||||||
|
"...and hyphens"
|
||||||
|
[text]
|
||||||
|
(string/replace text #"[\"-]" ""))
|
||||||
|
|
||||||
|
(defn remove-formatting-characters
|
||||||
|
"Input has underscores, presumably because the text
|
||||||
|
might be rendered by something that can italicize or bold text.
|
||||||
|
We'll just ignore them for now."
|
||||||
|
[text]
|
||||||
|
(string/replace text #"[_*]" ""))
|
||||||
|
|
||||||
|
(defn tokenize [text]
|
||||||
|
(-> text
|
||||||
|
remove-formatting-characters
|
||||||
|
prep-punctuation-for-tokenization
|
||||||
|
remove-quotes
|
||||||
|
string/lower-case
|
||||||
|
(string/split #"[\n ]+")))
|
||||||
|
|
||||||
|
(defn interleave-all
|
||||||
|
"Like interleave, but instead of ending the interleave when the shortest collection
|
||||||
|
has been consumed, continues to interleave the remaining collections."
|
||||||
|
{:added "1.0"
|
||||||
|
:static true}
|
||||||
|
([] ())
|
||||||
|
([c1] (lazy-seq c1))
|
||||||
|
([c1 c2]
|
||||||
|
(lazy-seq
|
||||||
|
(let [s1 (seq c1) s2 (seq c2)]
|
||||||
|
(if (and s1 s2)
|
||||||
|
(cons (first s1) (cons (first s2)
|
||||||
|
(interleave-all (rest s1) (rest s2))))
|
||||||
|
(lazy-seq (or s1 s2))))))
|
||||||
|
([c1 c2 & colls]
|
||||||
|
(lazy-seq
|
||||||
|
(let [ss (->> (map seq (conj colls c2 c1))
|
||||||
|
(remove nil?))]
|
||||||
|
(when ss
|
||||||
|
(concat (map first ss) (apply interleave-all (map rest ss))))))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [tokens [1 2 3 4 5]
|
||||||
|
p1 (partition 1 1 tokens)
|
||||||
|
p2 (partition 2 1 tokens)
|
||||||
|
p3 (partition 3 1 tokens)]
|
||||||
|
(interleave-all p1 p2 p3)))
|
||||||
|
|
||||||
|
(defn ngramify-tokens [n m tokens]
|
||||||
|
(let [partition-colls (map #(partition % 1 tokens) (range n m))
|
||||||
|
ngrams (apply interleave-all partition-colls)]
|
||||||
|
ngrams))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(->> (tokenize corpus)
|
||||||
|
(take 5)
|
||||||
|
(ngramify-tokens 1 4))
|
||||||
|
;; => (("the")
|
||||||
|
;; ("the" "thousand")
|
||||||
|
;; ("the" "thousand" "injuries")
|
||||||
|
;; ("thousand")
|
||||||
|
;; ("thousand" "injuries")
|
||||||
|
;; ("thousand" "injuries" "of")
|
||||||
|
;; ("injuries")
|
||||||
|
;; ("injuries" "of")
|
||||||
|
;; ("injuries" "of" "fortunato")
|
||||||
|
;; ("of")
|
||||||
|
;; ("of" "fortunato")
|
||||||
|
;; ("fortunato"))
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
(defn add-terminal-value-to-ngram
|
||||||
|
"The Trie expects entries to be of the form '(k1 k2 k3 value).
|
||||||
|
The ngrams generated above are just '(k1 k2 k3).
|
||||||
|
This adds a value that is simply the ngram itself:
|
||||||
|
'(k1 k2 k3 '(k1 k2 k3))."
|
||||||
|
[ngram]
|
||||||
|
(concat ngram (list ngram)))
|
||||||
|
|
||||||
|
|
||||||
|
(defn trie->frequency-of-frequencies-map
|
||||||
|
"The second argument to this function specifies which rank you
|
||||||
|
want to get the map for."
|
||||||
|
[trie n]
|
||||||
|
(->> trie
|
||||||
|
(trie/children-at-depth n)
|
||||||
|
(map (comp :count second first seq))
|
||||||
|
frequencies
|
||||||
|
(into (sorted-map))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(trie->frequency-of-frequencies-map trie 1)
|
||||||
|
;; => {1 558,
|
||||||
|
;; 2 110,
|
||||||
|
;; ,,,
|
||||||
|
;; 167 1,
|
||||||
|
;; 177 1}
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
;; The frequency of a thus-far unseen species is the number of species seen once over the
|
||||||
|
;; total number of species.
|
||||||
|
;; That's commonly referred to as P0
|
||||||
|
;; There will be a different P0 for each rank of N-gram.
|
||||||
|
|
||||||
|
(defn P0 [trie n]
|
||||||
|
(let [freq-map (trie->frequency-of-frequencies-map trie n)]
|
||||||
|
(/ (freq-map 1) (apply + (vals freq-map)))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(P0 trie 1)
|
||||||
|
;; => 31/45
|
||||||
|
)
|
||||||
|
|
||||||
|
;; From here on out, we follow a similar procedure.
|
||||||
|
;; What we just did, P0, is the probability of seeing something
|
||||||
|
;; that has been previously unseen.
|
||||||
|
;; We found that by using what we know about P1 (how many times
|
||||||
|
;; things have been seen once).
|
||||||
|
;;
|
||||||
|
;; Now, we need to adjust our P1 number since we just gave some probability
|
||||||
|
;; to P0, which previously had no probability since it wasn't in our
|
||||||
|
;; frequency table.
|
||||||
|
;;
|
||||||
|
;; What's the new probability that the next thing we see is from the group of
|
||||||
|
;; n-grams that we've seen once?
|
||||||
|
;;
|
||||||
|
;; The same way P0 was based off P1, P1 will be based off P2.
|
||||||
|
;;
|
||||||
|
;; It's basically 2 * the number of times we've seen things twice divided
|
||||||
|
;; by the total number of things we've seen.
|
||||||
|
;;
|
||||||
|
;; P0 was 1 * number of 1-time things / total number of n-time things.
|
||||||
|
;; P1 is 2 * number of 2-time things / total number of n-time things.
|
||||||
|
;; P2 is 3 * number of 3-time things / total number of n-time things.
|
||||||
|
;;
|
||||||
|
;; With a slight adjustment. The frequency of frequencies needs to be smoothed
|
||||||
|
;; so there are no 0-values. When you get up to P14, P15, etc... there might be gaps
|
||||||
|
;; where you'll see P14 1-time, then won't see anything 15 or 16 times, so P15 and P16 will
|
||||||
|
;; be 0, then you'll see something 17 times twice.
|
||||||
|
;;
|
||||||
|
;; This is just noise from having limited data. The noise needs to be smoothed out.
|
||||||
|
|
||||||
|
(defn simple-good-turing-map [trie n]
|
||||||
|
(let [freq-map (trie->frequency-of-frequencies-map trie n)
|
||||||
|
xs (->> freq-map keys (map #(Math/log %)))
|
||||||
|
ys (->> freq-map vals (map #(Math/log %)))
|
||||||
|
sgt (math/sgt (keys freq-map) (vals freq-map))
|
||||||
|
sgt-map (into (sorted-map) (apply map vector sgt))]
|
||||||
|
sgt-map))
|
||||||
|
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [freq-map (trie->frequency-of-frequencies-map trie 2)
|
||||||
|
xs (->> freq-map keys (map #(Math/log %)))
|
||||||
|
ys (->> freq-map vals (map #(Math/log %)))
|
||||||
|
sgt (math/sgt (keys freq-map) (vals freq-map))
|
||||||
|
sgt-map (into (sorted-map) (apply map vector sgt))
|
||||||
|
sgt-with-counts (math/sgt-with-counts (keys freq-map)
|
||||||
|
(vals freq-map))
|
||||||
|
c1 (freq-map 1)
|
||||||
|
c1* (sgt-map 1)]
|
||||||
|
[c1 c1* sgt-with-counts])
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
;; Maximum Likelihood Estimate
|
||||||
|
;;
|
||||||
|
;; It was about dusk, one evening during the supreme madness of the
|
||||||
|
;; carnival season, that I encountered my friend. He accosted me with
|
||||||
|
;; excessive warmth, for he had been drinking much. The man wore motley.
|
||||||
|
;; He had on a tight-fitting parti-striped dress, and his head was
|
||||||
|
;; surmounted by the conical cap and bells. I was so pleased to see him,
|
||||||
|
;; that I thought I should never have done wringing his hand.
|
||||||
|
;;
|
||||||
|
;; Consider 3-grams...
|
||||||
|
;;
|
||||||
|
;; it was about
|
||||||
|
;; it was there
|
||||||
|
;;
|
||||||
|
;; Let `N` be a sample text size and `nr` be the number of
|
||||||
|
;; m-grams which occurred in the text exactly `r` times.
|
||||||
|
;;
|
||||||
|
;; So that `N` = (apply + (map #(* r nr) frequency-of-frequencies)
|
||||||
|
;; `N` = sum for all seen-counts ("number of things seen 'count' times" * 'count')
|
||||||
|
;; 10 things seen 5 times
|
||||||
|
;; 4 things seen 4 times
|
||||||
|
;; 2 things seen 1 time
|
||||||
|
;;
|
||||||
|
;; 10 things seen 5 times each makes up 50 "things"
|
||||||
|
;; 4 things seen 4 times each makes up 16 "things
|
||||||
|
;; 2 things seen once each makes up 2 "things"
|
||||||
|
;;
|
||||||
|
;; Makes for `N` = 50 + 16 + 2 things... 68 things (m-grams).
|
||||||
|
;;
|
||||||
|
;; Consider the m-gram "it was about" occurred 4 times.
|
||||||
|
;; And in total we saw 60 3-grams. Then the MLE
|
||||||
|
;; is 4 / 60.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;;;; Base MLE
|
||||||
|
;;
|
||||||
|
;; Disount of the n-gram
|
||||||
|
;; *
|
||||||
|
;; Count of n-gram
|
||||||
|
;; /
|
||||||
|
;; Count of n-1-gram
|
||||||
|
|
||||||
|
|
||||||
|
(defn maximum-likelihood-estimate [trie trie-database n-gram]
|
||||||
|
(/ (get-in trie-database [n-gram :count])
|
||||||
|
(get-in trie-database [(butlast n-gram) :count])))
|
||||||
|
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(maximum-likelihood-estimate trie trie-database '("," "the"))
|
||||||
|
|
||||||
|
(maximum-likelihood-estimate trie trie-database '(","))
|
||||||
|
|
||||||
|
(let [[rs nrs ests lgts]
|
||||||
|
(apply
|
||||||
|
math/sgt-with-counts
|
||||||
|
(apply map vector (seq (trie->frequency-of-frequencies-map trie 2))))]
|
||||||
|
[rs nrs ests lgts])
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
;;;; KATZ ;;;;
|
||||||
|
|
||||||
|
;; (defn N [trie n-gram-rank]
|
||||||
|
;; (let [r->Nr (trie->frequency-of-frequencies-map trie n-gram-rank)]
|
||||||
|
;; (apply + (map (fn [[r nr]] (* r nr)) r->Nr))))
|
||||||
|
|
||||||
|
;; (defn r* [trie n-gram-rank]
|
||||||
|
;; (let [r->Nr (trie->frequency-of-frequencies-map trie n-gram-rank)
|
||||||
|
;; _ _ _ r*s]))
|
||||||
|
|
||||||
|
|
||||||
|
;;;;
|
||||||
|
|
||||||
|
(defn zipper-leaf-path-seq
|
||||||
|
[zipper]
|
||||||
|
(->> zipper
|
||||||
|
(iterate zip/next)
|
||||||
|
(take-while (complement zip/end?))
|
||||||
|
(filter (complement zip/branch?))
|
||||||
|
(map zip/path)
|
||||||
|
(map (partial map first))
|
||||||
|
(filter (comp tb2/words last))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(def target-grammar-structure
|
||||||
|
'(TOP (S (NP (WDT)) (VP (VBD) (NP (DT) (NN))))))
|
||||||
|
|
||||||
|
(reverse (zipper-leaf-path-seq (zip/seq-zip target-grammar-structure)))
|
||||||
|
|
||||||
|
(defn decode-fn
|
||||||
|
"Decodes a variable-length encoded number from a byte-buffer.
|
||||||
|
Zero gets decoded to nil."
|
||||||
|
[byte-buffer]
|
||||||
|
(let [value (encoding/decode byte-buffer)]
|
||||||
|
(if (zero? value)
|
||||||
|
nil
|
||||||
|
value)))
|
||||||
|
|
||||||
|
(def tpt (tpt/load-tightly-packed-trie-from-file
|
||||||
|
(io/resource "dark-corpus-4-gram-backwards-tpt.bin")
|
||||||
|
decode-fn))
|
||||||
|
|
||||||
|
(def database (nippy/thaw-from-file (io/resource "dark-corpus-4-gram-backwards-db.bin")))
|
||||||
|
|
||||||
|
(def example-story
|
||||||
|
(loop [generated-text (vec (repeat 3 (get database "</s>")))
|
||||||
|
i 0]
|
||||||
|
(if (> i 20)
|
||||||
|
generated-text
|
||||||
|
(let [children (loop [i 4]
|
||||||
|
(let [node
|
||||||
|
(trie/lookup
|
||||||
|
tpt
|
||||||
|
(vec (take-last i generated-text)))
|
||||||
|
children
|
||||||
|
(and node (trie/children node))]
|
||||||
|
(cond
|
||||||
|
(nil? node) (recur (dec i))
|
||||||
|
(< i 0) (throw (Exception. "Error"))
|
||||||
|
(seq children) children
|
||||||
|
:else (recur (dec i)))))]
|
||||||
|
(recur
|
||||||
|
(conj
|
||||||
|
generated-text
|
||||||
|
(->> children
|
||||||
|
(map #(vector (.key %) (get % [])))
|
||||||
|
(remove (comp nil? second))
|
||||||
|
(#(math/weighted-selection
|
||||||
|
(fn [[_ c]] c)
|
||||||
|
%))
|
||||||
|
first))
|
||||||
|
(inc i))))))
|
||||||
|
|
||||||
|
(map database example-story)
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
(defn syllabify-phrase
|
||||||
|
[phrase]
|
||||||
|
(->> phrase
|
||||||
|
(#(string/split % #" "))
|
||||||
|
(map phonetics/get-phones)
|
||||||
|
(map first)
|
||||||
|
(map syllabify/syllabify)
|
||||||
|
(reduce into [])))
|
||||||
|
|
||||||
|
(defn markov-choice
|
||||||
|
[trie generated-text k xf-filter]
|
||||||
|
(let [node (trie/lookup trie k)
|
||||||
|
children (and node
|
||||||
|
(->> node
|
||||||
|
trie/children
|
||||||
|
(map #(vector (.key %) (get % [])))
|
||||||
|
(remove (comp nil? second))))
|
||||||
|
choices (transduce
|
||||||
|
(comp
|
||||||
|
(map (fn [child]
|
||||||
|
(vector generated-text child)))
|
||||||
|
xf-filter) conj children)]
|
||||||
|
(cond
|
||||||
|
(nil? node) (recur trie generated-text (butlast k) xf-filter)
|
||||||
|
|
||||||
|
(seq children)
|
||||||
|
(if (< (rand) (/ (apply max (map second children))
|
||||||
|
(apply + (map second children))))
|
||||||
|
(recur trie generated-text (butlast k) xf-filter)
|
||||||
|
(first
|
||||||
|
(math/weighted-selection
|
||||||
|
(fn [[_ c]] c)
|
||||||
|
choices)))
|
||||||
|
|
||||||
|
(> (count k) 0) (recur trie generated-text (butlast k) xf-filter)
|
||||||
|
|
||||||
|
:else (throw (Exception. "Error")))))
|
||||||
|
|
||||||
|
(defn syllable-count-pred
|
||||||
|
[syllable-count database]
|
||||||
|
(fn [node]
|
||||||
|
(let [syllables (syllabify-phrase (database (get node [])))]
|
||||||
|
(= syllable-count
|
||||||
|
(count syllables)))))
|
||||||
|
|
||||||
|
(defn markov-select
|
||||||
|
[n]
|
||||||
|
(fn [{:keys [trie database xf-filter tokens] :as context}]
|
||||||
|
(loop [n n]
|
||||||
|
(if (= n 0)
|
||||||
|
;; Unable to find a selection
|
||||||
|
nil
|
||||||
|
(let [key (take-last n tokens)
|
||||||
|
node (trie/lookup trie key)
|
||||||
|
children (and node (->> (trie/children node)
|
||||||
|
(remove nil?)))
|
||||||
|
choices (transduce
|
||||||
|
(comp
|
||||||
|
(map #(vector (.key %) (get % [])))
|
||||||
|
(map (fn [child] [context child]))
|
||||||
|
xf-filter)
|
||||||
|
conj
|
||||||
|
children)]
|
||||||
|
(let [freqs (map #(get % []) children)]
|
||||||
|
(cond
|
||||||
|
(or (empty? choices) (empty? freqs))
|
||||||
|
(recur (dec n))
|
||||||
|
|
||||||
|
(and
|
||||||
|
(> n 1)
|
||||||
|
(< (rand)
|
||||||
|
(/ (apply max freqs)
|
||||||
|
(apply + freqs))))
|
||||||
|
(recur
|
||||||
|
(dec n))
|
||||||
|
|
||||||
|
:else
|
||||||
|
(let [result (second (math/weighted-selection
|
||||||
|
(comp second second)
|
||||||
|
choices))]
|
||||||
|
(first result)))))))))
|
||||||
|
|
||||||
|
(defn generate-sentence
|
||||||
|
[{:keys [trie database stop? xf-filter tokens] :as context}]
|
||||||
|
(let [markov-fn (markov-select 4)]
|
||||||
|
(loop [context (assoc context :i 0)]
|
||||||
|
(let [tokens (:tokens context)]
|
||||||
|
(cond
|
||||||
|
(stop? context)
|
||||||
|
tokens
|
||||||
|
|
||||||
|
:else
|
||||||
|
(let [selection (markov-fn context)]
|
||||||
|
(if (nil? selection)
|
||||||
|
(do
|
||||||
|
(println tokens)
|
||||||
|
(throw (Exception. "No possible selection")))
|
||||||
|
(recur (update
|
||||||
|
context
|
||||||
|
:tokens
|
||||||
|
conj
|
||||||
|
selection)))))))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(trie/lookup tpt '(1 1 1))
|
||||||
|
(let [context {:tokens (vec (repeat 3 (database "</s>")))
|
||||||
|
:trie tpt
|
||||||
|
:database database
|
||||||
|
:stop? (fn [{:keys [tokens] :as context}]
|
||||||
|
(let [sentence (->> tokens
|
||||||
|
(map database)
|
||||||
|
(remove #{"</s>"})
|
||||||
|
(string/join " "))]
|
||||||
|
(<= 10 (count (syllabify-phrase sentence)))))
|
||||||
|
:xf-filter (comp
|
||||||
|
(remove
|
||||||
|
(fn [[context [k v]]]
|
||||||
|
(= k 7)))
|
||||||
|
(filter
|
||||||
|
(fn [[context [k v]]]
|
||||||
|
(let [current-sentence
|
||||||
|
(->> (:tokens context)
|
||||||
|
(map database)
|
||||||
|
(remove #{"</s>"})
|
||||||
|
(string/join " "))
|
||||||
|
current-syllable-count
|
||||||
|
(count (syllabify-phrase current-sentence))
|
||||||
|
current-word (database k)
|
||||||
|
current-word-syllable-count (count (syllabify-phrase current-word))]
|
||||||
|
(>= (- 10 current-syllable-count)
|
||||||
|
current-word-syllable-count)))))}]
|
||||||
|
(->> (generate-sentence context)
|
||||||
|
(map database)))
|
||||||
|
|
||||||
|
|
||||||
|
(database "<s>")
|
||||||
|
)
|
||||||
|
#_(defn generate-sentence
|
||||||
|
[trie database stop? filters]
|
||||||
|
(loop [generated-text (vec (repeat 3 (get database "</s>")))
|
||||||
|
i 0]
|
||||||
|
(cond
|
||||||
|
(> i 400)
|
||||||
|
nil
|
||||||
|
|
||||||
|
(stop? generated-text)
|
||||||
|
generated-text
|
||||||
|
|
||||||
|
;; reset
|
||||||
|
(or (zero? (mod i 40))
|
||||||
|
(> syllable-count target-syllable-count))
|
||||||
|
(recur (vec (repeat 3 (get database "</s>"))) 0)
|
||||||
|
|
||||||
|
:else
|
||||||
|
(let [choice (markov-choice
|
||||||
|
trie
|
||||||
|
(take-last 4 generated-text)
|
||||||
|
filters)]
|
||||||
|
(recur
|
||||||
|
(conj
|
||||||
|
generated-text
|
||||||
|
choice)
|
||||||
|
(inc i))))))
|
||||||
|
|
||||||
|
(comment
|
||||||
|
(let [disallow-sentence-start-xf
|
||||||
|
(remove (= (database (first %)) "<s>") children)
|
||||||
|
])
|
||||||
|
|
||||||
|
(map database (generate-sentence tpt database 10))
|
||||||
|
|
||||||
|
(def grammar-freqs (nippy/thaw-from-file "/tmp/grammar-freqs-top-100.bin"))
|
||||||
|
(def part-of-speech-freqs (nippy/thaw-from-file "/tmp/part-of-speech-freqs.bin"))
|
||||||
|
(take 100 part-of-speech-freqs)
|
||||||
|
|
||||||
|
(loop [generated-text (vec (repeat 3 (get database "</s>")))
|
||||||
|
i 0]
|
||||||
|
(let [current-sentence
|
||||||
|
(take-while
|
||||||
|
(complement (into #{} (map database ["<s>" "</s>"])))
|
||||||
|
(reverse generated-text))]
|
||||||
|
(if (> i 20)
|
||||||
|
generated-text
|
||||||
|
(let [children (loop [i 4]
|
||||||
|
(let [node
|
||||||
|
(trie/lookup
|
||||||
|
tpt
|
||||||
|
(vec (take-last i generated-text)))
|
||||||
|
children
|
||||||
|
(and node (trie/children node))]
|
||||||
|
(cond
|
||||||
|
(nil? node) (recur (dec i))
|
||||||
|
(< i 0) (throw (Exception. "Error"))
|
||||||
|
(seq children) children
|
||||||
|
:else (recur (dec i)))))]
|
||||||
|
(recur
|
||||||
|
(conj
|
||||||
|
generated-text
|
||||||
|
(->> children
|
||||||
|
(map #(vector (.key %) (get % [])))
|
||||||
|
(remove (comp nil? second))
|
||||||
|
(#(math/weighted-selection
|
||||||
|
(fn [[_ c]] c)
|
||||||
|
%))
|
||||||
|
first))
|
||||||
|
(inc i))))))
|
||||||
|
|
||||||
|
)
|
Loading…
Reference in New Issue