Update markov code

main
Eric Ihli 3 years ago
parent 1c5ddf1797
commit f6c30ba948

@ -1125,13 +1125,15 @@
:else (recur (butlast lookup)))))) :else (recur (butlast lookup))))))
(defn trie-frequencies [node] (defn trie-frequencies [node]
(->> node (if (nil? node)
trie/children nil
(map #(second (get % []))) (->> node
frequencies trie/children
vec (map #(second (get % [])))
(sort-by first) frequencies
(into (sorted-map)))) vec
(sort-by first)
(into (sorted-map)))))
(comment (comment
(time (trie-frequencies (trie/lookup markov-tight-trie [107]))) (time (trie-frequencies (trie/lookup markov-tight-trie [107])))
@ -1140,7 +1142,7 @@
(defn perplexity (defn perplexity
"TODO: "TODO:
- Tests - Tests
- Katz back-off - *** Katz back-off
- Performance - Performance
" "
[rank model n-gram] [rank model n-gram]
@ -1163,13 +1165,42 @@
(perplexity 2 markov-tight-trie [1 1 7 90]);; => -14.360605720470575 (perplexity 2 markov-tight-trie [1 1 7 90]);; => -14.360605720470575
(perplexity 2 markov-tight-trie [1 1 7 89]);; => -12.98036901624079 (perplexity 2 markov-tight-trie [1 1 7 89]);; => -12.98036901624079
(perplexity 2 markov-tight-trie [1 1 7 174]);; => -11.84336736411312 (perplexity 2 markov-tight-trie [1 1 7 174]);; => -11.84336736411312
(perplexity 4 markov-tight-trie [22 22 22 22 34 34 18])
(trie/lookup markov-tight-trie [1 1 7 90]) (trie/lookup markov-tight-trie [1 1 7 90])
(trie/lookup markov-tight-trie [1 1 7 89]) (trie/lookup markov-tight-trie [1 1 7 89])
(map database [1 1 7]) (map database [1 1 7])
) )
(defn perplexity-add-one
"If you're only using perplexity to compare phrases generated using
the same model, this might be a reasonable and simple alternative
to Katz Back-Off.
Just give everything with 0 frequencies a freq of 1."
[rank model n-gram]
(loop [i 1
n-gram n-gram
log-prob 0]
(if (> i (count n-gram))
log-prob
(recur (min (inc i) rank)
(if (= i rank) (rest n-gram) n-gram)
(let [words (take i n-gram)
child (trie/lookup model words)
parent (trie/lookup model (butlast words))
w1-freq (second (get child [] [nil 1]))
freqs (trie-frequencies parent)
sgt (math/frequencies->simple-good-turing-probabilities freqs)]
(+ log-prob (Math/log (sgt w1-freq))))))))
(comment
(perplexity-add-one 2 markov-tight-trie [1 1 7 90]);; => -14.360605720470575
(perplexity-add-one 4 markov-tight-trie [22 22 22 22 34 34 18])
(trie/lookup markov-tight-trie [1 1 7 90])
(trie/lookup markov-tight-trie [1 1 7 89])
(map database [1 1 7])
)
;;;; WGU ;;;; WGU
(defn gen-rhyme-tree (defn gen-rhyme-tree

Loading…
Cancel
Save