Add function to calculate perplexity

main
Eric Ihli 3 years ago
parent d89f5da994
commit b2e83a9b98

@@ -1152,31 +1152,31 @@
(mle markov-tight-trie [9095 452 27040])
(count (trie/children markov-tight-trie))
)
(defn perplexity
  "Scores text against `model`, a trie of n-gram entries.

  Two call shapes are supported:

  [rank model n-gram]
    `n-gram` is a sequence of token ids. Walks it with a window that grows
    from 1 token up to `rank` tokens and then slides forward one token at a
    time. For each window the frequency stored for the window in `model` is
    mapped through the simple-Good-Turing table built from the frequencies
    under the window's prefix, and the natural log of that value is added to
    a running total. Returns that total, i.e. a summed log-probability; it
    is NOT exponentiated or normalised by length, so callers wanting a
    textbook perplexity must do that themselves.
    `rank` must be positive: with rank < 1 the window index can never
    exceed the remaining token count and the loop would not terminate.

  [model database rank line]
    `line` is raw text. It is tokenized with `data-transform/xf-tokenize`,
    the tokens of the first tokenized element are mapped through `database`,
    and the ids are partitioned with
    `data-transform/n-to-m-partitions` from `rank` to `(inc rank)`.
    Returns a two-element vector: the lazy seq of `(mle model n-gram)` for
    every partition, and the partitions themselves."
  ([rank model n-gram]
   {:pre [(pos? rank)]}
   (loop [i 1
          n-gram n-gram
          log-prob 0]
     ;; `i` is the current window length; once it is larger than what is
     ;; left of `n-gram` every window has been scored.
     (if (> i (count n-gram))
       log-prob
       (recur
        ;; Grow the window until it reaches `rank`, then keep it there.
        (min (inc i) rank)
        ;; A full-size window slides forward by dropping its first token.
        (if (= i rank) (rest n-gram) n-gram)
        (let [words (take i n-gram)
              child (trie/lookup model words)
              parent (trie/lookup model (butlast words))
              ;; Falls back to [nil 0] when `child` has nothing under the
              ;; [] key (or is nil), giving a frequency of 0.
              w1-freq (second (get child [] [nil 0]))
              freqs (trie-frequencies parent)
              sgt (math/frequencies->simple-good-turing-probabilities freqs)]
          ;; NOTE(review): assumes `sgt` yields a positive number for every
          ;; frequency it is asked about, including 0 -- confirm; a nil
          ;; would make Math/log throw.
          (+ log-prob (Math/log (sgt w1-freq))))))))
  ([model database rank line]
   (let [tokens (into [] data-transform/xf-tokenize [line])
         token-ids (map database (first tokens))
         n-grams (data-transform/n-to-m-partitions rank (inc rank) token-ids)]
     [(map (partial mle model) n-grams)
      n-grams])))
;; Scratch expressions for exercising `perplexity` by hand. Nothing inside a
;; `comment` form is evaluated when the file is loaded. Every expression
;; relies on `markov-tight-trie` and `database` being defined elsewhere.
(comment
;; Call using the (model database rank line) argument order with raw text.
(perplexity markov-tight-trie database 3 "hi there eric how are you")
;; `database` is called with a word here and with ids further down.
(database "through") ;; 1924
database
(count database)
(get markov-tight-trie [315 1924])
;; Takes `(second (get % []))` of every child of the [315] node, counts how
;; often each value occurs, and collects the counts into a sorted map.
(->>
(map #(second (get % []))
(trie/children (trie/lookup markov-tight-trie [315])))
frequencies
vec
(sort-by first)
(into (sorted-map)))
;; Calls using the (rank model n-gram) argument order with token ids; the
;; trailing `;; =>` values are the results recorded by the author.
(perplexity 2 markov-tight-trie [1 1 7 90]);; => -14.360605720470575
(perplexity 2 markov-tight-trie [1 1 7 89]);; => -12.98036901624079
(perplexity 2 markov-tight-trie [1 1 7 174]);; => -11.84336736411312
;; Direct lookups of two of the n-grams scored above.
(trie/lookup markov-tight-trie [1 1 7 90])
(trie/lookup markov-tight-trie [1 1 7 89])
(map database [1 1 7])
)

Loading…
Cancel
Save