Start on perplexity calc

main
Eric Ihli 4 years ago
parent c77e101515
commit 6b8352ae30

@ -83,10 +83,10 @@
(eduction (xf-file-seq 501 2)))
database (atom {:next-id 1})
trie (file-seq->markov-trie database files 1 3)]
[(take 20 trie)
(count trie)
(get @database 1)
[(take 5 trie)
(map (comp (partial map @database) first) (take 20 (drop 105 trie)))
(take 10 @database)])
)
(defn file-seq->backwards-markov-trie
@ -1111,3 +1111,30 @@
(mapcat reverse)))
)
;;;; Accuracy
(defn mle
  "Returns the ratio of the frequency recorded for `lookup` in `model`
  to the frequency recorded for its prefix, `(butlast lookup)`.

  The frequency for a key sequence is read by looking the sequence up
  with `trie/lookup`, fetching the entry stored under the empty-vector
  key `[]` of the result, and taking that entry's second element. When
  no `[]` entry is present, `[nil 1]` is used, so the frequency
  defaults to 1.

  NOTE(review): presumably `model` is a trie whose `[]` entry holds a
  `[value frequency]` pair; confirm against the `trie` namespace."
  [model lookup]
  (let [frequency-at (fn [ks]
                       ;; Only the second slot of the entry is used.
                       (let [[_ frequency] (get (trie/lookup model ks) [] [nil 1])]
                         frequency))
        numerator    (frequency-at lookup)
        denominator  (frequency-at (butlast lookup))]
    (/ numerator denominator)))
(comment
;; REPL scratch example: evaluates `mle` for the key sequence [795 68 69]
;; against `markov-tight-trie`, which is defined outside this excerpt.
(mle markov-tight-trie [795 68 69])
)
(defn perplexity
  "Work in progress: does not yet compute a perplexity value.

  Tokenizes `line` with `data-transform/xf-tokenize`, maps the first
  tokenized result through `database` (applied as a function) to get
  token ids, and partitions those ids with
  `data-transform/n-to-m-partitions` using `rank` and `(inc rank)`.

  Returns a two-element vector:
    [lazy seq of `(mle model n-gram)` for each partition, the partitions]

  NOTE(review): `database` must be callable here (e.g. a map from token
  to id); confirm what callers pass."
  [model database rank line]
  (let [tokenized (into [] data-transform/xf-tokenize [line])
        ;; Only the first tokenized entry is used; `line` is the sole input.
        ids       (map database (first tokenized))
        grams     (data-transform/n-to-m-partitions rank (inc rank) ids)
        estimates (map #(mle model %) grams)]
    [estimates grams]))
(comment
;; REPL scratch example: runs `perplexity` with rank 3 on a sample sentence.
;; NOTE(review): `perplexity` applies `database` as a function; if the
;; `database` in scope here is an atom, it likely needs `@database` - confirm.
(perplexity markov-tight-trie database 3 "hi there eric how are you")
)

@ -935,7 +935,7 @@
(map tb/make-tree)))
(let [phrase "I gave the cake to John at the store."]
(parse (tokenize phrase)))
(parse [phrase]))
(let [phrase "I've got a good feeling"]
(pos-tagger (tokenize phrase)))

Loading…
Cancel
Save