Start on perplexity calc

main
Eric Ihli 3 years ago
parent c77e101515
commit 6b8352ae30

@ -83,10 +83,10 @@
(eduction (xf-file-seq 501 2))) (eduction (xf-file-seq 501 2)))
database (atom {:next-id 1}) database (atom {:next-id 1})
trie (file-seq->markov-trie database files 1 3)] trie (file-seq->markov-trie database files 1 3)]
[(take 20 trie) [(take 5 trie)
(count trie) (map (comp (partial map @database) first) (take 20 (drop 105 trie)))
(get @database 1)
(take 10 @database)]) (take 10 @database)])
) )
(defn file-seq->backwards-markov-trie (defn file-seq->backwards-markov-trie
@ -1111,3 +1111,30 @@
(mapcat reverse))) (mapcat reverse)))
) )
;;;; Accuracy
(defn mle
  "Returns the frequency recorded in `model` for the key sequence `lookup`
  divided by the frequency recorded for its prefix `(butlast lookup)`.

  For each of the two paths, the node is fetched with `trie/lookup`, the
  entry stored under the `[]` key is read, and the second element of that
  entry is used as the frequency. When the entry is absent the pair
  `[nil 1]` is substituted, so a missing frequency counts as 1.

  NOTE(review): the name suggests a maximum-likelihood estimate of the last
  token given the preceding ones -- confirm against the trie's node layout."
  [model lookup]
  (let [freq-of (fn [path]
                  ;; Entry under `[]` is treated as a [value frequency] pair;
                  ;; only the frequency (index 1) is needed here.
                  (-> (trie/lookup model path)
                      (get [] [nil 1])
                      (nth 1 nil)))]
    (/ (freq-of lookup)
       (freq-of (butlast lookup)))))
;; REPL scratch: evaluate `mle` for a three-element key sequence.
;; NOTE(review): `markov-tight-trie` is not defined in the visible source --
;; presumably a trie built elsewhere in this namespace; confirm before running.
(comment
(mle markov-tight-trie [795 68 69])
)
(defn perplexity
  "Work-in-progress step towards a perplexity calculation for `line`.

  Runs `line` through `data-transform/xf-tokenize`, takes the first result,
  and maps every token through `database` (which is invoked as a function,
  so it must be callable on a token). The resulting ids are partitioned with
  `data-transform/n-to-m-partitions` using `rank` and `(inc rank)`.

  Returns a two-element vector:
    [lazy seq of `(mle model n-gram)` for each n-gram, the n-grams themselves]

  No single perplexity number is computed yet.

  NOTE(review): presumably `n-to-m-partitions` yields n-grams of length
  `rank` only (upper bound exclusive) -- verify against its definition."
  [model database rank line]
  (let [token-ids (->> (into [] data-transform/xf-tokenize [line])
                       first
                       (map database))
        n-grams   (data-transform/n-to-m-partitions rank (inc rank) token-ids)
        estimates (map (fn [n-gram] (mle model n-gram)) n-grams)]
    [estimates n-grams]))
;; REPL scratch: run `perplexity` on a sample sentence with rank 3.
;; NOTE(review): `markov-tight-trie` and `database` are not defined in the
;; visible source; `database` must be callable on a token (e.g. a map, not
;; an atom) -- confirm before running.
(comment
(perplexity markov-tight-trie database 3 "hi there eric how are you")
)

@ -935,7 +935,7 @@
(map tb/make-tree))) (map tb/make-tree)))
(let [phrase "I gave the cake to John at the store."] (let [phrase "I gave the cake to John at the store."]
(parse (tokenize phrase))) (parse [phrase]))
(let [phrase "I've got a good feeling"] (let [phrase "I've got a good feeling"]
(pos-tagger (tokenize phrase))) (pos-tagger (tokenize phrase)))

Loading…
Cancel
Save