diff --git a/src/com/owoga/corpus/markov.clj b/src/com/owoga/corpus/markov.clj
index 4fb04a6..24a0bc5 100644
--- a/src/com/owoga/corpus/markov.clj
+++ b/src/com/owoga/corpus/markov.clj
@@ -252,6 +252,9 @@
     (nippy/thaw-from-file "/home/eihli/.models/rhyme-trie-unstressed-vowels-and-trailing-consonants.bin")))
+  (rhyme-trie (prhyme/phrase->unstressed-vowels-and-tailing-consonants "war on poverty"))
+  (get rhyme-trie ["AA" "ER" "IY"])
+
   )
@@ -1162,13 +1165,41 @@
       (+ log-prob (Math/log (sgt w1-freq))))))))
 
 (comment
-  (perplexity 2 markov-tight-trie [1 1 7 90]);; => -14.360605720470575
-  (perplexity 2 markov-tight-trie [1 1 7 89]);; => -12.98036901624079
-  (perplexity 2 markov-tight-trie [1 1 7 174]);; => -11.84336736411312
+  (perplexity 2 markov-tight-trie [1 1 7 90]) ;; => -14.360605720470575
+  (perplexity 2 markov-tight-trie [1 1 7 89]) ;; => -12.98036901624079
+  (perplexity 2 markov-tight-trie [1 1 7 174]) ;; => -11.84336736411312
   (perplexity 4 markov-tight-trie [22 22 22 22 34 34 18])
   (trie/lookup markov-tight-trie [1 1 7 90])
   (trie/lookup markov-tight-trie [1 1 7 89])
   (map database [1 1 7])
+  (let [likely-phrase ["a" "hole" "" ""]
+        less-likely-phrase ["this" "hole" "" ""]
+        least-likely-phrase ["that" "hole" "" ""]]
+    (run!
+     (fn [word]
+       (println
+        (format
+         "\"%s\" has preceded \"hole\" \"\" \"\" a total of %s times"
+         word
+         (second (get markov-tight-trie (map database ["" "" "hole" word]))))))
+     ["a" "this" "that"])
+    (run!
+     (fn [word]
+       (let [seed ["" "" "hole" word]]
+         (println
+          (format
+           "%s is the perplexity of \"%s\" \"hole\" \"\" \"\""
+           (->> seed
+                (map database)
+                (perplexity 4 markov-tight-trie))
+           word))))
+     ["a" "this" "that"]))
+
+  (perplexity 2 markov-tight-trie [1 1 7 90]) ;; => -14.360605720470575
+  (perplexity 2 markov-tight-trie [1 1 7 89]) ;; => -12.98036901624079
+  (perplexity 2 markov-tight-trie [1 1 7 174]) ;; => -11.84336736411312
+
+  )
@@ -1176,8 +1207,6 @@
   "If you're only using perplexity to compare phrases generated using the
   same model, this might be a reasonable and simple alternative to Katz Back-Off.
-
-
   Just give everything with 0 frequencies a freq of 1."
   [rank model n-gram]
   (loop [i 1
         n-gram n-gram
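
For readers skimming the last hunk: the docstring there describes a simpler alternative to Katz Back-Off for phrases compared under the same model, namely treating every zero-frequency n-gram as if it had occurred once. The snippet below is a minimal, self-contained sketch of that idea against a plain map of bigram counts rather than the project's trie; the names counts, log-prob-with-floor, and phrase-log-prob are invented for illustration and do not appear in markov.clj.

;; Minimal sketch (not from markov.clj): floor zero frequencies at 1 so every
;; n-gram gets a finite log probability and phrases stay comparable.
(def counts
  {["a" "hole"]    7
   ["this" "hole"] 2})

(def total (reduce + (vals counts)))

(defn log-prob-with-floor
  "Log probability of a bigram, treating unseen bigrams as if seen once."
  [bigram]
  (Math/log (/ (double (max 1 (get counts bigram 0)))
               (double total))))

(defn phrase-log-prob
  "Sum of bigram log probabilities over a phrase given as a seq of words."
  [words]
  (->> (partition 2 1 words)
       (map vec)
       (map log-prob-with-floor)
       (reduce + 0.0)))

(comment
  (log-prob-with-floor ["a" "hole"])    ;; seen:   log(7/9)
  (log-prob-with-floor ["that" "hole"]) ;; unseen: log(1/9), floored to 1
  (phrase-log-prob ["that" "hole"])
  )

Unlike Katz Back-Off, this floor does not redistribute probability mass, so the numbers are only meaningful for ranking phrases generated by the same model, which is the caveat the docstring itself makes.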