Remove unused Record, add backwards trie generator

main
Eric Ihli 3 years ago
parent efc954f4e6
commit 185bcfb6df

@ -15,26 +15,6 @@
[clojure.math.combinatorics :as combinatorics] [clojure.math.combinatorics :as combinatorics]
[com.owoga.prhyme.nlp.core :as nlp])) [com.owoga.prhyme.nlp.core :as nlp]))
(defrecord RhymeSet [phones words])
; Since we're dealing with phonetics, a word consists of the spelling as well as all possible phonetic pronunciations.
(defrecord UnpronouncedWord [word pronunciations])
(defrecord PronouncedWord [word pronunciation])
(defn all-pronunciations
[words]
(let [pronunciations (apply combinatorics/cartesian-product (map :pronunciations words))]
(map
(fn [pronunciation]
(map ->PronouncedWord (map :word words) pronunciation))
pronunciations)))
(let [input-words ["bog" "hog"]
words (map (fn [word] (->UnpronouncedWord word (phonetics/get-phones word))) input-words)
pronunciations (all-pronunciations words)]
pronunciations)
(defn clean-text [text] (defn clean-text [text]
(string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" ""))) (string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))
@ -136,6 +116,26 @@
;; [("to") [[15] 7]])] ;; [("to") [[15] 7]])]
) )
(defn line-seq->backwards-markov-trie
"For backwards markov."
[database lines n m]
(transduce
(comp
#(string/split % #"[\n+\?\.]")
(partial transduce data-transform/xf-tokenize conj)
(partial transduce data-transform/xf-filter-english conj)
(partial remove empty?)
(partial map (comp vec reverse))
;; xf-pad-tokens needs vectors to properly pad due to `into`
(partial into [] (data-transform/xf-pad-tokens (dec m) "</s>" 1 "<s>"))
(partial mapcat (partial data-transform/n-to-m-partitions n (inc m)))
(partial mapv (data-transform/make-database-processor database)))
(completing
(fn [trie lookup]
(update trie lookup (fnil #(update % 1 inc) [(peek lookup) 0]))))
(trie/make-trie)
lines))
;;;; Packing the trie into a small memory footprint ;;;; Packing the trie into a small memory footprint
@ -1206,7 +1206,7 @@
(defn perplexity-add-one (defn perplexity-add-one
"If you're only using perplexity to compare phrases generated using "If you're only using perplexity to compare phrases generated using
the same model, this might be a reasonable and simple alternative the same model, this might be a reasonable and simple alternative
to Katz Back-Off. to Katz Back-Off."
[rank model n-gram] [rank model n-gram]
(loop [i 1 (loop [i 1
n-gram n-gram n-gram n-gram

Loading…
Cancel
Save