Add flex-rhyme-trie for all pronunciations to core

main
Eric Ihli 3 years ago
parent f6b1150431
commit 69fb02db65

@ -22,6 +22,6 @@
com.taoensso/timbre {:mvn/version "4.10.0"}
com.owoga/tightly-packed-trie
{:local/root "/home/eihli/src/clj-tightly-packed-trie"}
com.owoga/phonetics {:mvn/version "0.1.1"}}
com.owoga/phonetics {:mvn/version "0.1.3"}}
:aliases {:dev {:extra-paths ["test" "examples" "dev"]
:extra-deps {}}}}

@ -247,7 +247,13 @@
)
;;;; The difference between a forwards and a backwards
;; markov is that the backwards markov has its tokens
;; reversed and has the </s> tokens padded by a number
;; equal to the markov rank (rather than the <s> padded).
(defn file-seq->markov-trie
"For forwards markov."
[database files n m]
(transduce
(comp
@ -276,41 +282,61 @@
(count trie)
(get @database 1)
(take 10 @database)])
)
(defn file-seq->backwards-markov-trie
"For backwards markov."
[database files n m]
(transduce
(comp
(map slurp)
(map #(string/split % #"[\n+\?\.]"))
(map (partial transduce data-transform/xf-tokenize conj))
(map (partial transduce data-transform/xf-filter-english conj))
(map (partial remove empty?))
(map (partial map reverse))
(map (partial into [] (data-transform/xf-pad-tokens (dec m) "</s>" 1 "<s>")))
(map (partial mapcat (partial data-transform/n-to-m-partitions n (inc m))))
(mapcat (partial mapv (data-transform/make-database-processor database))))
(completing
(fn [trie lookup]
(update trie lookup (fnil #(update % 1 inc) [lookup 0]))))
(trie/make-trie)
files))
(comment
(let [files (->> "dark-corpus"
io/file
file-seq
(eduction (xf-file-seq 501 2)))
database (atom {:next-id 1})
trie (file-seq->backwards-markov-trie database files 1 3)]
[(take 5 trie)
(->> (trie/children-at-depth trie 0 1)
(map
(fn [[k v]]
[(map @database k) v]))
(sort-by (comp - second second))
(take 5))])
;; => [([(1 1 2) [[1 1 2] 55]]
;; [(1 1) [[1 1] 55]]
;; [(1 2 3) [[1 2 3] 1]]
;; [(1 2 7) [[1 2 7] 1]]
;; [(1 2 12) [[1 2 12] 1]])
;; ([("</s>") [[1] 110]]
;; [("<s>") [[2] 55]]
;; [(",") [[19] 14]]
;; [("you") [[63] 11]]
;; [("to") [[15] 7]])]
)
(defn initialize
"Takes an atom as a context. Swaps in :database, :trie, :rhyme-trie"
[context]
(swap!
context
assoc
:database
(with-open [rdr (io/reader "resources/backwards-database.bin")]
(into {} (map read-string (line-seq rdr)))))
(swap!
context
assoc
:trie
(tpt/load-tightly-packed-trie-from-file
"resources/dark-corpus-backwards-tpt.bin"
(decode-fn (@context :database))))
(swap!
context
assoc
:perfect-rhyme-trie
(transduce
(comp
(map first)
(filter string?)
(map #(vector % (reverse (phonetics/get-phones %))))
(map reverse))
(completing
(fn [trie [k v]]
(update trie k (fnil #(update % 1 inc) [v 0]))))
(trie/make-trie)
(@context :database)))
(swap!
context

@ -1,7 +1,8 @@
(ns com.owoga.prhyme.core
(:require [clojure.zip :as zip]
[clojure.string :as string]
[com.owoga.prhyme.data.dictionary :as dict]
[clojure.math.combinatorics :as combinatorics]
[com.owoga.trie :as trie]
[com.owoga.prhyme.util :as util]
[com.owoga.phonetics :as phonetics]
[com.owoga.phonetics.syllabify :as syllabify]
@ -131,6 +132,112 @@
word))))
(merge-phrase-words phrase))))
(defn phrase->perfect-rhyme-trie
[words]
(transduce
(comp
(map #(vector (map reverse (phonetics/get-phones %)) %)))
(completing
(fn [trie [lookups v]]
(reduce
(fn [trie lookup]
(update trie lookup (fnil #(update % 1 inc) [v 0])))
trie
lookups)))
(trie/make-trie)
words))
(comment
(let [trie (words->perfect-rhyme-trie ["dog" "hog" "bog" "hop"])]
trie)
;; => {("G" "AA1" "B") ["bog" 1],
;; ("G" "AA1" "HH") ["hog" 1],
;; ,,,
;; ("P" "AA1" "HH") ["hop" 1],
;; ("P" "AA1") nil,
;; ("P") nil}
)
(defn phrase->all-flex-rhyme-phones
"Takes a space-seperated string of words
and returns the concatenation of the words
vowel phones.
Returns them in reversed order so they
are ready to be used in a lookup of a rhyme trie.
Returns all possible pronunciations. For hog -> haog, haag."
[phrase]
(->> phrase
(#(string/split % #" "))
(map (fn [word]
(let [phones (phonetics/get-phones word)]
(map #(vector % word) phones))))
;; Lots of nesting here.
;; We have phrase -> word pronunciations -> word pronunciation -> [phones word]
;; The rest will be easier if we get rid of a level of nesting
;; by mapcatting the cross product of pronunciations.
(apply combinatorics/cartesian-product)
;; Now we have [phrases [pronunciations [[phones] word]]]
(map (fn [pronunciations]
(map (fn [[phones word]]
[(syllabify/syllabify phones) word])
pronunciations)))
(map (fn [pronunciations]
(map (fn [[syllables word]]
[(map (fn [phones]
(->> phones
(filter
(partial re-find #"\d"))
(into [])))
syllables)
word])
pronunciations)))
(map (fn [pronunciations]
(reduce
(fn [[syllable-vowel-sounds words] [syllables word]]
[(into
syllable-vowel-sounds
(map #(string/replace % #"[02-9]" "")
(reduce into [] syllables)))
(into words [word])])
[[] []]
pronunciations)))
(map (fn [[phones words]]
[phones (string/join " " words)]))))
(comment
(phrase->all-flex-rhyme-phones "bog hopscotch")
;; => ([["AA1" "AA1" "AA"] "bog hopscotch"]
;; [["AO1" "AA1" "AA"] "bog hopscotch"])
(let [result (map phrase->all-flex-rhyme-phones ["dog" "hog" "hop" "bog hopscotch"])]
result)
(let [phrase '(([["B" "AA1" "G"] "bog"] [["B" "AO1" "G"] "bog"])
([["S" "K" "AA1" "CH"] "scotch"]))]
(apply combinatorics/cartesian-product phrase))
(phonetics/get-phones "bog")
)
#_(defn phrase->flex-rhyme-trie
[phrase]
(transduce
(comp
(map #(vector (map reverse (phonetics/get-phones %)) %)))
(completing
(fn [trie [lookups v]]
(reduce
(fn [trie lookup]
(update trie lookup (fnil #(update % 1 inc) [v 0])))
trie
lookups)))
(trie/make-trie)
words))
(defn phrase->flex-rhyme-phones
"Takes a space-seperated string of words
and returns the concatenation of the words
@ -151,6 +258,7 @@
(comment
(phrase->flex-rhyme-phones "bother me");; => ("IY" "ER" "AA")
(phrase->flex-rhyme-phones "hog")
)
(defn words-by-rime* [words]

Loading…
Cancel
Save