Add function to phonify a space-separated phrase

main
Eric Ihli 3 years ago
parent 9d547a2733
commit 6164773515

@ -1,6 +1,7 @@
{:paths ["src" "resources"] {:paths ["src" "resources"]
:deps {org.clojure/clojure {:mvn/version "1.10.3"} :deps {org.clojure/clojure {:mvn/version "1.10.3"}
net.sf.sociaal/freetts {:mvn/version "1.2.2"}} net.sf.sociaal/freetts {:mvn/version "1.2.2"}
org.clojure/math.combinatorics {:mvn/version "0.1.6"}}
:aliases :aliases
{:test {:extra-paths ["test"] {:test {:extra-paths ["test"]
:extra-deps {org.clojure/test.check {:mvn/version "1.1.0"}}} :extra-deps {org.clojure/test.check {:mvn/version "1.1.0"}}}

@ -2,7 +2,8 @@
(:require [clojure.set] (:require [clojure.set]
[clojure.string :as string] [clojure.string :as string]
[clojure.java.io :as io] [clojure.java.io :as io]
[clojure.set :as set]) [clojure.set :as set]
[clojure.math.combinatorics :as combinatorics])
(:import (com.sun.speech.freetts.en.us CMULexicon))) (:import (com.sun.speech.freetts.en.us CMULexicon)))
#_(set! *warn-on-reflection* true) #_(set! *warn-on-reflection* true)
@ -202,12 +203,31 @@
(.getPhones cmu-lexicon word nil))]))) (.getPhones cmu-lexicon word nil))])))
(defn get-word (defn get-word
"Returns vector of all words that are in the CMU pronouncing dictionary
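and whose pronunciation matches the given `phones`.
Accepts phones with or without stress digits: if any phone ends in a digit
the stressed lookup is used, otherwise the unstressed lookup is used.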
Not an exact inverse of `get-phones` since `get-phones` can figure out
somewhat appropriate phones for a made-up word. This function cannot
figure out the spelling of a made-up word provided the made-up word's phones.
Returns nil if no word can be found."
[phones] [phones]
(let [stressed? (some #(re-matches #".*\d" %) phones)] (let [stressed? (some #(re-matches #".*\d" %) phones)]
(if stressed? (if stressed?
(stressed-phones-to-cmu-word-map phones) (stressed-phones-to-cmu-word-map phones)
(unstressed-phones-to-cmu-word-map phones)))) (unstressed-phones-to-cmu-word-map phones))))
(defn phrase-phones
  "Returns a vector of every pronunciation of `phrase`, a string of
  whitespace-separated words.

  Each word is looked up with `get-phones`, which can return more than one
  pronunciation for a word. The result holds one entry per combination of
  the per-word pronunciations (their cartesian product); within an entry the
  phones of consecutive words are concatenated into a single flat vector.

  Leading/trailing whitespace and runs of whitespace between words are
  ignored, so an empty \"word\" is never passed to `get-phones`."
  [phrase]
  (->> (string/split (string/trim phrase) #"\s+")
       ;; A blank phrase still splits to [""]; drop it rather than look it up.
       (remove string/blank?)
       (map get-phones)
       (apply combinatorics/cartesian-product)
       ;; Each product is a seq of per-word phone vectors; flatten one level.
       (mapv #(reduce into [] %))))
(comment (comment
(get-phones "alaska") (get-phones "alaska")
;; => [["AH0" "L" "AE1" "S" "K" "AH0"]] ;; => [["AH0" "L" "AE1" "S" "K" "AH0"]]
@ -220,4 +240,10 @@
;; => ["alaska"] ;; => ["alaska"]
(get-word ["N" "IY" "S"]) (get-word ["N" "IY" "S"])
;; => ["neice" "neece" "niece" "nice(1)" "kneece" "kniess" "neiss" "neace" "niess"] ;; => ["neice" "neece" "niece" "nice(1)" "kneece" "kniess" "neiss" "neace" "niess"]
(get-word ["F" "UW" "B" "AE" "Z"])
;; => nil
(phrase-phones "bog hog")
;; [["B" "AA1" "G" "HH" "AA1" "G"]
;; ["B" "AO1" "G" "HH" "AA1" "G"]]
) )

@ -4,7 +4,7 @@
(defn take-through (defn take-through
"(take-through even? [1 2 3 4 7 7 5 2 8 10]) "(take-through even? [1 2 3 4 7 7 5 2 8 10])
returns '((1 2 3 4) (7 7 5 2) (8) (10))" returns '((1 2) (3 4) (7 7 5 2) (8) (10))"
[pred coll] [pred coll]
(loop [coll coll (loop [coll coll
acc '()] acc '()]
@ -19,3 +19,8 @@
:else :else
(recur (rest coll) (recur (rest coll)
(cons (first coll) acc))))) (cons (first coll) acc)))))
(comment
(take-through even? [1 2 3 4 7 7 5 2 8 10])
;; => ((1 2) (3 4) (7 7 5 2) (8) (10))
)

@ -41,4 +41,7 @@
(syllabify ["P" "IH" "L" "OW"])))) (syllabify ["P" "IH" "L" "OW"]))))
(testing "steel" (testing "steel"
(is (= [["S" "T" "IY1" "L"]] (is (= [["S" "T" "IY1" "L"]]
(syllabify ["S" "T" "IY1" "L"]))))) (syllabify ["S" "T" "IY1" "L"]))))
(testing "scotch"
(is (= [["S" "K" "AA1" "CH"]]
(syllabify ["S" "K" "AA1" "CH"])))))

@ -13,4 +13,8 @@
(is (= ["hello(1)"] (is (= ["hello(1)"]
(get-word ["HH" "EH" "L" "OW"]))) (get-word ["HH" "EH" "L" "OW"])))
(is (= ["ensure(1)" "insure"] (is (= ["ensure(1)" "insure"]
(get-word ["IH" "N" "SH" "UH" "R"]))))) (get-word ["IH" "N" "SH" "UH" "R"]))))
(testing "phrase to phones"
(is (= [["B" "AA1" "G" "HH" "AA1" "G"]
["B" "AO1" "G" "HH" "AA1" "G"]]
(phrase-phones "bog hog")))))

Loading…
Cancel
Save