From 186583ffb99b3511daff5e92bfa97c0b7fde6159 Mon Sep 17 00:00:00 2001 From: Eric Ihli Date: Thu, 22 Oct 2020 09:21:02 -0700 Subject: [PATCH] Add freetts grapheme to phoneme --- deps.edn | 1 + src/com/owoga/prhyme/core.clj | 2 ++ src/com/owoga/prhyme/gen.clj | 2 -- src/com/owoga/prhyme/util.clj | 64 +++++++++++++++++++++++++++++------ 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/deps.edn b/deps.edn index 73b6430..69cb76b 100644 --- a/deps.edn +++ b/deps.edn @@ -6,6 +6,7 @@ inflections {:mvn/version "0.13.2"} com.taoensso/tufte {:mvn/version "2.2.0"} clojure-opennlp {:mvn/version "0.5.0"} + net.sf.sociaal/freetts {:mvn/version "1.2.2"} enlive {:mvn/version "1.1.6"} com.taoensso/timbre {:mvn/version "4.10.0"}} :aliases {:dev {:extra-paths ["test"] diff --git a/src/com/owoga/prhyme/core.clj b/src/com/owoga/prhyme/core.clj index 12f72fa..928a0a0 100644 --- a/src/com/owoga/prhyme/core.clj +++ b/src/com/owoga/prhyme/core.clj @@ -165,6 +165,8 @@ [data rime] (map (partial rhyming-word data) rime)) +(defn all-rhymes [syllables] + ) (defn prhyme [phones] (let [syllables (s/syllabify phones) rhymes (remove #(some nil? %) diff --git a/src/com/owoga/prhyme/gen.clj b/src/com/owoga/prhyme/gen.clj index 59b7abf..1d23bcb 100644 --- a/src/com/owoga/prhyme/gen.clj +++ b/src/com/owoga/prhyme/gen.clj @@ -163,8 +163,6 @@ (string/join " " (map #(:norm-word %) (first r))))) poem-lines))) - - (comment (take 3 frp/words) (phrase->word frp/popular "well-off") diff --git a/src/com/owoga/prhyme/util.clj b/src/com/owoga/prhyme/util.clj index 046da12..e53a7a9 100644 --- a/src/com/owoga/prhyme/util.clj +++ b/src/com/owoga/prhyme/util.clj @@ -2,10 +2,60 @@ (:require [clojure.java.io :as io] [clojure.string :as string] [clojure.set :as set] - [clojure.zip :as z])) + [clojure.zip :as z]) + (:import (com.sun.speech.freetts.lexicon LetterToSoundImpl) + (com.sun.speech.freetts.en.us CMULexicon) + (java.io File))) + +(defn prepare-word + "Splits whitespace-separated fields into a sequence." + [line] + (string/split line #"[\t ]")) + +(def dictionary + (line-seq (io/reader (io/resource "cmudict_SPHINX_40")))) + +(def words (map prepare-word dictionary)) + +(def words-map + (into {} (map #(vector (string/lower-case (first %)) {:phonemes (rest %)}) words))) + +(def popular + (set (line-seq (io/reader (io/resource "popular.txt"))))) + +(def adverbs + (set/intersection popular (set (line-seq (io/reader (io/resource "adverbs.txt")))))) + +(def adjectives + (set/intersection popular (set (line-seq (io/reader (io/resource "adjectives.txt")))))) + +(def verbs + (set/intersection popular (set (line-seq (io/reader (io/resource "verbs.txt")))))) + +(def nouns + (set/intersection popular (set (line-seq (io/reader (io/resource "nouns.txt")))))) + + +(CMULexicon. "cmulex" true) + +(def cmu-lexicon (CMULexicon/getInstance true)) + +(defn remove-stress [phoneme] + (string/replace phoneme #"\d" "")) + +(defn convert-to-sphinx [phoneme] + (if (= phoneme "ax") + "ah" + phoneme)) + +(defn get-phones [dictionary phrase] + (if (dictionary phrase) + (:phonemes (dictionary phrase)) + (->> (map str (.getPhones cmu-lexicon phrase nil)) + (map remove-stress) + (map convert-to-sphinx) + (map string/upper-case)))) -;; {"AY" "vowel -;; "B" " (def phonemap (->> (io/reader (io/resource "cmudict-0.7b.phones")) (line-seq) @@ -24,14 +74,6 @@ (def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"}) -(def dictionary - (line-seq (io/reader (io/resource "cmudict_SPHINX_40")))) - -(defn prepare-word - "Splits whitespace-separated fields into a sequence." - [line] - (string/split line #"[\t ]")) - (defn take-through "(take-through even? [1 2 3 4 7 7 5 2 8 10]) returns '((1 2 3 4) (7 7 5 2) (8) (10))"