From 186583ffb99b3511daff5e92bfa97c0b7fde6159 Mon Sep 17 00:00:00 2001
From: Eric Ihli <ericihli@gmail.com>
Date: Thu, 22 Oct 2020 09:21:02 -0700
Subject: [PATCH] Add freetts grapheme to phoneme

---
 deps.edn                      |  1 +
 src/com/owoga/prhyme/core.clj |  2 ++
 src/com/owoga/prhyme/gen.clj  |  2 --
 src/com/owoga/prhyme/util.clj | 64 +++++++++++++++++++++++++++++------
 4 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/deps.edn b/deps.edn
index 73b6430..69cb76b 100644
--- a/deps.edn
+++ b/deps.edn
@@ -6,6 +6,7 @@
         inflections {:mvn/version "0.13.2"}
         com.taoensso/tufte {:mvn/version "2.2.0"}
         clojure-opennlp {:mvn/version "0.5.0"}
+        net.sf.sociaal/freetts {:mvn/version "1.2.2"}
         enlive {:mvn/version "1.1.6"}
         com.taoensso/timbre {:mvn/version "4.10.0"}}
  :aliases {:dev {:extra-paths ["test"]
diff --git a/src/com/owoga/prhyme/core.clj b/src/com/owoga/prhyme/core.clj
index 12f72fa..928a0a0 100644
--- a/src/com/owoga/prhyme/core.clj
+++ b/src/com/owoga/prhyme/core.clj
@@ -165,6 +165,8 @@
   [data rime]
   (map (partial rhyming-word data) rime))
 
+(defn all-rhymes [syllables] ; stub: the body is empty, so every call returns nil and `syllables` is unused
+  )
 (defn prhyme [phones]
   (let [syllables (s/syllabify phones)
         rhymes (remove #(some nil? %)
diff --git a/src/com/owoga/prhyme/gen.clj b/src/com/owoga/prhyme/gen.clj
index 59b7abf..1d23bcb 100644
--- a/src/com/owoga/prhyme/gen.clj
+++ b/src/com/owoga/prhyme/gen.clj
@@ -163,8 +163,6 @@
              (string/join " " (map #(:norm-word %) (first r)))))
          poem-lines)))
 
-
-
 (comment
   (take 3 frp/words)
   (phrase->word frp/popular "well-off")
diff --git a/src/com/owoga/prhyme/util.clj b/src/com/owoga/prhyme/util.clj
index 046da12..e53a7a9 100644
--- a/src/com/owoga/prhyme/util.clj
+++ b/src/com/owoga/prhyme/util.clj
@@ -2,10 +2,60 @@
   (:require [clojure.java.io :as io]
             [clojure.string :as string]
             [clojure.set :as set]
-            [clojure.zip :as z]))
+            [clojure.zip :as z])
+  (:import (com.sun.speech.freetts.lexicon LetterToSoundImpl)
+           (com.sun.speech.freetts.en.us CMULexicon)
+           (java.io File)))
+
+(defn prepare-word
+  "Breaks `line` apart at each single tab or space and returns the vector of fields."
+  [line]
+  (-> line (string/split #"[\t ]")))
+
+(def dictionary (with-open [r (io/reader (io/resource "cmudict_SPHINX_40"))] ; reader is closed after loading
+                  (doall (line-seq r)))) ; seq of the resource's lines, fully realised before the reader closes
+
+(def words (for [line dictionary] (prepare-word line))) ; lazy seq: one field vector per dictionary line
+
+(def words-map ; lower-cased first field of each entry -> {:phonemes <remaining fields>}
+  (into {} (map (fn [entry] [(string/lower-case (first entry)) {:phonemes (rest entry)}])) words))
+
+(def popular (with-open [r (io/reader (io/resource "popular.txt"))] ; reader is closed after loading
+               (set (line-seq r)))) ; set of the lines of popular.txt, built eagerly inside with-open
+
+(def adverbs (with-open [r (io/reader (io/resource "adverbs.txt"))] ; reader is closed after loading
+               (set/intersection popular (set (line-seq r))))) ; lines of adverbs.txt that are also in `popular`
+
+(def adjectives (with-open [r (io/reader (io/resource "adjectives.txt"))] ; reader is closed after loading
+                  (set/intersection popular (set (line-seq r))))) ; lines of adjectives.txt also in `popular`
+
+(def verbs (with-open [r (io/reader (io/resource "verbs.txt"))] ; reader is closed after loading
+             (set/intersection popular (set (line-seq r))))) ; lines of verbs.txt that are also in `popular`
+
+(def nouns (with-open [r (io/reader (io/resource "nouns.txt"))] ; reader is closed after loading
+             (set/intersection popular (set (line-seq r))))) ; lines of nouns.txt that are also in `popular`
+
+
+(CMULexicon. "cmulex" true) ; NOTE(review): this instance is discarded — presumably redundant with getInstance below; confirm
+
+(def cmu-lexicon (CMULexicon/getInstance true)) ; lexicon queried by get-phones when a phrase is not in the dictionary
+
+(defn remove-stress "Returns `phoneme` with every digit character deleted." [phoneme]
+  (-> phoneme (string/replace #"\d" "")))
+
+(defn convert-to-sphinx
+  "Maps the phoneme \"ax\" to \"ah\"; any other value is returned unchanged."
+  [phoneme]
+  (case phoneme "ax" "ah" phoneme))
+
+(defn get-phones
+  "Phonemes for `phrase`: the :phonemes of its `dictionary` entry if present, else normalised cmu-lexicon phones."
+  [dictionary phrase]
+  (if-let [entry (dictionary phrase)]
+    (:phonemes entry)
+    (->> (.getPhones cmu-lexicon phrase nil)
+         (map (comp string/upper-case convert-to-sphinx remove-stress str)))))
 
-;; {"AY" "vowel
-;;  "B"  "
 (def phonemap
   (->> (io/reader (io/resource "cmudict-0.7b.phones"))
        (line-seq)
@@ -24,14 +74,6 @@
 
 (def single-sound-bigram #{"TH" "SH" "PH" "WH" "CH"})
 
-(def dictionary
-  (line-seq (io/reader (io/resource "cmudict_SPHINX_40"))))
-
-(defn prepare-word
-  "Splits whitespace-separated fields into a sequence."
-  [line]
-  (string/split line #"[\t ]"))
-
 (defn take-through
   "(take-through even? [1 2 3 4 7 7 5 2 8 10])
    returns '((1 2 3 4) (7 7 5 2) (8) (10))"