From 7eb429daaf6f670d115d87cdab1768bda5478512 Mon Sep 17 00:00:00 2001 From: Eric Ihli Date: Tue, 20 Apr 2021 06:29:13 -0500 Subject: [PATCH] Rhyme with unstressed vowels only --- dev/examples/tpt.clj | 69 ++++++++++++++++++++++++++++-- src/com/owoga/prhyme/syllabify.clj | 3 +- src/com/owoga/prhyme/util.clj | 4 +- 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/dev/examples/tpt.clj b/dev/examples/tpt.clj index 659b719..62c33c0 100644 --- a/dev/examples/tpt.clj +++ b/dev/examples/tpt.clj @@ -407,7 +407,7 @@ (conj (peek result) (first phones)))))))) (defn syllabify-phrase-with-stress [phrase] - (map syllabify (string/split phrase #"[ -]"))) + (map syllabify-with-stress (string/split phrase #"[ -]"))) (comment (syllabify-phrase-with-stress "bother me") @@ -425,8 +425,8 @@ (comment (phrase->flex-rhyme-phones "bother me") - ) + (defonce context (atom {})) (defn initialize [] @@ -477,7 +477,7 @@ (trie/make-trie) (@context :database))) - (swap! + #_(swap! context assoc :flex-rhyme-trie @@ -493,9 +493,70 @@ (@context :database))) nil) -(initialize) +;; From a tightly-packed-trie and a database, build a trie +;; of phones of n-grams +(comment + (do + (time + (swap! + context + assoc + :flex-rhyme-trie' + (transduce + (comp + (map (fn [[k v]] + [(string/join " " (map (@context :database) k)) + [k v]])) + (map (fn [[phrase [k v]]] + [(reverse (phrase->flex-rhyme-phones phrase)) + [k v]]))) + (completing + (fn [trie [k v]] + (update trie k (fnil conj [v]) v))) + (trie/make-trie) + (tpt/children-at-depth (@context :trie) 0 2)))) + nil) + + ) + +(comment + (time (count (tpt/children-at-depth (@context :trie) 0 2))) + + (->> (trie/children-at-depth (@context :flex-rhyme-trie') 0 5) + (take 500)) + + (trie/children (trie/lookup (@context :flex-rhyme-trie') + (reverse (rest (phrase->flex-rhyme-phones "technology"))))) + + (trie/lookup (@context :flex-rhyme-trie') '("IY" "AH" "AA")) + (map (@context :database) '()) + (take 5 (@context :flex-rhyme-trie')) + + (map #(get (@context :database) %) [6177 13036]) + (map #(get (@context :database) %) [410 48670]) + (get (@context :trie) [1 2 2]) + + (trie/children (trie/lookup (@context :trie) [1 2])) + + (first (@context :trie)) + ;; 448351 + ;; 4388527 + (initialize) + + ) (comment + + + (filter + dict/english? + (flatten + (map #(get % []) + (trie/children + (trie/lookup + (@context :flex-rhyme-trie) + '("IY" "AH" "AA")))))) + (take 5 (drop 500 (@context :flex-rhyme-trie))) (let [key (reverse (phrase->flex-rhyme-phones "technology"))] [key diff --git a/src/com/owoga/prhyme/syllabify.clj b/src/com/owoga/prhyme/syllabify.clj index 913c311..cafd485 100644 --- a/src/com/owoga/prhyme/syllabify.clj +++ b/src/com/owoga/prhyme/syllabify.clj @@ -2,6 +2,7 @@ (:require [com.owoga.prhyme.data.phonetics :as phonetics] [com.owoga.prhyme.util :as util] [clojure.string :as string])) +(set! *warn-on-reflection* true) ;; ER is not yet handled properly. ;; PARENTHESES is syllabified as ("P" "ER" "IH" "N") ("TH" "UH") ("S" "IY" "S") ;; Glides are also broken. "R OY AH L" gets syllabified as a single syllable. @@ -25,7 +26,7 @@ ;; of a word. So it should be e.lip.sis ;; As an alternative to handling the isolated "s"-at-the-end-of-internal-coda case, ;; it works well-enough for me to treat all fricatives as lowest priority. -(def sonority-hierarchy +(def ^clojure.lang.PersistentVector sonority-hierarchy ["vowel" "liquid" "affricate" "fricative" "nasal" "stop" "semivowel" "aspirate"]) ;; Ok. Sonority hierarchy doesn't work. diff --git a/src/com/owoga/prhyme/util.clj b/src/com/owoga/prhyme/util.clj index 31160b4..61b73f4 100644 --- a/src/com/owoga/prhyme/util.clj +++ b/src/com/owoga/prhyme/util.clj @@ -4,6 +4,8 @@ [clojure.set :as set]) (:import (com.sun.speech.freetts.en.us CMULexicon))) +(set! *warn-on-reflection* true) + (defn prepare-word "Splits whitespace-separated fields into a sequence." [line] @@ -11,7 +13,7 @@ (CMULexicon. "cmulex" true) -(def cmu-lexicon (CMULexicon/getInstance true)) +(def ^CMULexicon cmu-lexicon (CMULexicon/getInstance true)) (defn remove-stress [phoneme] (string/replace phoneme #"\d" ""))