10 changed files with 60 additions and 176 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,22 +1,24 @@
 # Change Log
 All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/).
-## [0.1.3] - 2021-05-02
+## [Unreleased]
-### Fixed
+### Changed
- Fixed bug syllabifying words that begin with consonants that don't adhere to sonority hierarchy.
+- Add a new arity to `make-widget-async` to provide a different widget shape.
  - "Steel", for example. "T" is less sonorous than "S" and typically wouldn't be included in an onset, but since there are no vowels preceding the "ST" then both *should* be included in the onset.
-## [0.1.2] - 2021-04-22
+## [0.1.1] - 2021-04-22
-### Fixed
+### Changed
- Fixed bug when getting phones from CMULexicon because the word wasn't found in the CMU dictionary. (Missing parens)
+- Documentation on how to make the widgets.
 - Comment out warn-on-reflection code that was just being used to find performance gains.
-## 0.1.1
+### Removed
-### Added
+- `make-widget-sync` - we're all async, all the time.
-Initial release
+### Fixed
 - Fixed widget maker to keep working when daylight savings switches over.
- Phonetics and syllabification utilities 
+## 0.1.0 - 2021-04-22
 ### Added
 - Files from the new template.
 - Widget maker public API - `make-widget-sync`.
-[Unreleased]: https://github.com/com.owoga/phonetics/compare/0.1.2...HEAD
+[Unreleased]: https://github.com/com.owoga/phonetics/compare/0.1.1...HEAD
-[0.1.1]: https://github.com/com.owoga/phonetics/compare/0.1.1...0.1.2
+[0.1.1]: https://github.com/com.owoga/phonetics/compare/0.1.0...0.1.1
--- a/deps.edn
+++ b/deps.edn
@ -1,7 +1,6 @@
 {:paths ["src" "resources"]
 :deps {org.clojure/clojure {:mvn/version "1.10.3"}
-        net.sf.sociaal/freetts {:mvn/version "1.2.2"}
+        net.sf.sociaal/freetts {:mvn/version "1.2.2"}}
        org.clojure/math.combinatorics {:mvn/version "0.1.6"}}
 :aliases
 {:test {:extra-paths ["test"]
         :extra-deps {org.clojure/test.check {:mvn/version "1.1.0"}}}
--- a/pom.xml
+++ b/pom.xml
@ -3,10 +3,10 @@
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.owoga</groupId>
  <artifactId>phonetics</artifactId>
-  <version>0.1.3</version>
+  <version>0.1.1</version>
  <name>com.owoga/phonetics</name>
  <description>Phonetics and syllabification of English words.</description>
-  <url>https://github.com/eihli/phonetics</url>
+  <url>https://github.com/com.owoga/phonetics</url>
  <licenses>
    <license>
      <name>MIT License</name>
@ -19,9 +19,9 @@
    </developer>
  </developers>
  <scm>
-    <url>https://github.com/eihli/phonetics</url>
+    <url>https://github.com/com.owoga/phonetics</url>
-    <connection>scm:git:git://github.com/eihli/phonetics.git</connection>
+    <connection>scm:git:git://github.com/com.owoga/phonetics.git</connection>
-    <developerConnection>scm:git:ssh://git@github.com/eihli/phonetics.git</developerConnection>
+    <developerConnection>scm:git:ssh://git@github.com/com.owoga/phonetics.git</developerConnection>
    <tag>v0.1.0-SNAPSHOT</tag>
  </scm>
  <dependencies>
--- a/resources/07IJCAI-spelling-variants.pdf
+++ b/resources/07IJCAI-spelling-variants.pdf
--- a/src/com/owoga/phonetics.clj
+++ b/src/com/owoga/phonetics.clj
@ -2,8 +2,7 @@
  (:require [clojure.set]
            [clojure.string :as string]
            [clojure.java.io :as io]
-            [clojure.set :as set]
+            [clojure.set :as set])
            [clojure.math.combinatorics :as combinatorics])
  (:import (com.sun.speech.freetts.en.us CMULexicon)))
 #_(set! *warn-on-reflection* true)
@ -156,34 +155,6 @@
  two different ways of getting phonemes."
  (CMULexicon/getInstance true))
 ;; This sonority hierarchy may not be perfect.
 ;; It stems from: http://www.glottopedia.org/index.php/Sonority_hierarchy
 ;; I tried to match the phones provided by the CMU dict to the hierarchies
 ;; listed on that page:
 ;;   vowels > liquids > nasals > voiced fricatives
 ;;   > voiceless fricatives = voiced plosives
 ;;   > voiceless plosives (Anderson & Ewen 1987)
 (def ^clojure.lang.PersistentVector sonority-hierarchy
  ;; Phone classes ordered from most sonorous (index 0) to least sonorous (last index).
  ;; `sonority` returns a phone's position in this vector, so a smaller index
  ;; means a more sonorous class.
  ;;   more sonorous  < < < vowel < < < (maximal onset) vowel > > > less sonorous
  ["vowel" "liquid" "semivowel" "aspirate" "affricate" "nasal" "fricative" "stop"])
 ;; Set of vowel phones, written without a stress digit, that are treated as lax vowels.
 (def lax-vowels #{"EH" "IH" "AE" "AH" "UH"})
 ;; Returns the index in `sonority-hierarchy` of the class that `phonemap`
 ;; gives for `phone`; a lower index means a more sonorous phone.
 ;; NOTE(review): `.indexOf` yields -1 when the looked-up class is not in the
 ;; vector, which presumably happens for phones `phonemap` does not know
 ;; (e.g. phones still carrying a stress digit) - confirm against `phonemap`.
 (defn sonority [phone]
  (.indexOf sonority-hierarchy (phonemap phone)))
 (defn vowel?
  "Strips any digits (stress markers) from `phone`, then looks the bare phone
  up in `vowel`. Returns whatever `vowel` returns for it."
  [phone]
  (-> phone
      (string/replace #"\d" "")
      vowel))
 ;; True for any phone for which `vowel?` returns a falsey value.
 (def consonant? (complement vowel?))
 (defn >sonorous
  "True when phone `a` is more sonorous than phone `b`, i.e. `a` has the
  smaller `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (> rank-b rank-a)))
 (defn <sonorous
  "True when phone `a` is less sonorous than phone `b`, i.e. `a` has the
  larger `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (< rank-b rank-a)))
 (defn remove-stress
  "Returns a vector of `phonemes` with every digit (stress marker) removed
  from each phoneme."
  [phonemes]
  (into []
        (map (fn [phoneme] (string/replace phoneme #"\d" "")))
        phonemes))
@ -231,31 +202,12 @@
        (.getPhones cmu-lexicon word nil))])))
 (defn get-word
  "Looks up the CMU pronouncing dictionary words whose pronunciation is `phones`.
  If any phone ends in a digit the lookup goes through
  `stressed-phones-to-cmu-word-map`, otherwise through
  `unstressed-phones-to-cmu-word-map`.
  This is not an exact inverse of `get-phones`: `get-phones` can produce
  plausible phones for a made-up word, but a made-up word's spelling cannot
  be recovered from its phones.
  Returns nil if no word can be found."
  [phones]
  (let [has-stress-digit? (some (fn [phone] (re-matches #".*\d" phone)) phones)
        lookup (if has-stress-digit?
                 stressed-phones-to-cmu-word-map
                 unstressed-phones-to-cmu-word-map)]
    (lookup phones)))
 (defn phrase-phones
  "Returns every pronunciation of `phrase`, a string of words separated by whitespace.
  Each word is looked up with `get-phones`. Because a word can have several
  pronunciations, the result is a vector with one entry per combination
  (cartesian product) of the words' pronunciations, each entry being the
  words' phones concatenated into a single flat vector.
  Leading and trailing whitespace is ignored and a run of whitespace counts
  as one separator, so a non-blank phrase never hands an empty word to
  `get-phones`."
  [phrase]
  ;; Trim first: splitting a string with leading whitespace would otherwise
  ;; still produce an empty first token.
  (->> (string/split (string/trim phrase) #"\s+")
       (map get-phones)
       (apply combinatorics/cartesian-product)
       (mapv (partial reduce into []))))
 (comment
  (get-phones "alaska")
  ;; => [["AH0" "L" "AE1" "S" "K" "AH0"]]
@ -268,10 +220,4 @@
  ;; => ["alaska"]
  (get-word ["N" "IY" "S"])
  ;; => ["neice" "neece" "niece" "nice(1)" "kneece" "kniess" "neiss" "neace" "niess"]
  (get-word ["F" "UW" "B" "AE" "Z"])
  ;; => nil
  (phrase-phones "bog hog")
  ;;  [["B" "AA1" "G" "HH" "AA1" "G"]
  ;;   ["B" "AO1" "G" "HH" "AA1" "G"]]
  )
--- a/src/com/owoga/phonetics/stress_manip.clj
+++ b/src/com/owoga/phonetics/stress_manip.clj
@ -1,64 +0,0 @@
 (ns com.owoga.phonetics.stress-manip
  (:require [clojure.string :as string]))
 ;; Returns the matched "1" (truthy) when `phone` contains the digit 1,
 ;; the primary-stress marker; otherwise nil.
 (defn primary-stress?
  [phone]
  (re-find #"1" phone))
 ;; Returns the first matched digit in the range 2-9 (truthy) when `phone`
 ;; contains one; otherwise nil.
 (defn non-primary-stress?
  [phone]
  (re-find #"[2-9]" phone))
 ;; Returns the matched "0" (truthy) when `phone` contains the digit 0,
 ;; the unstressed marker; otherwise nil.
 (defn unstressed?
  [phone]
  (re-find #"0" phone))
 (defn remove-any-stress-signifiers
  "Returns a lazy seq of `phones` with every digit removed from each phone."
  [phones]
  (map (fn [phone] (string/replace phone #"\d" ""))
       phones))
 (defn remove-non-primary-stress-signifiers
  "Returns a lazy seq of `phones` with the digits 0 and 2-9 removed from each
  phone, so only the primary-stress digit 1 is kept."
  [phones]
  (map (fn [phone] (string/replace phone #"[02-9]" ""))
       phones))
 (defn unify-stressed
  "Returns a lazy seq of `phones` in which every digit 2-9 is rewritten to 1,
  so all stressed phones carry the same marker. The digit 0 is left alone."
  [phones]
  (map (fn [phone] (string/replace phone #"[2-9]" "1"))
       phones))
 (def consonant-unification-map
  "This almost aligns with the phonemap that maps phones to whether they are vowels, aspirates, nasals, etc...
  Slight but possibly important difference in stops. For example, I think T and D
  are more unified than T and G; and G and K are more unified than G and T."
  ;; Keys are consonant phones; values are the phone each key is unified to.
  ;; Entries marked `;; <-` map a phone to a different phone; the rest map to themselves.
  {"T" "T"
   "CH" "CH"
   "K" "K"
   "HH" "HH"
   "L" "L"
   "JH" "CH" ;; <-
   "G" "K"   ;; <-
   "M" "M"
   "S" "S"
   "Y" "Y"
   "Z" "S"   ;; <-
   "R" "R"
   "F" "F"
   "B" "B"
   "SH" "CH" ;; <-
   "P" "B"   ;; <-
   "V" "F"   ;; <-
   "TH" "T"  ;; <-
   "N" "M"   ;; <-
   "DH" "T"  ;; <-
   "W"  "Y"  ;; <-
   "ZH" "S"  ;; <-
   "NG" "M"  ;; <-
   "D" "T"   ;; <-
   })
 (defn unify-consonants
  "Returns a vector of `phones` with each phone replaced by its entry in
  `consonant-unification-map`; phones without an entry are kept unchanged."
  [phones]
  (into []
        (map (fn [phone] (get consonant-unification-map phone phone)))
        phones))
 (defn remove-unstressed-signifiers
  "Returns a lazy seq of `phones` with every 0 character (the unstressed
  marker) removed from each phone. Other stress digits are left in place."
  [phones]
  ;; `phones` must be passed to `map`; the one-argument arity of `map` returns
  ;; a transducer rather than the mapped sequence.
  (map #(string/replace % #"0" "") phones))
--- a/src/com/owoga/phonetics/syllabify.clj
+++ b/src/com/owoga/phonetics/syllabify.clj
@ -5,11 +5,37 @@
 #_(set! *warn-on-reflection* true)
 ;; This sonority hierarchy may not be perfect.
 ;; It stems from: http://www.glottopedia.org/index.php/Sonority_hierarchy
 ;; I tried to match the phones provided by the CMU dict to the hierarchies
 ;; listed on that page:
 ;;   vowels > liquids > nasals > voiced fricatives
 ;;   > voiceless fricatives = voiced plosives
 ;;   > voiceless plosives (Anderson & Ewen 1987)
 (def ^clojure.lang.PersistentVector sonority-hierarchy
  ;; Phone classes ordered from most sonorous (index 0) to least sonorous (last index).
  ;; `sonority` returns a phone's position in this vector, so a smaller index
  ;; means a more sonorous class.
  ;;   more sonorous  < < < vowel < < < (maximal onset) vowel > > > less sonorous
  ["vowel" "liquid" "semivowel" "aspirate" "affricate" "nasal" "fricative" "stop"])
 ;; Set of vowel phones, written without a stress digit, that are treated as lax vowels.
 (def lax-vowels #{"EH" "IH" "AE" "AH" "UH"})
 ;; Returns the index in `sonority-hierarchy` of the class that
 ;; `phonetics/phonemap` gives for `phone`; a lower index means a more
 ;; sonorous phone.
 ;; NOTE(review): `.indexOf` yields -1 when the looked-up class is not in the
 ;; vector, which presumably happens for phones `phonetics/phonemap` does not
 ;; know (e.g. phones still carrying a stress digit) - confirm against it.
 (defn sonority [phone]
  (.indexOf sonority-hierarchy (phonetics/phonemap phone)))
 ;; Looks `phone` up in `phonetics/vowel` and returns the result.
 ;; NOTE(review): `phone` is passed through as-is, with no stress digit
 ;; stripped. If `phonetics/vowel` only holds stress-free phones, a phone such
 ;; as "IY1" would not be recognised as a vowel - confirm against callers.
 (defn vowel? [phone]
  (phonetics/vowel phone))
 ;; True for any phone for which `vowel?` returns a falsey value.
 (def consonant? (complement vowel?))
 (defn >sonorous
  "True when phone `a` is more sonorous than phone `b`, i.e. `a` has the
  smaller `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (> rank-b rank-a)))
 (defn <sonorous
  "True when phone `a` is less sonorous than phone `b`, i.e. `a` has the
  larger `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (< rank-b rank-a)))
 (defn slurp-rime
-  "Expects the phones in reverse order.
+  "Returns a vector of the rime and the remaining phones to process."
  Returns a vector of the rime (in forwards order) and the remaining phones to process."
  [phones]
-  (let [splits (util/take-through phonetics/vowel? phones)]
+  (let [splits (util/take-through vowel? phones)]
    [(vec (reverse (first splits))) (vec (flatten (rest splits)))]))
 (comment
@ -35,22 +61,18 @@
      ;; Two vowels next to each other is treated as two syllables.
      ;; This might not always be the case if the vowels are lax.
      ;; Is "royal" 1 syllable or two? This treats it as two.
-      (phonetics/vowel? (nth phones 0))
+      (vowel? (nth phones 0))
      [syllable phones]
      ;; Maximal onset principle with exception for lax vowels occurring in
      ;; closed syllables.
-      (and (phonetics/consonant? (nth syllable 0))
+      (and (consonant? (nth syllable 0))
-           (phonetics/<sonorous (nth phones 0) (nth syllable 0))
+           (<sonorous (nth phones 0) (nth syllable 0))
-           (not (phonetics/lax-vowels (nth phones 1 nil))))
+           (not (lax-vowels (nth phones 1 nil))))
      (recur (subvec phones 1)
             (into [(nth phones 0)] syllable))
      (phonetics/vowel? (nth syllable 0))
      (recur (subvec phones 1)
             (into [(nth phones 0)] syllable))
-      (not-any? phonetics/vowel? phones)
+      (vowel? (nth syllable 0))
      (recur (subvec phones 1)
             (into [(nth phones 0)] syllable))
@ -106,9 +128,6 @@
          (recur phones'' (into [syllable] segments)))))))
 (comment
  (syllabify ["S" "T" "IY" "L"])
  (slurp-rime (reverse ["S" "T" "IY" "L"]))
  (slurp-onset-given-rime ["T" "S"] ["IY" "L"])
  (phonetics/remove-stress ["AH" "L" "AE" "S" "K" "AH"])
  (slurp-onset-given-rime ["L" "AE" "S" "K" "AH"] ["AH"])
  (syllabify ["AH0" "L" "AE1" "S" "K" "AH0"])
--- a/src/com/owoga/phonetics/util.clj
+++ b/src/com/owoga/phonetics/util.clj
@ -4,7 +4,7 @@
 (defn take-through
  "(take-through even? [1 2 3 4 7 7 5 2 8 10])
-  returns '((1 2) (3 4) (7 7 5 2) (8) (10))"
+   returns '((1 2 3 4) (7 7 5 2) (8) (10))"
  [pred coll]
  (loop [coll coll
         acc '()]
@ -19,8 +19,3 @@
      :else
      (recur (rest coll)
             (cons (first coll) acc)))))
 (comment
  (take-through even? [1 2 3 4 7 7 5 2 8 10])
  ;; => ((1 2) (3 4) (7 7 5 2) (8) (10))
  )
--- a/test/com/owoga/phonetics/syllabify_test.clj
+++ b/test/com/owoga/phonetics/syllabify_test.clj
@ -38,10 +38,4 @@
  ;; about handling ambisyllabic words. There's no such thing.
  (testing "pillow"
    (is (= '(("P" "IH") ("L" "OW"))
-           (syllabify ["P" "IH" "L" "OW"]))))
+           (syllabify ["P" "IH" "L" "OW"])))))
  (testing "steel"
    (is (= [["S" "T" "IY1" "L"]]
           (syllabify ["S" "T" "IY1" "L"]))))
  (testing "scotch"
    (is (= [["S" "K" "AA1" "CH"]]
           (syllabify ["S" "K" "AA1" "CH"])))))
--- a/test/com/owoga/phonetics_test.clj
+++ b/test/com/owoga/phonetics_test.clj
@ -13,8 +13,4 @@
    (is (= ["hello(1)"]
           (get-word ["HH" "EH" "L" "OW"])))
    (is (= ["ensure(1)" "insure"]
-           (get-word ["IH" "N" "SH" "UH" "R"]))))
+           (get-word ["IH" "N" "SH" "UH" "R"])))))
  (testing "phrase to phones"
    (is (= [["B" "AA1" "G" "HH" "AA1" "G"]
            ["B" "AO1" "G" "HH" "AA1" "G"]]
           (phrase-phones "bog hog")))))