Compare commits

..

No commits in common. 'a93544a9c3a87c6a3ad379ca201c9902e997d447' and 'd15d9cf3e5a843d137ee02b6dabd804479b58443' have entirely different histories.

@ -1,22 +1,24 @@
# Change Log # Change Log
All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/).
## [0.1.3] - 2021-05-02 ## [Unreleased]
### Fixed ### Changed
- Fixed bug syllabifying words that begin with consonants that don't adhere to sonority hierarchy. - Add a new arity to `make-widget-async` to provide a different widget shape.
- "Steel", for example. "T" is less sonorous than "S" and typically wouldn't be included in an onset, but since there are no vowels preceding the "ST" then both *should* be included in the onset.
## [0.1.2] - 2021-04-22 ## [0.1.1] - 2021-04-22
### Fixed ### Changed
- Fixed bug when getting phones from CMULexicon because the word wasn't found in the CMU dictionary. (Missing parens) - Documentation on how to make the widgets.
- Comment out warn-on-reflection code that was just being used to find performance gains.
## 0.1.1 ### Removed
### Added - `make-widget-sync` - we're all async, all the time.
Initial release ### Fixed
- Fixed widget maker to keep working when daylight savings switches over.
- Phonetics and syllabification utilities ## 0.1.0 - 2021-04-22
### Added
- Files from the new template.
- Widget maker public API - `make-widget-sync`.
[Unreleased]: https://github.com/com.owoga/phonetics/compare/0.1.2...HEAD [Unreleased]: https://github.com/com.owoga/phonetics/compare/0.1.1...HEAD
[0.1.1]: https://github.com/com.owoga/phonetics/compare/0.1.1...0.1.2 [0.1.1]: https://github.com/com.owoga/phonetics/compare/0.1.0...0.1.1

@ -1,7 +1,6 @@
{:paths ["src" "resources"] {:paths ["src" "resources"]
:deps {org.clojure/clojure {:mvn/version "1.10.3"} :deps {org.clojure/clojure {:mvn/version "1.10.3"}
net.sf.sociaal/freetts {:mvn/version "1.2.2"} net.sf.sociaal/freetts {:mvn/version "1.2.2"}}
org.clojure/math.combinatorics {:mvn/version "0.1.6"}}
:aliases :aliases
{:test {:extra-paths ["test"] {:test {:extra-paths ["test"]
:extra-deps {org.clojure/test.check {:mvn/version "1.1.0"}}} :extra-deps {org.clojure/test.check {:mvn/version "1.1.0"}}}

@ -3,10 +3,10 @@
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.owoga</groupId> <groupId>com.owoga</groupId>
<artifactId>phonetics</artifactId> <artifactId>phonetics</artifactId>
<version>0.1.3</version> <version>0.1.1</version>
<name>com.owoga/phonetics</name> <name>com.owoga/phonetics</name>
<description>Phonetics and syllabification of English words.</description> <description>Phonetics and syllabification of English words.</description>
<url>https://github.com/eihli/phonetics</url> <url>https://github.com/com.owoga/phonetics</url>
<licenses> <licenses>
<license> <license>
<name>MIT License</name> <name>MIT License</name>
@ -19,9 +19,9 @@
</developer> </developer>
</developers> </developers>
<scm> <scm>
<url>https://github.com/eihli/phonetics</url> <url>https://github.com/com.owoga/phonetics</url>
<connection>scm:git:git://github.com/eihli/phonetics.git</connection> <connection>scm:git:git://github.com/com.owoga/phonetics.git</connection>
<developerConnection>scm:git:ssh://git@github.com/eihli/phonetics.git</developerConnection> <developerConnection>scm:git:ssh://git@github.com/com.owoga/phonetics.git</developerConnection>
<tag>v0.1.0-SNAPSHOT</tag> <tag>v0.1.0-SNAPSHOT</tag>
</scm> </scm>
<dependencies> <dependencies>

BIN
resources/07IJCAI-spelling-variants.pdf (Stored with Git LFS)

Binary file not shown.

@ -2,8 +2,7 @@
(:require [clojure.set] (:require [clojure.set]
[clojure.string :as string] [clojure.string :as string]
[clojure.java.io :as io] [clojure.java.io :as io]
[clojure.set :as set] [clojure.set :as set])
[clojure.math.combinatorics :as combinatorics])
(:import (com.sun.speech.freetts.en.us CMULexicon))) (:import (com.sun.speech.freetts.en.us CMULexicon)))
#_(set! *warn-on-reflection* true) #_(set! *warn-on-reflection* true)
@ -156,34 +155,6 @@
two different ways of getting phonemes." two different ways of getting phonemes."
(CMULexicon/getInstance true)) (CMULexicon/getInstance true))
;; This sonority hierarchy may not be perfect.
;; It stems from: http://www.glottopedia.org/index.php/Sonority_hierarchy
;; I tried to match the phones provided by the CMU dict to the hierarchies
;; listed on that page:
;; vowels > liquids > nasals > voiced fricatives
;; > voiceless fricatives = voiced plosives
;; > voiceless plosives (Anderson & Ewen 1987)
(def ^clojure.lang.PersistentVector sonority-hierarchy
  "Phone classes ordered from most sonorous (index 0) to least sonorous
  (last index). A phone's position in this vector is its sonority rank."
  ;; more sonorous < < < vowel < < < (maximal onset) vowel > > > less sonorous
  ["vowel"
   "liquid"
   "semivowel"
   "aspirate"
   "affricate"
   "nasal"
   "fricative"
   "stop"])
(def lax-vowels
  "Set of the phones treated as lax vowels (written without stress digits).
  Usable as a predicate: returns the phone when present, nil otherwise."
  (hash-set "EH" "IH" "AE" "AH" "UH"))
(defn sonority
  "Returns the index of `phone`'s class (as looked up in `phonemap`) within
  `sonority-hierarchy`. Lower numbers mean more sonorous. Yields -1 when the
  looked-up class is not present in the hierarchy."
  [phone]
  (let [phone-class (phonemap phone)]
    (.indexOf sonority-hierarchy phone-class)))
(defn vowel?
  "Strips every digit (the stress markers) from `phone` and passes the result
  to `vowel`, returning whatever `vowel` returns."
  [phone]
  (-> phone
      (string/replace #"\d" "")
      vowel))
;; A phone is a consonant exactly when `vowel?` is falsey for it.
(def consonant? (comp not vowel?))
(defn >sonorous
  "True when phone `a` is more sonorous than phone `b`, i.e. `a` has the
  smaller `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (< rank-a rank-b)))
(defn <sonorous
  "True when phone `a` is less sonorous than phone `b`, i.e. `a` has the
  larger `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (> rank-a rank-b)))
(defn remove-stress [phonemes] (defn remove-stress [phonemes]
(mapv #(string/replace % #"\d" "") phonemes)) (mapv #(string/replace % #"\d" "") phonemes))
@ -231,31 +202,12 @@
(.getPhones cmu-lexicon word nil))]))) (.getPhones cmu-lexicon word nil))])))
(defn get-word (defn get-word
"Returns vector of all words that are in the CMU pronouncing dictionary
that have the pronunciation given `phones`.
Expects phones to have stress removed.
Not an exact inverse of `get-phones` since `get-phones` can figure out
somewhat appropriate phones for a made-up word. This function cannot
figure out the spelling of a made-up word provided the made-up word's phones.
Returns nil if no word can be found."
[phones] [phones]
(let [stressed? (some #(re-matches #".*\d" %) phones)] (let [stressed? (some #(re-matches #".*\d" %) phones)]
(if stressed? (if stressed?
(stressed-phones-to-cmu-word-map phones) (stressed-phones-to-cmu-word-map phones)
(unstressed-phones-to-cmu-word-map phones)))) (unstressed-phones-to-cmu-word-map phones))))
(defn phrase-phones
  "Pronunciations of a phrase whose words are separated by single spaces.
  Each word is looked up with `get-phones`; the result is a vector holding one
  flat vector of phones for every combination of per-word pronunciations."
  [phrase]
  (let [words        (string/split phrase #" ")
        per-word     (map get-phones words)
        combinations (apply combinatorics/cartesian-product per-word)]
    ;; Each combination is a seq of phone vectors; concatenate them into one.
    (mapv #(reduce into [] %) combinations)))
(comment (comment
(get-phones "alaska") (get-phones "alaska")
;; => [["AH0" "L" "AE1" "S" "K" "AH0"]] ;; => [["AH0" "L" "AE1" "S" "K" "AH0"]]
@ -268,10 +220,4 @@
;; => ["alaska"] ;; => ["alaska"]
(get-word ["N" "IY" "S"]) (get-word ["N" "IY" "S"])
;; => ["neice" "neece" "niece" "nice(1)" "kneece" "kniess" "neiss" "neace" "niess"] ;; => ["neice" "neece" "niece" "nice(1)" "kneece" "kniess" "neiss" "neace" "niess"]
(get-word ["F" "UW" "B" "AE" "Z"])
;; => nil
(phrase-phones "bog hog")
;; [["B" "AA1" "G" "HH" "AA1" "G"]
;; ["B" "AO1" "G" "HH" "AA1" "G"]]
) )

@ -1,64 +0,0 @@
(ns com.owoga.phonetics.stress-manip
(:require [clojure.string :as string]))
(defn primary-stress?
  "Returns \"1\" (truthy) when `phone` contains the primary-stress digit 1,
  otherwise nil."
  [phone]
  (->> phone
       (re-find #"1")))
(defn non-primary-stress?
  "Returns the first digit in the range 2-9 found in `phone` (truthy), or nil
  when `phone` carries no such stress digit."
  [phone]
  (->> phone
       (re-find #"[2-9]")))
(defn unstressed?
  "Returns \"0\" (truthy) when `phone` contains the digit 0, otherwise nil."
  [phone]
  (->> phone
       (re-find #"0")))
(defn remove-any-stress-signifiers
  "Returns a lazy seq of `phones` with every digit removed from each phone."
  [phones]
  (for [phone phones]
    (string/replace phone #"\d" "")))
(defn remove-non-primary-stress-signifiers
  "Returns a lazy seq of `phones` where the digits 0 and 2-9 have been removed
  from each phone; the primary-stress digit 1 is left in place."
  [phones]
  (for [phone phones]
    (string/replace phone #"[02-9]" "")))
(defn unify-stressed
  "Returns a lazy seq of `phones` where every digit in the range 2-9 is
  rewritten to 1; the digits 0 and 1 are left untouched."
  [phones]
  (for [phone phones]
    (string/replace phone #"[2-9]" "1")))
(def consonant-unification-map
  "Maps each consonant phone to the representative phone it is unified with.
  This almost aligns with the phonemap that maps phones to whether they are
  vowels, aspirates, nasals, etc., with a slight but possibly important
  difference in stops: for example, T and D are considered more unified than
  T and G, and G and K more unified than G and T."
  {;; Representatives: these phones map to themselves.
   "T"  "T"
   "CH" "CH"
   "K"  "K"
   "HH" "HH"
   "L"  "L"
   "M"  "M"
   "S"  "S"
   "Y"  "Y"
   "R"  "R"
   "F"  "F"
   "B"  "B"
   ;; Phones folded into a different representative.
   "D"  "T"
   "TH" "T"
   "DH" "T"
   "JH" "CH"
   "SH" "CH"
   "G"  "K"
   "N"  "M"
   "NG" "M"
   "Z"  "S"
   "ZH" "S"
   "W"  "Y"
   "V"  "F"
   "P"  "B"})
(defn unify-consonants
  "Returns a vector of `phones` where each phone found in
  `consonant-unification-map` is replaced by its mapped representative; phones
  without an entry are passed through unchanged."
  [phones]
  (into []
        (map (fn [phone]
               (get consonant-unification-map phone phone)))
        phones))
(defn remove-unstressed-signifiers
  "Returns a lazy seq of `phones` with every 0 digit removed from each phone.
  Other stress digits (1-9) are left in place."
  [phones]
  ;; `phones` must be handed to `map`; calling `map` with only the function
  ;; yields a transducer rather than the transformed phones.
  (map #(string/replace % #"0" "") phones))

@ -5,11 +5,37 @@
#_(set! *warn-on-reflection* true) #_(set! *warn-on-reflection* true)
;; This sonority hierarchy may not be perfect.
;; It stems from: http://www.glottopedia.org/index.php/Sonority_hierarchy
;; I tried to match the phones provided by the CMU dict to the hierarchies
;; listed on that page:
;; vowels > liquids > nasals > voiced fricatives
;; > voiceless fricatives = voiced plosives
;; > voiceless plosives (Anderson & Ewen 1987)
(def ^clojure.lang.PersistentVector sonority-hierarchy
  "Phone classes ordered from most sonorous (index 0) to least sonorous
  (last index). A phone's position in this vector is its sonority rank."
  ;; more sonorous < < < vowel < < < (maximal onset) vowel > > > less sonorous
  ["vowel"
   "liquid"
   "semivowel"
   "aspirate"
   "affricate"
   "nasal"
   "fricative"
   "stop"])
(def lax-vowels
  "Set of the phones treated as lax vowels (written without stress digits).
  Usable as a predicate: returns the phone when present, nil otherwise."
  (hash-set "EH" "IH" "AE" "AH" "UH"))
(defn sonority
  "Returns the index of `phone`'s class (as looked up in `phonetics/phonemap`)
  within `sonority-hierarchy`. Lower numbers mean more sonorous. Yields -1
  when the looked-up class is not present in the hierarchy."
  [phone]
  (let [phone-class (phonetics/phonemap phone)]
    (.indexOf sonority-hierarchy phone-class)))
(defn vowel?
  "Passes `phone` unchanged to `phonetics/vowel` and returns its result.
  NOTE(review): no stress digits are stripped here; confirm callers pass
  phones in the form `phonetics/vowel` expects."
  [phone]
  (-> phone
      phonetics/vowel))
;; A phone is a consonant exactly when `vowel?` is falsey for it.
(def consonant? (comp not vowel?))
(defn >sonorous
  "True when phone `a` is more sonorous than phone `b`, i.e. `a` has the
  smaller `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (< rank-a rank-b)))
(defn <sonorous
  "True when phone `a` is less sonorous than phone `b`, i.e. `a` has the
  larger `sonority` index."
  [a b]
  (let [rank-a (sonority a)
        rank-b (sonority b)]
    (> rank-a rank-b)))
(defn slurp-rime (defn slurp-rime
"Expects the phones in reverse order. "Returns a vector of the rime and the remaining phones to process."
Returns a vector of the rime (in forwards order) and the remaining phones to process."
[phones] [phones]
(let [splits (util/take-through phonetics/vowel? phones)] (let [splits (util/take-through vowel? phones)]
[(vec (reverse (first splits))) (vec (flatten (rest splits)))])) [(vec (reverse (first splits))) (vec (flatten (rest splits)))]))
(comment (comment
@ -35,22 +61,18 @@
;; Two vowels next to each other is treated as two syllables. ;; Two vowels next to each other is treated as two syllables.
;; This might not always be the case if the vowels are lax. ;; This might not always be the case if the vowels are lax.
;; Is "royal" 1 syllable or two? This treats it as two. ;; Is "royal" 1 syllable or two? This treats it as two.
(phonetics/vowel? (nth phones 0)) (vowel? (nth phones 0))
[syllable phones] [syllable phones]
;; Maximal onset principle with exception for lax vowels occurring in ;; Maximal onset principle with exception for lax vowels occurring in
;; closed syllables. ;; closed syllables.
(and (phonetics/consonant? (nth syllable 0)) (and (consonant? (nth syllable 0))
(phonetics/<sonorous (nth phones 0) (nth syllable 0)) (<sonorous (nth phones 0) (nth syllable 0))
(not (phonetics/lax-vowels (nth phones 1 nil)))) (not (lax-vowels (nth phones 1 nil))))
(recur (subvec phones 1)
(into [(nth phones 0)] syllable))
(phonetics/vowel? (nth syllable 0))
(recur (subvec phones 1) (recur (subvec phones 1)
(into [(nth phones 0)] syllable)) (into [(nth phones 0)] syllable))
(not-any? phonetics/vowel? phones) (vowel? (nth syllable 0))
(recur (subvec phones 1) (recur (subvec phones 1)
(into [(nth phones 0)] syllable)) (into [(nth phones 0)] syllable))
@ -106,9 +128,6 @@
(recur phones'' (into [syllable] segments))))))) (recur phones'' (into [syllable] segments)))))))
(comment (comment
(syllabify ["S" "T" "IY" "L"])
(slurp-rime (reverse ["S" "T" "IY" "L"]))
(slurp-onset-given-rime ["T" "S"] ["IY" "L"])
(phonetics/remove-stress ["AH" "L" "AE" "S" "K" "AH"]) (phonetics/remove-stress ["AH" "L" "AE" "S" "K" "AH"])
(slurp-onset-given-rime ["L" "AE" "S" "K" "AH"] ["AH"]) (slurp-onset-given-rime ["L" "AE" "S" "K" "AH"] ["AH"])
(syllabify ["AH0" "L" "AE1" "S" "K" "AH0"]) (syllabify ["AH0" "L" "AE1" "S" "K" "AH0"])

@ -4,7 +4,7 @@
(defn take-through (defn take-through
"(take-through even? [1 2 3 4 7 7 5 2 8 10]) "(take-through even? [1 2 3 4 7 7 5 2 8 10])
returns '((1 2) (3 4) (7 7 5 2) (8) (10))" returns '((1 2 3 4) (7 7 5 2) (8) (10))"
[pred coll] [pred coll]
(loop [coll coll (loop [coll coll
acc '()] acc '()]
@ -19,8 +19,3 @@
:else :else
(recur (rest coll) (recur (rest coll)
(cons (first coll) acc))))) (cons (first coll) acc)))))
(comment
(take-through even? [1 2 3 4 7 7 5 2 8 10])
;; => ((1 2) (3 4) (7 7 5 2) (8) (10))
)

@ -38,10 +38,4 @@
;; about handling ambisyllabic words. There's no such thing. ;; about handling ambisyllabic words. There's no such thing.
(testing "pillow" (testing "pillow"
(is (= '(("P" "IH") ("L" "OW")) (is (= '(("P" "IH") ("L" "OW"))
(syllabify ["P" "IH" "L" "OW"])))) (syllabify ["P" "IH" "L" "OW"])))))
(testing "steel"
(is (= [["S" "T" "IY1" "L"]]
(syllabify ["S" "T" "IY1" "L"]))))
(testing "scotch"
(is (= [["S" "K" "AA1" "CH"]]
(syllabify ["S" "K" "AA1" "CH"])))))

@ -13,8 +13,4 @@
(is (= ["hello(1)"] (is (= ["hello(1)"]
(get-word ["HH" "EH" "L" "OW"]))) (get-word ["HH" "EH" "L" "OW"])))
(is (= ["ensure(1)" "insure"] (is (= ["ensure(1)" "insure"]
(get-word ["IH" "N" "SH" "UH" "R"])))) (get-word ["IH" "N" "SH" "UH" "R"])))))
(testing "phrase to phones"
(is (= [["B" "AA1" "G" "HH" "AA1" "G"]
["B" "AO1" "G" "HH" "AA1" "G"]]
(phrase-phones "bog hog")))))

Loading…
Cancel
Save