Initial commit, syllabification and phonetics.
commit
69d88fb732
@ -0,0 +1,2 @@
|
||||
**.pdf filter=lfs diff=lfs merge=lfs -text
|
||||
resources/cmudict-0.7b filter=lfs diff=lfs merge=lfs -text
|
@ -0,0 +1,15 @@
|
||||
/target
|
||||
/classes
|
||||
/checkouts
|
||||
*.jar
|
||||
*.class
|
||||
/.calva/output-window/
|
||||
/.cpcache
|
||||
/.lein-*
|
||||
/.lsp/sqlite*.db
|
||||
/.nrepl-history
|
||||
/.nrepl-port
|
||||
/.rebel_readline_history
|
||||
/.socket-repl-port
|
||||
.hgignore
|
||||
.hg/
|
@ -0,0 +1,24 @@
|
||||
# Change Log
|
||||
All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/).
|
||||
|
||||
## [Unreleased]
|
||||
### Changed
|
||||
- Add a new arity to `make-widget-async` to provide a different widget shape.
|
||||
|
||||
## [0.1.1] - 2021-04-22
|
||||
### Changed
|
||||
- Documentation on how to make the widgets.
|
||||
|
||||
### Removed
|
||||
- `make-widget-sync` - we're all async, all the time.
|
||||
|
||||
### Fixed
|
||||
- Fixed widget maker to keep working when daylight savings switches over.
|
||||
|
||||
## 0.1.0 - 2021-04-22
|
||||
### Added
|
||||
- Files from the new template.
|
||||
- Widget maker public API - `make-widget-sync`.
|
||||
|
||||
[Unreleased]: https://github.com/com.owoga/phonetics/compare/0.1.1...HEAD
|
||||
[0.1.1]: https://github.com/com.owoga/phonetics/compare/0.1.0...0.1.1
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2021 Eric Ihli
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,21 @@
|
||||
{:paths ["src" "resources"]
|
||||
:deps {org.clojure/clojure {:mvn/version "1.10.3"}
|
||||
net.sf.sociaal/freetts {:mvn/version "1.2.2"}}
|
||||
:aliases
|
||||
{:test {:extra-paths ["test"]
|
||||
:extra-deps {org.clojure/test.check {:mvn/version "1.1.0"}}}
|
||||
:runner
|
||||
{:extra-deps {com.cognitect/test-runner
|
||||
{:git/url "https://github.com/cognitect-labs/test-runner"
|
||||
:sha "b6b3193fcc42659d7e46ecd1884a228993441182"}}
|
||||
:main-opts ["-m" "cognitect.test-runner"
|
||||
"-d" "test"]}
|
||||
:jar {:replace-deps {com.github.seancorfield/depstar {:mvn/version "2.0.211"}}
|
||||
:exec-fn hf.depstar/jar
|
||||
:exec-args {:jar "phonetics.jar" :sync-pom true}}
|
||||
:install {:replace-deps {slipset/deps-deploy {:mvn/version "0.1.5"}}
|
||||
:exec-fn deps-deploy.deps-deploy/deploy
|
||||
:exec-args {:installer :local :artifact "phonetics.jar"}}
|
||||
:deploy {:replace-deps {slipset/deps-deploy {:mvn/version "0.1.5"}}
|
||||
:exec-fn deps-deploy.deps-deploy/deploy
|
||||
:exec-args {:installer :remote :artifact "phonetics.jar"}}}}
|
@ -0,0 +1,3 @@
|
||||
# Introduction to phonetics
|
||||
|
||||
TODO: write [great documentation](http://jacobian.org/writing/what-to-write/)
|
@ -0,0 +1,55 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>com.owoga</groupId>
|
||||
<artifactId>phonetics</artifactId>
|
||||
<version>0.1.1</version>
|
||||
<name>com.owoga/phonetics</name>
|
||||
<description>Phonetics and syllabification of English words.</description>
|
||||
<url>https://github.com/com.owoga/phonetics</url>
|
||||
<licenses>
|
||||
<license>
|
||||
<name>MIT License</name>
|
||||
<url>https://mit-license.org/</url>
|
||||
</license>
|
||||
</licenses>
|
||||
<developers>
|
||||
<developer>
|
||||
<name>Eric Ihli</name>
|
||||
</developer>
|
||||
</developers>
|
||||
<scm>
|
||||
<url>https://github.com/com.owoga/phonetics</url>
|
||||
<connection>scm:git:git://github.com/com.owoga/phonetics.git</connection>
|
||||
<developerConnection>scm:git:ssh://git@github.com/com.owoga/phonetics.git</developerConnection>
|
||||
<tag>v0.1.0-SNAPSHOT</tag>
|
||||
</scm>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.clojure</groupId>
|
||||
<artifactId>clojure</artifactId>
|
||||
<version>1.10.3</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf.sociaal</groupId>
|
||||
<artifactId>freetts</artifactId>
|
||||
<version>1.2.2</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<build>
|
||||
<sourceDirectory>src</sourceDirectory>
|
||||
</build>
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>clojars</id>
|
||||
<url>https://repo.clojars.org/</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
<distributionManagement>
|
||||
<repository>
|
||||
<id>clojars</id>
|
||||
<name>Clojars repository</name>
|
||||
<url>https://clojars.org/repo</url>
|
||||
</repository>
|
||||
</distributionManagement>
|
||||
</project>
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,204 @@
|
||||
(ns com.owoga.phonetics
|
||||
(:require [clojure.set]
|
||||
[clojure.string :as string]
|
||||
[clojure.java.io :as io]
|
||||
[clojure.set :as set])
|
||||
(:import (com.sun.speech.freetts.en.us CMULexicon)))
|
||||
|
||||
;; From http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.phones
|
||||
|
||||
(def phonemap
  "Maps each CMU phone symbol (without stress digit) to the name of its
  class: vowel, stop, affricate, aspirate, liquid, nasal, fricative or
  semivowel."
  (into {}
        (for [[phone-class phones]
              {"vowel"     ["AA" "AE" "AH" "AO" "AW" "AY" "EH" "ER"
                            "EY" "IH" "IY" "OW" "OY" "UH" "UW"]
               "stop"      ["B" "D" "G" "K" "P" "T"]
               "affricate" ["CH" "JH"]
               "aspirate"  ["HH"]
               "liquid"    ["L" "R"]
               "nasal"     ["M" "N" "NG"]
               "fricative" ["DH" "F" "S" "SH" "TH" "V" "Z" "ZH"]
               "semivowel" ["W" "Y"]}
              phone phones]
          [phone phone-class])))
|
||||
|
||||
;; Vowel phones, split into two groups.
(def long-vowel #{"AY" "EY" "IY" "OW" "UW"})

(def short-vowel #{"AA" "AE" "AH" "AO" "AW" "EH" "ER" "IH" "OY" "UH"})

;; Every vowel phone: the long and the short group combined.
(def vowel (into long-vowel short-vowel))

;; Every phone listed in `phonemap` that is not a vowel.
(def consonant (set (remove vowel (keys phonemap))))

;; The consonants together with the long vowels.
(def syllable-end (into consonant long-vowel))

;; NOTE(review): presumably letter pairs that are pronounced as one sound;
;; nothing in the visible source references this set -- confirm usage.
(def single-sound-bigram #{"CH" "PH" "SH" "TH" "WH"})
|
||||
|
||||
(def cmu-word-to-stressed-phones-map
  "Map of lowercase English words to their phonetic sounding based on
  the CMU Pronouncing Dictionary at http://www.speech.cs.cmu.edu/cgi-bin/cmudict/

  Includes words with apostrophes, like possessive aaronson's.

  Words with multiple pronunciations have keys with a `(1)` or `(2)` after their
  duplicates, like [aaronsons(1) (AA1 R AH0 N S AH0 N Z)]

  Primary stress is indicated by a `1` after the phoneme. Secondary stress with a `2`.
  Unstressed with a `0`.

  The dictionary is read from the `cmudict-0.7b` classpath resource. The
  reader is closed as soon as the map has been built."
  ;; `with-open` guarantees the reader is closed; `into` is eager, so every
  ;; line is consumed before the reader goes out of scope.
  (with-open [rdr (io/reader (io/resource "cmudict-0.7b"))]
    (into {}
          (comp
           ;; Skip only the leading run of lines that start with `;`.
           ;; Later lines starting with `;` are kept.
           (drop-while #(= \; (first %)))
           (map #(string/split % #"\s+"))
           ;; First token is the word, the remaining tokens are its phones.
           (map (fn [[word & phones]]
                  [(string/lower-case word) (vec phones)])))
          (line-seq rdr))))
|
||||
|
||||
(def cmu-word-alternatives
  "Groups the keys of the CMU dictionary by their base word, i.e. the key
  with any `(N)` variant marker removed. Each base word maps to a sorted
  sequence of all of its keys.

  reputed -> reputed, reputed(1), reputed(2).

  The variant keys say nothing about how the pronunciations differ; they
  are meant to be looked up in `cmu-word-to-stressed-phones-map`."
  (let [base-word #(string/replace % #"\(\d\)" "")]
    (->> (keys cmu-word-to-stressed-phones-map)
         (group-by base-word)
         (into {} (map (fn [[word variants]]
                         [word (sort variants)]))))))
|
||||
|
||||
(defn word-alternatives
  "Looks `word` up in `cmu-word-alternatives` and returns the sorted
  sequence of CMU dictionary keys that share it as their base word, or nil
  when the word has no entry.

  reputed -> reputed, reputed(1), reputed(2)."
  [word]
  (cmu-word-alternatives word))
|
||||
|
||||
(def stressed-phones-to-cmu-word-map
  "Inverse of `cmu-word-to-stressed-phones-map`: maps a vector of stressed
  phones to a vector of every dictionary key with that pronunciation, since
  the same sequence of phones can map to multiple words."
  (reduce
   (fn [index [word phones]]
     (assoc index phones (conj (get index phones []) word)))
   {}
   cmu-word-to-stressed-phones-map))
|
||||
|
||||
(def cmu-word-to-unstressed-phones-map
  "Same keys as `cmu-word-to-stressed-phones-map`, but every digit is
  stripped from every phone, leaving the bare phone symbols."
  (let [strip-digits (fn [phone] (string/replace phone #"\d" ""))]
    (into {}
          (map (fn [[word phones]]
                 [word (mapv strip-digits phones)]))
          cmu-word-to-stressed-phones-map)))
|
||||
|
||||
(def unstressed-phones-to-cmu-word-map
  "Maps a sequence of phones with the stress digits removed to a vector of
  every dictionary key whose pronunciation reduces to that sequence.

  Several stressed pronunciations can collapse to the same unstressed
  phones, so look the returned keys up in the CMU Pronouncing dictionary
  to see what their stressed phones are.

  Another example, look at how many words map to [N IY S].

  [[N IY S]
   [neice neece niece nice kneece kniess neiss neace niess]]"
  (let [strip-digits (fn [phone] (string/replace phone #"\d" ""))]
    (reduce
     (fn [index [word stressed-phones]]
       (let [bare-phones (map strip-digits stressed-phones)]
         (assoc index bare-phones (conj (get index bare-phones []) word))))
     {}
     cmu-word-to-stressed-phones-map)))
|
||||
|
||||
;; NOTE(review): this instance is constructed and immediately discarded. It
;; is not clear from this file whether the construction is needed for a side
;; effect -- confirm before removing.
(new CMULexicon "cmulex" true)

(def ^CMULexicon cmu-lexicon
  "FreeTTS lexicon obtained from `CMULexicon/getInstance`.

  The CMULexicon can produce phones for words that aren't in the CMU
  Pronouncing Dictionary, but its notation differs slightly:

  - The `AH` sound, as in `allow`, is returned as `ax`.
  - Unstressed vowels carry no `0` suffix; they are returned as the bare
    vowel.

  Keep this in mind if you want clean interplay between the two ways of
  getting phonemes."
  (CMULexicon/getInstance true))
|
||||
|
||||
(defn remove-stress
  "Returns a vector of `phonemes` with every digit removed from each one,
  e.g. \"AE1\" becomes \"AE\"."
  [phonemes]
  (into []
        (map (fn [phoneme] (string/replace phoneme #"\d" "")))
        phonemes))
|
||||
|
||||
(defn cmu-lexicon->cmu-pronouncing-dict
  "Converts phones in CMULexicon notation to CMU Pronouncing Dictionary
  notation and returns them as a vector.

  The CMULexicon returns the `AH` sound, as in `allow`, as `ax`, while the
  Sphinx dictionary treats that sound as `AH`, so `ax` is rewritten to
  `AH`. Every phone is upper-cased, and a `0` is appended to any phone
  that is a bare vowel, because the CMULexicon returns unstressed vowels
  with no stress marker."
  [phonemes]
  (letfn [(convert [phoneme]
            (let [phone (string/upper-case (if (= "ax" phoneme) "ah" phoneme))]
              ;; A phone found in `vowel` has no stress digit: mark it unstressed.
              (if (vowel phone)
                (str phone "0")
                phone)))]
    (mapv convert phonemes)))
|
||||
|
||||
(comment
|
||||
(type (.getPhones cmu-lexicon "allow" nil)) ;; => [Ljava.lang.String;
|
||||
(vec (.getPhones cmu-lexicon "allow" nil)) ;; => ["ax" "l" "aw1"]
|
||||
(cmu-lexicon->cmu-pronouncing-dict
|
||||
(.getPhones cmu-lexicon "allowance" nil))
|
||||
;; => ["AH0" "L" "AW1" "AH0" "N" "S"]
|
||||
(cmu-word-to-stressed-phones-map "allowance")
|
||||
;; => ["AH0" "L" "AW1" "AH0" "N" "S"]
|
||||
)
|
||||
|
||||
(defn get-phones
  "Returns a vector of all known pronunciations of `word`, each one a
  vector of phones.

  The CMU Pronouncing Dictionary is consulted first, yielding one
  pronunciation per variant key of the word. If the word has no entry
  there, the CMULexicon is asked instead and its single answer is
  converted to the dictionary's notation.

  Input must be lower-case."
  [word]
  (if-let [variants (seq (word-alternatives word))]
    (mapv cmu-word-to-stressed-phones-map variants)
    [(cmu-lexicon->cmu-pronouncing-dict
      (.getPhones cmu-lexicon word nil))]))
|
||||
|
||||
(defn get-word
  "Returns the vector of CMU dictionary keys pronounced as `phones`, or nil
  when there is none.

  If any phone ends in a digit the phones are treated as stressed and
  looked up in `stressed-phones-to-cmu-word-map`; otherwise they are
  looked up in `unstressed-phones-to-cmu-word-map`."
  [phones]
  (let [ends-in-digit? (fn [phone] (re-matches #".*\d" phone))
        lookup         (if (some ends-in-digit? phones)
                         stressed-phones-to-cmu-word-map
                         unstressed-phones-to-cmu-word-map)]
    (lookup phones)))
|
@ -0,0 +1,135 @@
|
||||
(ns com.owoga.phonetics.syllabify
|
||||
(:require [com.owoga.phonetics :as phonetics]
|
||||
[com.owoga.phonetics.util :as util]
|
||||
[clojure.string :as string]))
|
||||
|
||||
(set! *warn-on-reflection* true)
|
||||
|
||||
;; This sonority hierarchy may not be perfect.
|
||||
;; It stems from: http://www.glottopedia.org/index.php/Sonority_hierarchy
|
||||
;; I tried to match the phones provided by the CMU dict to the hierarchies
|
||||
;; listed on that page:
|
||||
;; vowels > liquids > nasals > voiced fricatives
|
||||
;; > voiceless fricatives = voiced plosives
|
||||
;; > voiceless plosives (Anderson & Ewen 1987)
|
||||
(def ^clojure.lang.PersistentVector sonority-hierarchy
  "Phone classes ordered from most sonorous (index 0) to least sonorous
  (last index). A phone's sonority rank is the index of its class here."
  ["vowel"
   "liquid"
   "semivowel"
   "aspirate"
   "affricate"
   "nasal"
   "fricative"
   "stop"])

;; The vowels treated as lax by the syllabifier.
(def lax-vowels #{"AE" "AH" "EH" "IH" "UH"})
|
||||
|
||||
(defn sonority
  "Returns the index of `phone`'s class within `sonority-hierarchy`; a
  lower index means more sonorous. Returns -1 when the phone's class is
  not in the hierarchy, which includes phones missing from the phone map."
  [phone]
  (let [phone-class (phonetics/phonemap phone)]
    (.indexOf sonority-hierarchy phone-class)))
|
||||
|
||||
(defn vowel?
  "Returns `phone` itself (truthy) when it is in the set of vowel phones,
  otherwise nil. Phones carrying a stress digit are not recognised."
  [phone]
  (get phonetics/vowel phone))

;; True for anything `vowel?` rejects.
(def consonant? (comp not vowel?))
|
||||
|
||||
(defn >sonorous
  "True when phone `a` is more sonorous than phone `b`, i.e. `a`'s class
  comes earlier in `sonority-hierarchy`."
  [a b]
  (> (sonority b) (sonority a)))

(defn <sonorous
  "True when phone `a` is less sonorous than phone `b`, i.e. `a`'s class
  comes later in `sonority-hierarchy`."
  [a b]
  (< (sonority b) (sonority a)))
|
||||
|
||||
(defn slurp-rime
  "Splits `phones` just after the first vowel.

  Returns a two-element vector: the phones up to and including that vowel,
  in reverse order (the rime), and a vector of the remaining phones still
  to be processed, in their original order."
  [phones]
  (let [[rime-chunk & remaining-chunks] (util/take-through vowel? phones)]
    [(vec (reverse rime-chunk))
     (vec (flatten remaining-chunks))]))
|
||||
|
||||
(comment
|
||||
(slurp-rime ["AH" "K" "S" "AE" "L" "AH"])
|
||||
;; => [["AH"] ["K" "S" "AE" "L" "AH"]]
|
||||
(slurp-rime ["K" "S" "AE" "L" "AH"])
|
||||
;; => [["AE" "S" "K"] ["L" "AH"]]
|
||||
)
|
||||
|
||||
(defn slurp-onset-given-rime
  "Extends `rime` with onset phones taken from the front of `phones`.

  Both arguments are vectors of phones. `phones` is backwards (the word is
  processed from its end, following the maximal onset principle), while
  `rime` is forwards since it is the end result of reading the word.

  Returns a vector of the finished syllable and the phones that remain to
  be processed."
  [phones rime]
  (loop [remaining phones
         syllable  rime]
    (if (or (empty? remaining)
            ;; Two vowels next to each other are treated as two syllables.
            ;; This might not always be right if the vowels are lax.
            ;; Is "royal" one syllable or two? This treats it as two.
            (vowel? (nth remaining 0)))
      [syllable remaining]
      (let [candidate (nth remaining 0)
            head      (nth syllable 0)
            extends-onset?
            (if (vowel? head)
              ;; The first consonant before the vowel always joins the onset.
              true
              ;; Maximal onset principle: keep adding consonants while they
              ;; are less sonorous than the current head of the syllable,
              ;; except when the phone before the candidate is a lax vowel,
              ;; which must stay in a closed syllable.
              (and (<sonorous candidate head)
                   (not (lax-vowels (nth remaining 1 nil)))))]
        (if extends-onset?
          (recur (subvec remaining 1)
                 (into [candidate] syllable))
          [syllable remaining])))))
|
||||
|
||||
(comment
|
||||
(slurp-onset-given-rime
|
||||
["K" "S" "AE" "L" "A"]
|
||||
["AH"])
|
||||
|
||||
)
|
||||
(defn apply-stress
  "Regroups the flat sequence `stressed-phones` into syllables that have
  the same lengths as the syllables in `unstressed-syllables`.

  `unstressed-syllables` is a sequence of syllables, each a sequence of
  phones; only the number of phones per syllable is used. `stressed-phones`
  is the same word as one flat sequence, typically still carrying its
  stress digits.

  Returns a vector of vectors of phones taken from `stressed-phones`.
  Returns [[]] when `stressed-phones` is empty. If `stressed-phones` runs
  out early, the syllables built so far are returned.

  Throws clojure.lang.ExceptionInfo when `stressed-phones` contains more
  phones than `unstressed-syllables` accounts for. Previously that input
  looped forever while the result kept growing."
  [unstressed-syllables stressed-phones]
  (loop [syllables unstressed-syllables
         phones    stressed-phones
         result    [[]]]
    (cond
      (empty? phones)
      result

      ;; Phones are left over but no syllable is left to receive them.
      (empty? syllables)
      (throw (ex-info "More stressed phones than the syllables account for."
                      {:remaining-phones (vec phones)
                       :syllables-so-far result}))

      ;; Current syllable is full: start the next one.
      (empty? (first syllables))
      (recur (rest syllables)
             phones
             (conj result []))

      ;; Move one phone into the syllable currently being built.
      :else
      (recur (cons (rest (first syllables)) (rest syllables))
             (rest phones)
             (conj (pop result) (conj (peek result) (first phones)))))))
|
||||
|
||||
(comment
|
||||
(apply-stress '(("AH") ("L" "AE" "S") ("K" "AH"))
|
||||
'("AH0" "L" "AE1" "S" "K" "AH0"))
|
||||
;; => [["AH0"] ["L" "AE1" "S"] ["K" "AH0"]]
|
||||
|
||||
)
|
||||
|
||||
(defn syllabify
  "Splits `original-phones`, a sequence of phones with or without stress
  digits, into syllables. Returns a vector of vectors of phones in which
  the phones appear exactly as they were given."
  [original-phones]
  ;; It's easier to work backwards. The final syllable will always be all
  ;; of the last (if any) consonants preceded by (or followed by,
  ;; considering we're working backwards through the phones) a vowel.
  ;; So, reverse the phones as a first step.
  (loop [remaining (phonetics/remove-stress (reverse original-phones))
         syllables []]
    (if (seq remaining)
      (let [[rime after-rime]      (slurp-rime remaining)
            [syllable after-onset] (slurp-onset-given-rime after-rime rime)]
        ;; Syllables are found last-to-first, so each new one goes in front.
        (recur after-onset (into [syllable] syllables)))
      ;; Stress was stripped for the analysis; restore it from the input.
      (apply-stress syllables original-phones))))
|
||||
|
||||
(comment
|
||||
(phonetics/remove-stress ["AH" "L" "AE" "S" "K" "AH"])
|
||||
(slurp-onset-given-rime ["L" "AE" "S" "K" "AH"] ["AH"])
|
||||
(syllabify ["AH0" "L" "AE1" "S" "K" "AH0"])
|
||||
|
||||
)
|
@ -0,0 +1,19 @@
|
||||
(ns com.owoga.phonetics.util)
|
||||
|
||||
(defn take-through
  "Partitions `coll` into chunks, ending a chunk immediately after each
  element for which `pred` returns a truthy value. Trailing elements that
  follow the last match form a final chunk of their own.

  (take-through even? [1 2 3 4 7 7 5 2 8 10])
  returns '((1 2) (3 4) (7 7 5 2) (8) (10))

  Returns a lazy sequence of chunks; an empty `coll` yields an empty
  sequence. `pred` is called once per element."
  [pred coll]
  (lazy-seq
   (loop [remaining (seq coll)
          chunk     []]
     (cond
       ;; Input exhausted: emit the unfinished chunk, if there is one.
       (nil? remaining)
       (when (seq chunk)
         (list (seq chunk)))

       ;; Matching element closes the current chunk.
       (pred (first remaining))
       (cons (seq (conj chunk (first remaining)))
             (take-through pred (rest remaining)))

       :else
       (recur (next remaining)
              (conj chunk (first remaining)))))))
|
@ -0,0 +1,41 @@
|
||||
(ns com.owoga.phonetics.syllabify-test
|
||||
(:require [clojure.test :refer :all]
|
||||
[com.owoga.phonetics.syllabify :refer :all]))
|
||||
|
||||
;; Each case feeds unstressed phones to `syllabify` and checks the
;; resulting syllable boundaries.
(deftest syllabification-test
  (testing "alaska"
    (is (= [["AH"] ["L" "AE" "S"] ["K" "AH"]]
           (syllabify '("AH" "L" "AE" "S" "K" "AH")))))

  (testing "parentheses"
    (is (= [["P" "ER"] ["IH" "N"] ["TH" "UH"] ["S" "IY" "S"]]
           (syllabify '("P" "ER" "IH" "N" "TH" "UH" "S" "IY" "S")))))

  (testing "herald"
    (is (= [["H" "ER"] ["AH" "L" "D"]]
           (syllabify '("H" "ER" "AH" "L" "D")))))

  (testing "royal with cheese"
    (is (= [["R" "OY"] ["AH" "L"] ["W" "IH" "TH"] ["CH" "IY" "Z"]]
           (syllabify ["R" "OY" "AH" "L" "W" "IH" "TH" "CH" "IY" "Z"]))))

  (testing "uprising"
    (is (= [["UH" "P"] ["R" "AY"] ["S" "IY" "NG"]]
           (syllabify ["UH" "P" "R" "AY" "S" "IY" "NG"]))))

  (testing "glimpstred"
    (is (= [["G" "L" "IH" "M" "P" "S"] ["T" "R" "EH" "D"]]
           (syllabify ["G" "L" "IH" "M" "P" "S" "T" "R" "EH" "D"]))))

  (testing "boink"
    (is (= [["B" "OY" "N" "K"]]
           (syllabify ["B" "OY" "N" "K"]))))

  ;; Lax vowels can only occur in closed syllables.
  (testing "elipsis"
    (is (= [["IY"] ["L" "IH" "P"] ["S" "IH" "S"]]
           (syllabify ["IY" "L" "IH" "P" "S" "IH" "S"]))))

  ;; http://www.glottopedia.org/index.php/Maximal_Onset_Principle
  (testing "maximal onset principle"
    (testing "diploma"
      (is (= [["D" "IH" "P"] ["L" "OW"] ["M" "AH"]]
             (syllabify ["D" "IH" "P" "L" "OW" "M" "AH"])))))

  ;; http://www.glottopedia.org/index.php/Ambisyllabic
  ;; Phones, not letters, are being syllabified, so ambisyllabic spellings
  ;; need no special handling: each phone belongs to exactly one syllable.
  (testing "pillow"
    (is (= [["P" "IH"] ["L" "OW"]]
           (syllabify ["P" "IH" "L" "OW"])))))
|
@ -0,0 +1,16 @@
|
||||
(ns com.owoga.phonetics-test
|
||||
(:require [clojure.test :refer :all]
|
||||
[com.owoga.phonetics :refer :all]))
|
||||
|
||||
;; Round-trip checks between words and phones against the CMU dictionary.
(deftest phonetics-test
  (testing "word to phones"
    ;; "hello" has two dictionary pronunciations; both are returned.
    (is (= [["HH" "AH0" "L" "OW1"]
            ["HH" "EH0" "L" "OW1"]]
           (get-phones "hello"))))

  (testing "phones to word"
    ;; The lookup works for stressed and for unstressed phones alike.
    (doseq [phones [["HH" "EH0" "L" "OW1"]
                    ["HH" "EH" "L" "OW"]]]
      (is (= ["hello(1)"]
             (get-word phones))))
    ;; One unstressed phone sequence can belong to several words.
    (is (= ["ensure(1)" "insure"]
           (get-word ["IH" "N" "SH" "UH" "R"])))))
|
Loading…
Reference in New Issue