Remove unused file
parent 4433ca6ca5
commit 01280ff29c
@@ -1,279 +0,0 @@
#!/usr/bin/env python
# Copyright (c) 2012-2013 Kyle Gorman <gormanky@ohsu.edu>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# syllabify.py: prosodic parsing of ARPABET entries

from itertools import chain

# constants
SLAX = {
    "IH1",
    "IH2",
    "EH1",
    "EH2",
    "AE1",
    "AE2",
    "AH1",
    "AH2",
    "UH1",
    "UH2",
}
VOWELS = {
    "IY1",
    "IY2",
    "IY0",
    "EY1",
    "EY2",
    "EY0",
    "AA1",
    "AA2",
    "AA0",
    "ER1",
    "ER2",
    "ER0",
    "AW1",
    "AW2",
    "AW0",
    "AO1",
    "AO2",
    "AO0",
    "AY1",
    "AY2",
    "AY0",
    "OW1",
    "OW2",
    "OW0",
    "OY1",
    "OY2",
    "OY0",
    "IH0",
    "EH0",
    "AE0",
    "AH0",
    "UH0",
    "UW1",
    "UW2",
    "UW0",
    "UW",
    "IY",
    "EY",
    "AA",
    "ER",
    "AW",
    "AO",
    "AY",
    "OW",
    "OY",
    "UH",
    "IH",
    "EH",
    "AE",
    "AH",
} | SLAX

# licit medial onsets

O2 = {
    ("P", "R"),
    ("T", "R"),
    ("K", "R"),
    ("B", "R"),
    ("D", "R"),
    ("G", "R"),
    ("F", "R"),
    ("TH", "R"),
    ("P", "L"),
    ("K", "L"),
    ("B", "L"),
    ("G", "L"),
    ("F", "L"),
    ("S", "L"),
    ("K", "W"),
    ("G", "W"),
    ("S", "W"),
    ("S", "P"),
    ("S", "T"),
    ("S", "K"),
    ("HH", "Y"),  # "clerihew"
    ("R", "W"),
}
O3 = {("S", "T", "R"), ("S", "K", "L"), ("T", "R", "W")}  # "octroi"

# This does not represent anything like a complete list of onsets, but
# merely those that need to be maximized in medial position.
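# For example, in "minstrel" (M IH1 N S T R AH0 L) the medial cluster
# N S T R is split N . S T R: (T, R) is in O2 and (S, T, R) is in O3, so a
# three-consonant onset is retained and the leftover N closes the first
# syllable (see the doctests below).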


def syllabify(pron, alaska_rule=True):
    """
    Syllabifies a CMU dictionary (ARPABET) word string

    # Alaska rule:
    >>> pprint(syllabify('AH0 L AE1 S K AH0'.split()))  # Alaska
    '-AH0-.L-AE1-S.K-AH0-'
    >>> pprint(syllabify('AH0 L AE1 S K AH0'.split(), 0))  # Alaska
    '-AH0-.L-AE1-.S K-AH0-'

    # huge medial onsets:
    >>> pprint(syllabify('M IH1 N S T R AH0 L'.split()))  # minstrel
    'M-IH1-N.S T R-AH0-L'
    >>> pprint(syllabify('AA1 K T R W AA0 R'.split()))  # octroi
    '-AA1-K.T R W-AA0-R'

    # destressing
    >>> pprint(destress(syllabify('M IH1 L AH0 T EH2 R IY0'.split())))
    'M-IH-.L-AH-.T-EH-.R-IY-'

    # normal treatment of 'j':
    >>> pprint(syllabify('M EH1 N Y UW0'.split()))  # menu
    'M-EH1-N.Y-UW0-'
    >>> pprint(syllabify('S P AE1 N Y AH0 L'.split()))  # spaniel
    'S P-AE1-N.Y-AH0-L'
    >>> pprint(syllabify('K AE1 N Y AH0 N'.split()))  # canyon
    'K-AE1-N.Y-AH0-N'
    >>> pprint(syllabify('M IH0 N Y UW2 EH1 T'.split()))  # minuet
    'M-IH0-N.Y-UW2-.-EH1-T'
    >>> pprint(syllabify('JH UW1 N Y ER0'.split()))  # junior
    'JH-UW1-N.Y-ER0-'
    >>> pprint(syllabify('K L EH R IH HH Y UW'.split()))  # clerihew
    'K L-EH-.R-IH-.HH Y-UW-'

    # nuclear treatment of 'j'
    >>> pprint(syllabify('R EH1 S K Y UW0'.split()))  # rescue
    'R-EH1-S.K-Y UW0-'
    >>> pprint(syllabify('T R IH1 B Y UW0 T'.split()))  # tribute
    'T R-IH1-B.Y-UW0-T'
    >>> pprint(syllabify('N EH1 B Y AH0 L AH0'.split()))  # nebula
    'N-EH1-B.Y-AH0-.L-AH0-'
    >>> pprint(syllabify('S P AE1 CH UH0 L AH0'.split()))  # spatula
    'S P-AE1-.CH-UH0-.L-AH0-'
    >>> pprint(syllabify('AH0 K Y UW1 M AH0 N'.split()))  # acumen
    '-AH0-K.Y-UW1-.M-AH0-N'
    >>> pprint(syllabify('S AH1 K Y AH0 L IH0 N T'.split()))  # succulent
    'S-AH1-K.Y-AH0-.L-IH0-N T'
    >>> pprint(syllabify('F AO1 R M Y AH0 L AH0'.split()))  # formula
    'F-AO1 R-M.Y-AH0-.L-AH0-'
    >>> pprint(syllabify('V AE1 L Y UW0'.split()))  # value
    'V-AE1-L.Y-UW0-'

    # everything else
    >>> pprint(syllabify('N AO0 S T AE1 L JH IH0 K'.split()))  # nostalgic
    'N-AO0-.S T-AE1-L.JH-IH0-K'
    >>> pprint(syllabify('CH ER1 CH M AH0 N'.split()))  # churchmen
    'CH-ER1-CH.M-AH0-N'
    >>> pprint(syllabify('K AA1 M P AH0 N S EY2 T'.split()))  # compensate
    'K-AA1-M.P-AH0-N.S-EY2-T'
    >>> pprint(syllabify('IH0 N S EH1 N S'.split()))  # inCENSE
    '-IH0-N.S-EH1-N S'
    >>> pprint(syllabify('IH1 N S EH2 N S'.split()))  # INcense
    '-IH1-N.S-EH2-N S'
    >>> pprint(syllabify('AH0 S EH1 N D'.split()))  # ascend
    '-AH0-.S-EH1-N D'
    >>> pprint(syllabify('R OW1 T EY2 T'.split()))  # rotate
    'R-OW1-.T-EY2-T'
    >>> pprint(syllabify('AA1 R T AH0 S T'.split()))  # artist
    '-AA1 R-.T-AH0-S T'
    >>> pprint(syllabify('AE1 K T ER0'.split()))  # actor
    '-AE1-K.T-ER0-'
    >>> pprint(syllabify('P L AE1 S T ER0'.split()))  # plaster
    'P L-AE1-S.T-ER0-'
    >>> pprint(syllabify('B AH1 T ER0'.split()))  # butter
    'B-AH1-.T-ER0-'
    >>> pprint(syllabify('K AE1 M AH0 L'.split()))  # camel
    'K-AE1-.M-AH0-L'
    >>> pprint(syllabify('AH1 P ER0'.split()))  # upper
    '-AH1-.P-ER0-'
    >>> pprint(syllabify('B AH0 L UW1 N'.split()))  # balloon
    'B-AH0-.L-UW1-N'
    >>> pprint(syllabify('P R OW0 K L EY1 M'.split()))  # proclaim
    'P R-OW0-.K L-EY1-M'
    >>> pprint(syllabify('IH0 N S EY1 N'.split()))  # insane
    '-IH0-N.S-EY1-N'
    >>> pprint(syllabify('IH0 K S K L UW1 D'.split()))  # exclude
    '-IH0-K.S K L-UW1-D'
    """
    ## main pass
    mypron = list(pron)
    nuclei = []
    onsets = []
    i = -1
    for (j, seg) in enumerate(mypron):
        if seg in VOWELS:
            nuclei.append([seg])
            onsets.append(mypron[i + 1 : j])  # actually interludes, r.n.
            i = j
    codas = [mypron[i + 1 :]]
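    # each medial onsets[i] (i >= 1) currently holds the whole consonant
    # interlude between nucleus i-1 and nucleus i; the loop below splits it
    # into a coda for the preceding syllable and an onset for the next one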
    ## resolve disputes and compute coda
    for i in range(1, len(onsets)):
        coda = []
        # boundary cases
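        # an interlude-initial "R" (followed by at least one more consonant)
        # attaches to the preceding nucleus as an r-colored vowel, e.g. the
        # -AA1 R- nucleus in "artist"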
        if len(onsets[i]) > 1 and onsets[i][0] == "R":
            nuclei[i - 1].append(onsets[i].pop(0))
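        # an interlude-final "Y" preceded by two or more other consonants
        # joins the following nucleus as a glide, e.g. -Y UW0- in "rescue"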
        if len(onsets[i]) > 2 and onsets[i][-1] == "Y":
            nuclei[i].insert(0, onsets[i].pop())
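        # Alaska rule: an "S" that begins a medial cluster after a stressed
        # lax vowel goes to that vowel's coda rather than to the next onset
        # (cf. the "Alaska" doctest)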
        if (
            len(onsets[i]) > 1
            and alaska_rule
            and nuclei[i - 1][-1] in SLAX
            and onsets[i][0] == "S"
        ):
            coda.append(onsets[i].pop(0))
        # onset maximization
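        # keep the rightmost consonant as the onset by default, two if they
        # form a licit O2 cluster, three for an O3 cluster; anything further
        # left is pushed into the preceding coda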
        depth = 1
        if len(onsets[i]) > 1:
            if tuple(onsets[i][-2:]) in O2:
                depth = 3 if tuple(onsets[i][-3:]) in O3 else 2
        for j in range(len(onsets[i]) - depth):
            coda.append(onsets[i].pop(0))
        # store coda
        codas.insert(i - 1, coda)

    ## verify that all segments are included in the output
    output = list(zip(onsets, nuclei, codas))  # in Python3 zip is a generator
    flat_output = list(chain.from_iterable(chain.from_iterable(output)))
    if flat_output != mypron:
        raise ValueError(f"could not syllabify {mypron}, got {flat_output}")
    return output


def pprint(syllab):
    """
    Pretty-print a syllabification
    """
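    # "." separates syllables, "-" separates onset/nucleus/coda within a
    # syllable, and spaces separate segments within each constituent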
    return ".".join("-".join(" ".join(p) for p in syl) for syl in syllab)


def destress(syllab):
    """
    Generate a syllabification with nuclear stress information removed
    """
    syls = []
    for (onset, nucleus, coda) in syllab:
        nuke = [p[:-1] if p[-1] in {"0", "1", "2"} else p for p in nucleus]
        syls.append((onset, nuke, coda))
    return syls


if __name__ == "__main__":
    import doctest

    doctest.testmod()