WIP
parent
1cb959010c
commit
f0ea2bc513
@ -0,0 +1,44 @@
|
|||||||
|
(ns com.owoga.prhyme.data.dictionary
|
||||||
|
(:require [clojure.string :as string]
|
||||||
|
[clojure.java.io :as io]
|
||||||
|
[clojure.set]
|
||||||
|
[com.owoga.prhyme.core :as prhyme]))
|
||||||
|
|
||||||
|
(def cmu-dict
  "Sequence of entries read from the `cmudict_SPHINX_40` classpath resource.
  Each entry is the vector produced by splitting one line of the file on a
  single tab or space character."
  ;; `line-seq` is lazy and never closes its reader, so the file handle used to
  ;; stay open indefinitely. Realize every line inside `with-open` so the
  ;; reader is closed deterministically once loading finishes.
  (with-open [rdr (io/reader (io/resource "cmudict_SPHINX_40"))]
    (->> (line-seq rdr)
         (map #(string/split % #"[\t ]"))
         (doall))))
|
||||||
|
|
||||||
|
(def prhyme-dict
  "Vector holding the result of applying `prhyme/cmu->prhyme` to every entry
  of `cmu-dict`, in the same order."
  (mapv prhyme/cmu->prhyme cmu-dict))
|
||||||
|
|
||||||
|
(def popular
  "Set of the lines found in the `popular.txt` classpath resource."
  ;; `set` consumes every line eagerly, so the reader can safely be closed by
  ;; `with-open` as soon as the set is built (previously it was never closed).
  (with-open [rdr (io/reader (io/resource "popular.txt"))]
    (set (line-seq rdr))))
|
||||||
|
|
||||||
|
(def adverbs
  "Set of the lines of the `adverbs.txt` classpath resource that are also
  members of `popular`."
  ;; The lines are read eagerly into a set inside `with-open`, so the reader is
  ;; closed once loading finishes instead of being left open.
  (with-open [rdr (io/reader (io/resource "adverbs.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
|
||||||
|
|
||||||
|
(def adjectives
  "Set of the lines of the `adjectives.txt` classpath resource that are also
  members of `popular`."
  ;; The lines are read eagerly into a set inside `with-open`, so the reader is
  ;; closed once loading finishes instead of being left open.
  (with-open [rdr (io/reader (io/resource "adjectives.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
|
||||||
|
|
||||||
|
(def verbs
  "Set of the lines of the `verbs.txt` classpath resource that are also
  members of `popular`."
  ;; The lines are read eagerly into a set inside `with-open`, so the reader is
  ;; closed once loading finishes instead of being left open.
  (with-open [rdr (io/reader (io/resource "verbs.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
|
||||||
|
|
||||||
|
(def nouns
  "Set of the lines of the `nouns.txt` classpath resource that are also
  members of `popular`."
  ;; The lines are read eagerly into a set inside `with-open`, so the reader is
  ;; closed once loading finishes instead of being left open.
  (with-open [rdr (io/reader (io/resource "nouns.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
|
||||||
|
|
||||||
|
(defn english?
  "Returns true when more than 70% of the tokens of `text` are dictionary
  words.

  `text` is split on runs of whitespace; a token counts as a dictionary word
  when its lower-cased form equals the `:normalized-word` of some entry in
  `prhyme-dict`. The ratio's denominator is clamped to at least 1, so a text
  that yields no tokens returns false rather than dividing by zero."
  [text]
  (let [;; Build the lookup set once per call. It used to be rebuilt from the
        ;; whole dictionary inside the filter predicate, i.e. once per token.
        known-words (into #{} (map :normalized-word prhyme-dict))
        words (string/split text #"\s+")
        english-words (filter #(known-words (string/lower-case %)) words)]
    (< 0.7 (/ (count english-words) (max 1 (count words))))))
|
@ -0,0 +1,22 @@
|
|||||||
|
(ns com.owoga.prhyme.data.phonetics
|
||||||
|
(:require [clojure.string :as string]
|
||||||
|
[clojure.set]
|
||||||
|
[clojure.java.io :as io]))
|
||||||
|
|
||||||
|
(def phonemap
  "Map built from the `cmudict-0.7b.phones` classpath resource. Every line is
  split on tab characters and the resulting vector is added to the map as a
  key/value entry, so each line is expected to contain exactly two
  tab-separated fields."
  ;; `into {}` is eager, so the whole file is consumed inside `with-open` and
  ;; the reader is closed afterwards (previously it was never closed).
  (with-open [rdr (io/reader (io/resource "cmudict-0.7b.phones"))]
    (->> (line-seq rdr)
         (map #(string/split % #"\t"))
         (into {}))))
|
||||||
|
|
||||||
|
(def long-vowel
  "Set of the phoneme symbols this namespace classifies as long vowels."
  #{"AY" "EY" "IY" "OW" "UW"})
|
||||||
|
|
||||||
|
(def short-vowel
  "Set of the phoneme symbols this namespace classifies as short vowels."
  #{"AA" "AE" "AH" "AO" "AW"
    "EH" "ER" "IH" "OY" "UH"})
|
||||||
|
|
||||||
|
(def vowel
  "Set of every vowel phoneme symbol: the members of `long-vowel` together
  with the members of `short-vowel`."
  (into long-vowel short-vowel))
|
||||||
|
|
||||||
|
(def consonant
  "Set of the keys of `phonemap` that are not members of `vowel`."
  (->> (keys phonemap)
       (remove vowel)
       (set)))
|
||||||
|
|
||||||
|
(def syllable-end
  "Set containing every member of `consonant` and every member of
  `long-vowel`."
  (into consonant long-vowel))
|
||||||
|
|
||||||
|
(def single-sound-bigram
  "Set of two-letter strings that this namespace treats as a single sound."
  #{"CH" "PH" "SH" "TH" "WH"})
|
@ -0,0 +1,9 @@
|
|||||||
|
(ns com.owoga.prhyme.data.thesaurus
|
||||||
|
(:require [clojure.string :as string]
|
||||||
|
[clojure.java.io :as io]))
|
||||||
|
|
||||||
|
(def thesaurus
  "Map built from the `mthesaur.txt` classpath resource. Every line is split
  on commas; the first field becomes the key and the sequence of remaining
  fields becomes the value."
  ;; `into {}` is eager, so the whole file is consumed inside `with-open` and
  ;; the reader is closed afterwards (previously it was never closed).
  (with-open [rdr (io/reader (io/resource "mthesaur.txt"))]
    (->> (line-seq rdr)
         (map #(string/split % #","))
         (map #(vector (first %) (rest %)))
         (into {}))))
|
Loading…
Reference in New Issue