Eric Ihli 4 years ago
parent 1cb959010c
commit f0ea2bc513

@ -0,0 +1,44 @@
(ns com.owoga.prhyme.data.dictionary
(:require [clojure.string :as string]
[clojure.java.io :as io]
[clojure.set]
[com.owoga.prhyme.core :as prhyme]))
(def cmu-dict
  "Sequence with one entry per line of the `cmudict_SPHINX_40` classpath
  resource. Each entry is the vector of strings obtained by splitting the
  line on single tab or space characters.
  NOTE(review): presumably the first token is the word and the remaining
  tokens are its phonemes -- confirm against the resource file."
  ;; `line-seq` is lazy and backed by the reader, so the lines must be fully
  ;; realized (`doall`) before `with-open` closes the underlying stream.
  ;; The original never closed the reader at all.
  (with-open [rdr (io/reader (io/resource "cmudict_SPHINX_40"))]
    (doall (map #(string/split % #"[\t ]") (line-seq rdr)))))
(def prhyme-dict
  "Vector produced by applying `prhyme/cmu->prhyme` to every entry of
  `cmu-dict`, in order."
  (mapv prhyme/cmu->prhyme cmu-dict))
(def popular
  "Set of the lines found in the `popular.txt` classpath resource."
  ;; `set` consumes every line eagerly, so it is safe for `with-open` to close
  ;; the reader as soon as the set has been built. The original left the
  ;; reader open.
  (with-open [rdr (io/reader (io/resource "popular.txt"))]
    (set (line-seq rdr))))
(def adverbs
  "Set of the lines of the `adverbs.txt` classpath resource that are also
  members of `popular`."
  ;; The file is read eagerly into a set inside `with-open` so the reader is
  ;; closed once loading finishes; the original leaked it.
  (with-open [rdr (io/reader (io/resource "adverbs.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
(def adjectives
  "Set of the lines of the `adjectives.txt` classpath resource that are also
  members of `popular`."
  ;; The file is read eagerly into a set inside `with-open` so the reader is
  ;; closed once loading finishes; the original leaked it.
  (with-open [rdr (io/reader (io/resource "adjectives.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
(def verbs
  "Set of the lines of the `verbs.txt` classpath resource that are also
  members of `popular`."
  ;; The file is read eagerly into a set inside `with-open` so the reader is
  ;; closed once loading finishes; the original leaked it.
  (with-open [rdr (io/reader (io/resource "verbs.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
(def nouns
  "Set of the lines of the `nouns.txt` classpath resource that are also
  members of `popular`."
  ;; The file is read eagerly into a set inside `with-open` so the reader is
  ;; closed once loading finishes; the original leaked it.
  (with-open [rdr (io/reader (io/resource "nouns.txt"))]
    (clojure.set/intersection popular (set (line-seq rdr)))))
(defn english?
  "Returns true when more than 70% of the whitespace-separated tokens of
  `text`, compared in lower case, occur among the `:normalized-word` values
  of `prhyme-dict`. Returns false for text that contains no tokens."
  [text]
  (let [;; Build the lookup set once per call. The original rebuilt it from
        ;; the whole dictionary inside the filter predicate, i.e. once for
        ;; every single token of `text`.
        known-words (into #{} (map :normalized-word) prhyme-dict)
        ;; Splitting text that starts with whitespace yields an empty first
        ;; token; drop blank tokens so they are not counted as non-English
        ;; words.
        words (remove string/blank? (string/split text #"\s+"))
        english-words (filter #(known-words (string/lower-case %)) words)]
    ;; `(max 1 ...)` keeps the ratio defined when there are no tokens at all.
    (< 0.7 (/ (count english-words) (max 1 (count words))))))

@ -0,0 +1,22 @@
(ns com.owoga.prhyme.data.phonetics
(:require [clojure.string :as string]
[clojure.set]
[clojure.java.io :as io]))
(def phonemap
  "Map built from the `cmudict-0.7b.phones` classpath resource: every line is
  split on tab characters and the resulting pair becomes one map entry
  (first field -> second field).
  NOTE(review): presumably phoneme symbol -> phoneme category -- confirm
  against the resource file."
  ;; `into` with a transducer consumes all lines eagerly, so the reader can be
  ;; closed by `with-open` right after the map is built. The original never
  ;; closed it.
  (with-open [rdr (io/reader (io/resource "cmudict-0.7b.phones"))]
    (into {} (map #(string/split % #"\t")) (line-seq rdr))))
(def long-vowel
  "Phoneme symbols this namespace classifies as long vowels."
  (hash-set "EY" "IY" "AY" "OW" "UW"))
(def short-vowel
  "Phoneme symbols this namespace classifies as short vowels."
  (hash-set "AA" "AE" "AH" "AO" "AW"
            "EH" "ER" "IH" "OY" "UH"))
(def vowel
  "Every vowel phoneme symbol: the members of `long-vowel` together with the
  members of `short-vowel`."
  (into short-vowel long-vowel))
(def consonant
  "Set of the keys of `phonemap` that are not members of `vowel`."
  (into #{} (remove vowel) (keys phonemap)))
(def syllable-end
  "Set containing every member of `consonant` plus every member of
  `long-vowel`."
  (reduce conj consonant long-vowel))
(def single-sound-bigram
  "Two-letter strings collected under the name single-sound bigram."
  (hash-set "TH" "SH" "PH" "WH" "CH"))

@ -0,0 +1,9 @@
(ns com.owoga.prhyme.data.thesaurus
(:require [clojure.string :as string]
[clojure.java.io :as io]))
(def thesaurus
  "Map built from the `mthesaur.txt` classpath resource. Every line is split
  on commas; the first field becomes the key and the remaining fields (as a
  sequence) become the value. When two lines share a first field, the later
  line replaces the earlier one."
  ;; `into` with a transducer consumes all lines eagerly, so `with-open` can
  ;; close the reader once the map is complete. The original left it open.
  (with-open [rdr (io/reader (io/resource "mthesaur.txt"))]
    (into {}
          (map (fn [line]
                 (let [fields (string/split line #",")]
                   [(first fields) (rest fields)])))
          (line-seq rdr))))
Loading…
Cancel
Save