diff --git a/src/com/owoga/corpus/markov.clj b/src/com/owoga/corpus/markov.clj index ed193b8..b8452fb 100644 --- a/src/com/owoga/corpus/markov.clj +++ b/src/com/owoga/corpus/markov.clj @@ -1032,47 +1032,51 @@ banned-words (into #{} (->> existing-lines (map (comp last last)))) - seed (if existing-lines - (->> existing-lines - rand-nth - reverse - (map first) - (apply concat) - (#(rhymes - rhyme-trie - % - (fn [choices] - (->> choices - (map (fn [[phones wordset]] - [phones - (set/difference - wordset - banned-words)])) - (remove (comp empty? second)))))) - rand-nth - ((fn [[phones wordset]] - (let [word (rand-nth (vec wordset))] - [(rand-nth (phonetics/get-phones word)) - word]))) - vector) - (->> (get-next-markov - markov-trie - [eos eos eos] - (fn [children] - (remove - #(#{eos bos} (.key %)) children))) - database - (#(vector (rand-nth (phonetics/get-phones %)) %)) - vector)) - line (take-until - (best-of-20) - #(tightly-generate-n-syllable-sentence-v2 - database - markov-trie - 4 - syllable-count - (make-markov-filter [eos bos]) - seed))] + + line + (take-until + (best-of-20) + (fn [] + (let [seed (if existing-lines + (->> existing-lines + rand-nth + reverse + (map first) + (apply concat) + (#(rhymes + rhyme-trie + % + (fn [choices] + (->> choices + (map (fn [[phones wordset]] + [phones + (set/difference + wordset + banned-words)])) + (remove (comp empty? second)))))) + rand-nth + ((fn [[phones wordset]] + (let [word (rand-nth (vec wordset))] + [(rand-nth (phonetics/get-phones word)) + word]))) + vector) + (->> (get-next-markov + markov-trie + [eos eos eos] + (fn [children] + (remove + #(#{eos bos} (.key %)) children))) + database + (#(vector (rand-nth (phonetics/get-phones %)) %)) + vector)) + line (tightly-generate-n-syllable-sentence-v2 + database + markov-trie + 4 + syllable-count + (make-markov-filter [eos bos]) + seed)] + line)))] (recur (rest scheme) (update result (first scheme) (fnil conj []) line))))))) @@ -1081,6 +1085,19 @@ (rhyme-from-scheme-v2 scheme database markov-tight-trie rhymetrie)) + (phonetics/get-phones "unleashed") + (rhymes + rhymetrie + ["IY" "SH" "T"] + (fn [choices] + (->> choices + (map (fn [[phones wordset]] + [phones + (set/difference + wordset + #{"unleashed"})])) + (remove (comp empty? second))))) + ) (comment diff --git a/src/com/owoga/prhyme/nlp/core.clj b/src/com/owoga/prhyme/nlp/core.clj index 7517d1f..a28fd53 100644 --- a/src/com/owoga/prhyme/nlp/core.clj +++ b/src/com/owoga/prhyme/nlp/core.clj @@ -167,6 +167,12 @@ (#(parse-top-n % 100))) ) +(comment + (likely-sentence? + "have s y como tu cama") + + ) + (defn valid-sentence? "Tokenizes and parses the phrase using OpenNLP models from http://opennlp.sourceforge.net/models-1.5/