From 731ac8bd03ab73f20b9a91c8fed1659d396e7202 Mon Sep 17 00:00:00 2001
From: Eric Ihli
Date: Wed, 21 Oct 2020 14:05:17 -0700
Subject: [PATCH] Add sentence validity detection

---
Note: indentation in this patch is reconstructed, so context lines may need
`git apply --ignore-whitespace`. The blob ids on the `index` lines predate the
edits made to the added code below.

 deps.edn                                |  1 +
 src/com/owoga/prhyme/util/lovecraft.clj | 50 +++++++++++++++++++++++--
 src/com/owoga/prhyme/util/nlp.clj       | 29 ++++++++++++++
 3 files changed, 76 insertions(+), 4 deletions(-)
 create mode 100644 src/com/owoga/prhyme/util/nlp.clj

diff --git a/deps.edn b/deps.edn
index d56a6f1..73b6430 100644
--- a/deps.edn
+++ b/deps.edn
@@ -5,6 +5,7 @@
         org.clojure/core.async {:mvn/version "1.2.603"}
         inflections {:mvn/version "0.13.2"}
         com.taoensso/tufte {:mvn/version "2.2.0"}
+        clojure-opennlp {:mvn/version "0.5.0"}
         enlive {:mvn/version "1.1.6"}
         com.taoensso/timbre {:mvn/version "4.10.0"}}
  :aliases {:dev {:extra-paths ["test"]
diff --git a/src/com/owoga/prhyme/util/lovecraft.clj b/src/com/owoga/prhyme/util/lovecraft.clj
index 2126d2a..c92636f 100644
--- a/src/com/owoga/prhyme/util/lovecraft.clj
+++ b/src/com/owoga/prhyme/util/lovecraft.clj
@@ -3,6 +3,7 @@
             [clojure.string :as string]
             [com.owoga.prhyme.util.weighted-rand :as wr]
             [com.owoga.prhyme.core :as prhyme]
+            [com.owoga.prhyme.util.nlp :as nlp]
             [taoensso.tufte :as tufte :refer [defnp p profiled profile]]
             [com.owoga.prhyme.frp :as frp]
             [clojure.java.io :as io]
@@ -437,7 +438,8 @@
           weight-non-markovs (apply + (map :weight non-markovs))
           target-weight-markovs (* ratio weight-non-markovs)
           count-markovs (count markovs)
-          adjustment-markovs (/ target-weight-markovs count-markovs)]
+          ;; Avoid dividing by zero when `markovs` is empty.
+          adjustment-markovs (if (zero? count-markovs) 1 (/ target-weight-markovs count-markovs))]
       (concat
        (map
         (fn [markov]
@@ -460,11 +462,11 @@
               (> sentinel 5))
         result
         (let [markov-options (markov (list (:norm-word (first result))))
-              markov-adjuster (adjust-for-markov-1 markov-options 0.8)
+              markov-adjuster (adjust-for-markov-1 markov-options 0.9)
               syllable-count-adjuster (adjust-for-over-syllables target)
-              rhyme-adjuster (adjust-for-rhymes-1 target 0.8)
+              rhyme-adjuster (adjust-for-rhymes-1 target 0.9)
               lovecraft-set (into #{} (map (comp first first) lovecraft-markov))
-              lovecraft-filter (adjust-for-membership-1 lovecraft-set 0.8)
+              lovecraft-filter (adjust-for-membership-1 lovecraft-set 0.9)
               adjust (comp rhyme-adjuster
                            syllable-count-adjuster
                            markov-adjuster
@@ -502,7 +504,47 @@
                     (apply + (map :syllable-count result)))))))
      poem-lines))
 
+(defn rhymer
+  "Returns an infinite lazy sequence whose every element is the result of a
+  fresh `(e-prhyme words markov target stop)` call."
+  [words markov target stop]
+  (repeatedly #(e-prhyme words markov target stop)))
+
+(defn stop
+  "Returns a predicate of `[inner-target result]` that is true once the summed
+  `:syllable-count` of `result` is at least the number of `:syllables` in
+  `target`. The predicate ignores its first argument."
+  [target]
+  (fn [_inner-target result]
+    (<= (count (:syllables target))
+        (apply + (map :syllable-count result)))))
+
+(defn sentence-stop
+  "Returns a predicate of `[inner-target result]`. For an empty `result` it
+  returns nil; otherwise it is true when `result` has more than five elements,
+  when its summed `:syllable-count` exceeds `target`'s `:syllable-count`, or
+  when its `:norm-word`s joined by spaces pass `nlp/valid-sentence?`. The
+  predicate ignores its first argument."
+  [target]
+  (fn [_inner-target result]
+    (when (seq result)
+      ;; Arithmetic checks run first so the parser-backed check is only
+      ;; reached when they fail; the overall truth value is unchanged.
+      (or (< 5 (count result))
+          (< (:syllable-count target)
+             (apply + (map :syllable-count result)))
+          (nlp/valid-sentence? (string/join " " (map :norm-word result)))))))
+
 (comment
+  (let [phrase (frp/phrase->word frp/words "i solemnly swear i am up to no good")
+        r (rhymer
+           frp/popular
+           lovecraft-markov
+           phrase
+           (sentence-stop phrase))]
+    (take 2 (map #(string/join " " (map :norm-word %))
+                 (filter #(nlp/valid-sentence? (string/join " " (map :norm-word %))) r))))
+
   (let [poem-lines ["mister sandman"
                     "give me a dream"
                     "make him the cutest"
diff --git a/src/com/owoga/prhyme/util/nlp.clj b/src/com/owoga/prhyme/util/nlp.clj
new file mode 100644
index 0000000..fe2ccd5
--- /dev/null
+++ b/src/com/owoga/prhyme/util/nlp.clj
@@ -0,0 +1,29 @@
+(ns com.owoga.prhyme.util.nlp
+  (:require [opennlp.nlp :as nlp]
+            [opennlp.treebank :as tb]
+            [clojure.string :as string]))
+
+(def tokenize (nlp/make-tokenizer "models/en-token.bin"))
+(def get-sentences (nlp/make-sentence-detector "models/en-sent.bin"))
+;; NOTE(review): unlike the two model paths above, this one has no "models/"
+;; prefix -- confirm the parser model's location.
+(def parse (tb/make-treebank-parser "en-parser-chunking.bin"))
+
+(defn valid-sentence?
+  "Returns true when the first chunk of the treebank parse of `phrase` is
+  tagged `S`, and false otherwise. A nil or blank `phrase` yields false without
+  being tokenized or parsed."
+  [phrase]
+  (and (not (string/blank? phrase))
+       (->> phrase
+            tokenize
+            (string/join " ")
+            vector
+            parse
+            first
+            tb/make-tree
+            :chunk
+            first
+            :tag
+            (= 'S))))
+