From da6f63f470a8eccb1c68c970790e914325cdb223 Mon Sep 17 00:00:00 2001 From: Eric Ihli Date: Wed, 28 Apr 2021 20:29:55 -0500 Subject: [PATCH] Closer to markov from grammar --- dev/examples/tpt.clj | 34 ++++++++++--- src/com/owoga/prhyme/data_transform.clj | 63 ++++++++++++++++++++----- src/com/owoga/prhyme/nlp/core.clj | 4 +- 3 files changed, 82 insertions(+), 19 deletions(-) diff --git a/dev/examples/tpt.clj b/dev/examples/tpt.clj index 1a2f9b1..77811f0 100644 --- a/dev/examples/tpt.clj +++ b/dev/examples/tpt.clj @@ -835,12 +835,34 @@ ) -(map - #(generate-n-syllable-sentence-rhyming-with - @context - (reverse (take 4 (phrase->flex-rhyme-phones "dawn of skynet"))) - 3 3 %) - [5 3]) +(comment + (println + (string/join + "\n" + (map + #(generate-n-syllable-sentence-rhyming-with + @context + (take 3 (phrase->flex-rhyme-phones "player unknown battlegrounds")) + 3 4 %) + [9 6 6 9 6 6 9 6 6])))) + +" +another day a battleground +contorts the fragile sound +that no one gives a damn about +what is the chaos all about +have we really been trampled down +cause they'll pay be blasted now +weeping to absent cow +die die lightning all around +calling for santa's now +hours of lifes battleground +just how much more could a man about +trample on and as without +i'm just like so so fragile how +killing and i'll be close damage wow +witness sky is blackened now +" (defn amul8 diff --git a/src/com/owoga/prhyme/data_transform.clj b/src/com/owoga/prhyme/data_transform.clj index b07bfdb..72f2654 100644 --- a/src/com/owoga/prhyme/data_transform.clj +++ b/src/com/owoga/prhyme/data_transform.clj @@ -206,7 +206,7 @@ (split-text-into-sentences) (map string/trim) (remove empty?) - (map nlp/treebank-zipper) + (mapv nlp/treebank-zipper) (remove nil?) (map nlp/parts-of-speech-trie-entries) (mapv (fn [file] @@ -216,9 +216,12 @@ (reduce into []) (mapv normalize-text) (mapv (fn [[k v]] - (clojure.lang.MapEntry. (into k [v]) v))))) + (clojure.lang.MapEntry. (into (vec k) [v]) v))))) (comment + ;; TODO: MOST-RECENT-STOPPING-POINT + ;; TODO: Pick BACK UP HERE and clean up the code in the future + ;; so you know where you're working. (map process-text texts) (def test-database (atom {:next-id 1})) @@ -272,22 +275,58 @@ texts)) (->> test-trie - (take 20) + (take 2000) (map (fn [[k v]] - [(map @test-database k) + [k + (map @test-database k) (last v)]))) (->> (take 100 test-trie)) - - (@test-database 16) - - (update - (conj (assoc (trie/make-trie) '[top s [np vp .]] '[np]) - '[[top s [s]] [s]]) + (let [start 'TOP + start-id (@test-database start)] + (->> (map + #(get % []) + (trie/children (trie/lookup test-trie [start-id]))) + (remove nil?) + (map (fn [[k v]] + [k (map @test-database k) v]))) + #_(trie/children (trie/lookup test-trie [start-id]))) + + (defn lookup [syms] + (->> (map @test-database syms) + (trie/lookup test-trie) + ((fn [node] + (if node (trie/children node) '()))) + (map + #(get % [])) + (remove nil?) + (sort-by (comp - second)) + (map + (fn [[k v]] + [k (map @test-database k) v])))) + + (lookup [(symbol ":")]) + + (->> (map #(get % []) + (trie/children (trie/lookup test-trie [7 8 10 22]))) + (remove nil?) + (sort-by (comp - second)) + (map + (fn [[k v]] + [k (map @test-database k) v]))) + + (@test-database (symbol "NN")) + (@test-database (symbol ":")) + + (trie/lookup test-trie [7 8 3163]) + (let [start '[TOP [S]] + start-id (map @test-database start)] + (->> (trie/children (trie/lookup test-trie start-id)) + #_(remove nil?) + #_(map (fn [[k v]] + [(map @test-database k) v])))) - '[[top s]] (fnil #(update % 1 inc) [:freq 0])) - (update {['top] 1} ['top] inc) ) (comment diff --git a/src/com/owoga/prhyme/nlp/core.clj b/src/com/owoga/prhyme/nlp/core.clj index a40a080..4565fe4 100644 --- a/src/com/owoga/prhyme/nlp/core.clj +++ b/src/com/owoga/prhyme/nlp/core.clj @@ -158,7 +158,9 @@ (cond (string? node) node (map? node) (list (:tag node) (unmake-tree (:chunk node))) - :else (map unmake-tree node))) + ;; This can fail and throw. It's easier to debug + ;; if this failure is caught and handled early. + :else (doall (map unmake-tree node)))) (comment (let [text-lines ["This is a sample test."