diff --git a/src/com/owoga/prhyme/nlg/core.clj b/src/com/owoga/prhyme/nlg/core.clj index 1fe022d..c57c045 100644 --- a/src/com/owoga/prhyme/nlg/core.clj +++ b/src/com/owoga/prhyme/nlg/core.clj @@ -1,4 +1,7 @@ (ns com.owoga.prhyme.nlg.core + (:require [clojure.walk :as walk] + [clojure.zip :as zip] + [com.owoga.prhyme.nlp.core :as nlp]) (:import (simplenlg.framework NLGFactory) (simplenlg.lexicon Lexicon) (simplenlg.realiser.english Realiser) @@ -27,6 +30,21 @@ clause (.createSentence nlg-factory [NP VP1])] (.realiseSentence realiser clause)) +(defn parse-tree-sans-leaf-words + "Takes a 'simple' parse tree (`parse-to-simple-tree`) + Removes the leaf words from the tree. + + Useful if you want to work with the structure of something + without caring about the actual words." + [tree] + (walk/postwalk + (fn [node] + (if (and (seq? node) + (string? (second node))) + (take 1 node) + node)) + tree)) + (defn create-noun-phrase ([] (.createNounPhrase nlg-factory)) @@ -71,13 +89,162 @@ (ns-unmap *ns* 'create-element) (defmulti create-element - (fn [children] (doall (map first children)))) + (fn [tree] (parse-tree-sans-leaf-words tree))) + +(defmethod create-element '(TOP (NN)) + [tree] + (let [zipper (zip/seq-zip tree) + clause (.createClause nlg-factory)] + (-> zipper + zip/down + zip/right + zip/down + zip/right + ((fn [node] + (.setSubject + clause + (.createNounPhrase nlg-factory (zip/node node))) + node))) + clause)) + +(comment + (parse-tree-sans-leaf-words '(TOP (NN "FOO"))) + (.getRealisation (.realise realiser (create-element '(TOP (NN "foo"))))) + ) + +(defmethod create-element '(TOP (NP (NN) (NN))) + [tree] + (let [zipper (zip/seq-zip tree) + clause (.createClause nlg-factory)] + (-> zipper + zip/down + zip/right + zip/down + zip/right + zip/down + zip/right + ((fn [node] + (.setSubject + clause + (.createNounPhrase + nlg-factory + (zip/node node) + (-> node + zip/up + zip/right + zip/down + zip/right + zip/node))) + node))) + clause)) + +(comment + (let [tree '(TOP (NP (NN "sample") (NN "test")))] + (.realise realiser (create-element tree))) + ) + +(defmethod create-element '(TOP (NP (JJ) (NN))) + [tree] + (let [zipper (zip/seq-zip tree) + clause (.createClause nlg-factory)] + (-> zipper + zip/down + zip/right + zip/down + zip/right + zip/right + zip/down + zip/right + ((fn [node] + (let [subject (.createNounPhrase nlg-factory (zip/node node))] + (.setSubject clause subject) + (.addModifier + subject + (-> node + zip/up + zip/left + zip/down + zip/right + zip/node))) + node))) + clause)) + +(comment + (let [tree '(TOP (NP (JJ "fast") (NN "test")))] + (.realise realiser (create-element tree))) + ) + +(defn leaf-filter [tree] + (let [zipper (zip/seq-zip tree)] + (->> zipper + nlp/iter-zip + (filter (fn [z] + (let [node (zip/node z)] + (and (seq? node) + (< 1 (count node)) + (string? (second node))))))))) + +(comment + (leaf-filter '(TOP (NN "eric") (NN "test"))) + + ) +(defmethod create-element '(TOP (NP (NP (NN)) (PP (IN) (NP (NN))))) + [tree] + (let [zipper (zip/seq-zip tree) + nouns (->> tree + leaf-filter + (filter (fn [z] + (let [[pos word] (zip/node z)] + (= pos 'NN)))) + (map zip/node)) + prepositions (->> tree + leaf-filter + (filter (fn [z] + (let [[pos word] (zip/node z)] + (= pos 'IN)))) + (map zip/node)) + subject (.createNounPhrase nlg-factory (second (first nouns))) + object (.createNounPhrase nlg-factory (second (second nouns))) + prepositional-phrase (.createPrepositionPhrase nlg-factory) + preposition (second (first prepositions)) + clause (.createClause nlg-factory)] + (.addComplement prepositional-phrase object) + (.setPreposition prepositional-phrase preposition) + (.setSubject clause subject) + (.addComplement clause prepositional-phrase) + clause)) + +(comment + (.realise + realiser + (create-element '(TOP (NP (NP (NN "Eric")) (PP (IN "in") (NP (NN "Texas"))))))) + + ) (defmethod create-element '(PRP$) [[[_ child]]] (.createNounPhrase nlg-factory child)) -(defmethod create-element '(NN) - [[[_ child]]] (.createNounPhrase nlg-factory child)) +(defmethod create-element 'NN + [clause [pos child]] + (let [noun-phrase (.createNounPhrase nlg-factory child)] + (.setNoun))) + +(comment + (let [clause (.createClause nlg-factory) + tree '(NN "Eric")] + (create-element clause tree)) + ) + +(defmethod create-element 'TOP + [clause [pos children]] + (run! + #(map (create-element clause %)) + children)) + +(comment + (let [structure '(TOP (NN "Eric"))] + (create-element structure)) + ) (defmethod create-element '(NNP) [[[_ child]]] @@ -114,6 +281,8 @@ (-> (create-element (list prp$)) (set-feature Feature/POSSESSIVE true))))) + + (defmethod create-element '(NP) [[[_ child]]] diff --git a/src/com/owoga/prhyme/nlg/prhyme_nlg.clj b/src/com/owoga/prhyme/nlg/prhyme_nlg.clj index d67ab61..b677da4 100644 --- a/src/com/owoga/prhyme/nlg/prhyme_nlg.clj +++ b/src/com/owoga/prhyme/nlg/prhyme_nlg.clj @@ -330,3 +330,20 @@ (string/join " "))))))) ) + + +;;; Most common grammars +(comment + '([(TOP (NN)) 857] + [(TOP (NP (NN) (NN))) 569] + [(TOP (NP (JJ) (NN))) 563] + [(TOP (NP (NP (NN)) (PP (IN) (NP (NN))))) 424] + [(TOP (PP (IN) (NP (DT) (NN)))) 390] + [(TOP (NP (NP (NN)) (PP (IN) (NP (DT) (NN))))) 314] + [(TOP (NP (DT) (NN))) 300] + [(TOP (NP (DT) (JJ) (NN))) 265] + [(TOP (NP (NP (DT) (NN)) (PP (IN) (NP (NN))))) 250] + [(TOP (VP (VB) (NP (DT) (NN)))) 221] + [(TOP (NP (NP (NN)) (PP (IN) (NP (PRP$) (NN))))) 218] + [(TOP (NP (JJ) (NNS))) 211] + [(TOP (VB)) 204]))