Remove unused code

5 years ago · aecb8d42f1
parent 7006069aa3
commit aecb8d42f1
1 changed files with 0 additions and 222 deletions
--- a/src/com/owoga/corpus/markov.clj
+++ b/src/com/owoga/corpus/markov.clj
@ -1,8 +1,6 @@
 (ns com.owoga.corpus.markov
  (:require [com.owoga.prhyme.core :as prhyme]
            [com.owoga.prhyme.util :as util]
-            [com.owoga.prhyme.data.dictionary :as dict]
-            [com.owoga.prhyme.nlp.core :as nlp]
            [com.owoga.prhyme.data-transform :as data-transform]
            [com.owoga.prhyme.util.math :as math]
            [com.owoga.trie :as trie]
@ -17,59 +15,6 @@
 (defn clean-text [text]
  (string/lower-case (string/replace text #"[^a-zA-Z'\-\s]" "")))

-(defn make-markov [tokens n]
-  (reduce
-   (fn [a w]
-     (let [k (butlast w)
-           v (last w)]
-       (update-in a [k v] (fnil inc 0))))
-   {}
-   ((util/window (inc n)) tokens)))
-
-(defn merge-markov [& maps]
-  (apply
-   merge-with
-   (fn [a-possibilities b-possibilities]
-     (merge-with
-      (fn [a b]
-        ((fnil + 0) a b))
-      a-possibilities
-      b-possibilities))
-   maps))
-
-(comment
-  (merge-markov
-   {'("away") {"her" 1
-               "foo" 7}}
-   {'("away") {"her" 2
-               "them" 1
-               "bar" 8}}
-    {'("away") {"her" 10
-               "them" 50
-                "baz" 99}})
-
-  )
-
-
-(defn slurp-file-to-read-string
-  "Returns the value of read-string of the contents of the file.
-  Useful for reading into memory a saved database of n-grams to identifiers
-  and identifiers to n-grams."
-  [filepath]
-  (read-string (slurp filepath)))
-
-(defn spit-edn-to-file
-  [filepath data]
-  (spit filepath (pr-str data)))
-
-(comment
-  (do
-    (spit-edn-to-file
-     "/tmp/spit-edn-test.txt"
-     {:a {:b :c}})
-    (slurp-file-to-read-string "/tmp/spit-edn-test.txt"));; => {:a {:b :c}}
-  )
-
 (defn xf-file-seq [start end]
  (comp (remove #(.isDirectory %))
        (drop start)
@ -84,173 +29,6 @@
 ;; Also, create a database to map integer IDs back to
 ;; their string values and string values to integer IDs.

-(defn stateful-transducer
-  "Stateful transform that crates a trie and populates an `atom` database."
-  [database xf]
-  (let [trie (volatile! (trie/make-trie))
-        next-id (volatile! 1)]
-    (fn
-      ([] (xf))
-      ([result]
-       (xf result))
-      ([result map-entries-in]
-       (let [map-entries-out
-             (mapv
-              (fn [[lookup v]]
-                (mapv
-                 (fn [key]
-                   (let [key-id (get @database key @next-id)]
-                     (when (.equals key-id @next-id)
-                       (swap! database
-                              #(-> %
-                                   (assoc key key-id)
-                                   (assoc key-id key)))
-                       (vswap! next-id inc))
-                     (mapv @database lookup)))
-                 lookup))
-              map-entries-in)]
-         (vswap!
-          trie
-          (fn [trie map-entries-out]
-            (reduce
-             (fn [trie [lookup _]]
-               (update trie lookup (fnil #(update % 1 inc) [(peek lookup) 0])))
-             trie
-             map-entries-out))
-          map-entries-out))))))
-
-(defn pad-tokens
-  "Pads the beginning with n - 1 <s> tokens and
-  the end with 1 </s> token."
-  [tokens n]
-  (vec (concat (vec (repeat (max 1 (dec n)) "<s>")) tokens ["</s>"])))
-
-(def re-word
-  "Regex for tokenizing a string into words
-  (including contractions and hyphenations),
-  commas, periods, and newlines."
-  #"(?s).*?([a-zA-Z\d]+(?:['\-]?[a-zA-Z]+)?|,|\.|\n)")
-
-(defn tokenize-line
-  [line]
-  (->> line
-       (string/trim)
-       (re-seq re-word)
-       (mapv second)
-       (mapv string/lower-case)))
-
-(defn text->ngrams
-  "Takes text from a file, including newlines.
-  Pads lines with <s> and </s> for start/end of line.
-  Pads beginning with n - 1 <s>s"
-  [text n]
-  (->> text
-       util/clean-text
-       (#(string/split % #"\n+"))
-       (remove empty?)
-       (mapv tokenize-line)
-       (mapv #(pad-tokens % n))
-       (mapv #(partition n 1 %))
-       (mapv #(mapv vec %))
-       (reduce #(into %1 %2) [])))
-
-(defn n-to-m-grams
-  "Exclusive of m, similar to range."
-  [n m text]
-  (loop [i n
-         r []]
-    (cond
-      (= i m)
-      r
-      :else
-      (recur (inc i)
-             (into r (text->ngrams text i))))))
-
-(comment
-  (n-to-m-grams 1 3 "The quick brown fox jumps over the lazy dog.")
-  ;; => [["<s>"]
-  ;;     ["the"]
-  ;;     ["quick"]
-  ;;     ,,,
-  ;;     ["the" "lazy"]
-  ;;     ["lazy" "dog"]
-  ;;     ["dog" "</s>"]]
-
-  )
-
-(defn text->backwards-ngrams
-  "Takes text from a file, including newlines.
-  Pads lines with <s> and </s> for start/end of line.
-  Pads beginning with n - 1 <s>s"
-  [text n]
-  (->> text
-       util/clean-text
-       (#(string/split % #"\n+"))
-       (remove empty?)
-       (mapv tokenize-line)
-       (mapv #(pad-tokens % n))
-       reverse
-       (mapv reverse)
-       (mapv #(partition n 1 %))
-       (mapv #(mapv vec %))
-       (reduce #(into %1 %2) [])))
-
-(defn n-to-m-backwards-grams
-  "Exclusive of m, similar to range."
-  [n m text]
-  (loop [i n
-         r []]
-    (cond
-      (= i m)
-      r
-      :else
-      (recur (inc i)
-             (into r (text->backwards-ngrams text i))))))
-
-(defn prep-ngram-for-trie
-  "The tpt/trie expects values conjed into an ngram
-  to be of format '[[k1 k2 k3] value]."
-  [ngram]
-  (clojure.lang.MapEntry. (vec ngram) ngram))
-
-(defn make-trie-and-database
-  "Takes a file seq, like (file-seq (io/file \"dark-corpus\"))"
-  [file-seq]
-  (let [database (atom {})
-        trie (transduce (comp (xf-file-seq 501 2)
-                              (map slurp)
-                              (map (partial n-to-m-grams 1 4))
-                              (map (fn [ngrams] (mapv #(prep-ngram-for-trie %) ngrams)))
-                              (partial stateful-transducer database))
-                        conj
-                        file-seq)]
-    [trie database]))
-
-(defn make-backwards-trie-and-database
-  [file-seq]
-  (let [database (atom {})
-        trie (transduce (comp (xf-file-seq 0 1000)
-                              (map slurp)
-                              (map (partial n-to-m-backwards-grams 1 4))
-                              (map (fn [ngrams] (mapv #(prep-ngram-for-trie %) ngrams)))
-                              (partial stateful-transducer database))
-                        conj
-                        (file-seq (io/file "dark-corpus")))]
-    [trie database]))
-
-
-
-(comment
-
-  (take 20 trie)
-  (take 20 @trie-database)
-  (->> (map #(get % []) (trie/children (trie/lookup trie [1])))
-       (map first)
-       (map @trie-database))
-
-  )
-
-
 ;;;; The difference between a forwards and a backwards
 ;;   markov is that the backwards markov has its tokens
 ;;   reversed and has the </s> tokens padded by a number