(ns com.owoga.corpus.util
  "URL fetching and text-cleaning helpers."
  (:require [taoensso.tufte :as tufte :refer (defnp p profiled profile)]
            [clojure.string :as string]
            ;; Aliased as `html`; used for `html/html-resource` when fetching pages.
            [net.cgrand.enlive-html :as html]))
;; Have the compiler warn wherever a Java interop call cannot be resolved
;; statically and would fall back to reflection.
(set! *warn-on-reflection* true)
;; Register tufte's basic println handler with default (empty) options.
;; NOTE(review): presumably so that `profile` results are printed to stdout
;; during development -- confirm this side effect is wanted at load time.
(tufte/add-basic-println-handler! {})
(defn fix-url
  "Normalizes an href string. This is specific to some non-conformity on
  DarkLyrics.com, where some hrefs are relative and some are absolute.

  If `url` contains one or more `http://` prefixes, everything before the
  last `http://` is dropped, so an absolute URL that ended up appended to
  another URL is returned on its own. A string that contains no `http://`
  is returned unchanged.

  Only the literal `http://` scheme is recognized by the pattern."
  [url]
  ;; The leading greedy `.*` backtracks from the end of the string, so the
  ;; capture group starts at the last `http://` occurrence.
  (string/replace url #".*(http://.*(?!http://).*$)" "$1"))
(defn fetch-url-
  "Fetches `url` and parses the response with enlive's `html-resource`.

  The URL is first normalized with `fix-url`. Any `Exception` thrown while
  normalizing, opening, or parsing is printed with `prn` and swallowed;
  nil is returned in that case.

  This is the uncached implementation. `fetch-url` wraps it with `memoize`
  for faster iterations in development."
  [url]
  (try
    (let [url (fix-url url)]
      (html/html-resource (java.net.URL. url)))
    (catch Exception e
      (prn "Exception during fetch " e)
      ;; Best-effort fetch: report the failure and yield nil instead of
      ;; propagating the exception to the caller.
      nil)))
;; Memoized wrapper around `fetch-url-`: repeated calls with the same argument
;; return the cached result instead of fetching again. The cache is keyed on
;; the argument exactly as passed in (before `fix-url` normalization).
;; NOTE(review): `fetch-url-` catches exceptions and returns normally, so the
;; result of a failed fetch appears to be cached as well -- confirm that is
;; acceptable for transient network errors.
(def fetch-url (memoize fetch-url-))
(defn clean-text
"Removes all non-alphabetical characters and lowercases everything.
Very spartan way of cleaning."