Move url helpers to utils namespace

main
Eric Ihli 2 years ago
parent 185bcfb6df
commit d0a8202b63

@ -2,29 +2,13 @@
(:require [net.cgrand.enlive-html :as html]
[com.owoga.prhyme.util :as util]
[clojure.string :as string]
[clojure.java.io :as io]))
[clojure.java.io :as io]
[com.owoga.corpus.util :refer [fetch-url]]))
(def root-url
  "Scheme and host of the DarkLyrics site."
  "http://www.darklyrics.com")

(def base-url
  "Absolute URL of the a.html page under the DarkLyrics root.
  NOTE(review): presumably the first page the scraper visits; confirm against callers."
  "http://www.darklyrics.com/a.html")

(def data-dir
  "The string dark-corpus.
  NOTE(review): presumably the directory scraped data is written to; usage is not visible here."
  "dark-corpus")
(defn fix-url
  "Normalizes a URL string in which an absolute URL has ended up glued onto
  some prefix (some hrefs are relative and some are absolute, so joining
  every href onto a base produces strings with two schemes in them).

  If `url` contains one or more http:// or https:// occurrences, everything
  before the LAST occurrence is dropped and the remainder is returned.
  A string with no scheme in it (a plain relative href) is returned unchanged."
  [url]
  ;; The leading greedy `.*` consumes as much as it can, so the capture group
  ;; starts at the last scheme in the string. `https?` also covers https
  ;; links, which the previous http-only pattern left unrepaired.
  (string/replace url #".*(https?://.*$)" "$1"))
(defn fetch-url-
  "Fetches and parses a single page, without any caching.

  `url` is first normalized with `fix-url`, then loaded through
  `html/html-resource`. If anything throws during the fetch/parse, the
  exception is printed and an empty map is returned instead of propagating,
  so callers never see a fetch exception from this function."
  [url]
  (let [url (fix-url url)]
    (try
      (html/html-resource (java.net.URL. url))
      (catch Exception e
        (prn "Exception during fetch " e)
        {}))))

(def fetch-url
  "Cached version of `fetch-url-`, for faster iterations in development.

  Successful results are remembered per `url` for the lifetime of the
  process. The empty-map failure value returned by `fetch-url-` is NOT
  cached, so a transient network error is retried on the next call instead
  of permanently making that URL look empty."
  (let [cache (atom {})]
    (fn [url]
      ;; `find` (rather than `get`) so a legitimately cached nil still counts
      ;; as a hit.
      (if-let [hit (find @cache url)]
        (val hit)
        (let [page (fetch-url- url)]
          ;; {} is exactly the sentinel fetch-url- returns on an exception.
          (when-not (= {} page)
            (swap! cache assoc url page))
          page)))))
(defn parse-letters-urls [index]
(-> index
(html/select [:div.listrow])

@ -1,10 +1,29 @@
(ns com.owoga.corpus.util
(:require [taoensso.tufte :as tufte :refer (defnp p profiled profile)]
[clojure.string :as string]))
[clojure.string :as string]
[net.cgrand.enlive-html :as html]))
;; Turn on compiler warnings for interop calls that have to be resolved
;; through reflection, for code compiled in this namespace after this point.
(set! *warn-on-reflection* true)
;; Install tufte's basic println handler with default (empty) options.
;; NOTE(review): presumably this is what makes profiling results from
;; tufte's `profile` get printed; confirm against the tufte docs.
(tufte/add-basic-println-handler! {})
(defn fix-url
  "This is specific to some non-conformity on DarkLyrics.com.
  Some hrefs are relative and some are absolute, so joining every href onto
  a base can produce a string with an absolute URL glued onto a prefix.

  If `url` contains one or more http:// or https:// occurrences, everything
  before the LAST occurrence is dropped and the remainder is returned.
  A string with no scheme in it (a plain relative href) is returned unchanged."
  [url]
  ;; The leading greedy `.*` consumes as much as it can, so the capture group
  ;; starts at the last scheme in the string. `https?` also covers https
  ;; links, which the previous http-only pattern left unrepaired.
  (string/replace url #".*(https?://.*$)" "$1"))
(defn fetch-url-
  "Fetches and parses a single page, without any caching.

  `url` is first normalized with `fix-url`, then loaded through
  `html/html-resource`. If anything throws during the fetch/parse, the
  exception is printed and an empty map is returned instead of propagating,
  so callers never see a fetch exception from this function."
  [url]
  (let [url (fix-url url)]
    (try
      (html/html-resource (java.net.URL. url))
      (catch Exception e
        (prn "Exception during fetch " e)
        {}))))

(def fetch-url
  "Cached version of `fetch-url-`, for faster iterations in development.

  Successful results are remembered per `url` for the lifetime of the
  process. The empty-map failure value returned by `fetch-url-` is NOT
  cached, so a transient network error is retried on the next call instead
  of permanently making that URL look empty."
  (let [cache (atom {})]
    (fn [url]
      ;; `find` (rather than `get`) so a legitimately cached nil still counts
      ;; as a hit.
      (if-let [hit (find @cache url)]
        (val hit)
        (let [page (fetch-url- url)]
          ;; {} is exactly the sentinel fetch-url- returns on an exception.
          (when-not (= {} page)
            (swap! cache assoc url page))
          page)))))
(defn clean-text
"Removes all non-alphabetical characters and lowercases everything.
Very spartan way of cleaning."

Loading…
Cancel
Save