From 398fd058080978a769960a6a831e4e39cb76100c Mon Sep 17 00:00:00 2001
From: tdhood
Date: Tue, 11 Apr 2023 17:20:29 -0700
Subject: [PATCH] changes were made from pull request added file description at top

---
 lottery_data_scraper/louisiana.py | 52 ++++++++++++++++++++++++++-----
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/lottery_data_scraper/louisiana.py b/lottery_data_scraper/louisiana.py
index 149c89d..caae394 100644
--- a/lottery_data_scraper/louisiana.py
+++ b/lottery_data_scraper/louisiana.py
@@ -1,3 +1,42 @@
+"""
+Scrapes the Louisiana lottery website for scratch-off ticket
+data and calculates the expected value for each game.
+
+Louisiana publishes the number of tickets printed and how many
+tickets are printed at each prize level.
+
+We can calculate the expected value of a game by summing
+the value of all the prizes and dividing that by the cost
+of all the tickets.
+
+The louisianalottery website has a "top prizes remaining" or an "index" page that
+has links to every game that could still be profitable.
+Each individual game has a section for the "game rules" page and a prize table.
+We can use each individual game page to gather the important data, and
+then run our calculations.
+
+Website that we'll be scraping:
+https://louisianalottery.com/scratch-offs/top-prizes-remaining
+
+Example usage:
+    python -m louisiana
+Or:
+    LOGLEVEL=DEBUG USE_CACHE=True python -m louisiana
+
+The following behavior is configurable through shell environment variables.
+
+Set LOGLEVEL to print useful debug info to console.
+LOGLEVEL=[DEBUG,INFO,WARNING,ERROR,CRITICAL]
+Defaults to WARNING.
+
+Set USE_CACHE to cache responses. This speeds up development
+and is nice to the servers we're hitting.
+USE_CACHE=[True]
+Defaults to False. Note: Setting this env variable to the string False
+will cause it to use cache because the string "False" evaluates to Truthy.
+Either set it to True or don't set it.
+"""
+
 import sys
 import traceback
 from copy import deepcopy
@@ -25,7 +64,7 @@ def parse_index(html):
     game_urls = list(map(lambda x: "https:" + x.attrs["href"], game_hrefs))
     return game_urls
 
-
+# TODO: convert pandas to beautiful soup
 def parse_game(url, html):
     soup = bs(html, "lxml")
     price = soup.select('div[id="scratch-off-prize-info"] td')[1].text.replace("$", "")
@@ -41,12 +80,11 @@ def parse_game(url, html):
     num_tx = int(grand_prize_odds * grand_prize_num)
     table = soup.find_all("table")[2]
     df = pd.read_html(str(table))[0]
-    df.iloc[:, 0] = df.iloc[:, 0].str.replace("$", "")  # noqa: E231
     df = df.replace("TICKET", price)
     prizes = [
         {
             "prize": prize,
-            "value": float(prize.replace(",", "")),
+            "value": float(prize.replace("$", "").replace(",", "")),
             "claimed": int(claimed),
             "available": int(total) - int(claimed),
         }
@@ -65,15 +103,15 @@ def parse_game(url, html):
 
 
 def main():
-    index_html = requests.get(INDEX_URL).text
+    index_html = fetch_html(INDEX_URL)
     game_urls = parse_index(index_html)
-    url_htmls = zip(game_urls, [requests.get(url).text for url in game_urls])
+    url_htmls = zip(game_urls, [fetch_html(url) for url in game_urls])
     games = []
     for url, html in url_htmls:
         try:
             game = parse_game(url, html)
         except Exception as e:
-            logger.warn("Unable to parse {}.\n{}".format(url, e))
+            logger.error("Unable to parse {}.\n{}".format(url, e))
             continue
         games.append(game)
     return games
@@ -82,4 +120,4 @@ def main():
 if __name__ == "__main__":
     games = main()
     schema = GameSchema(many=True)
-    print(schema.dumps(games))
\ No newline at end of file
+    print(schema.dumps(games))