Merge pull request #15 from owogawc/add-new-mexico

Add new mexico
3 years ago · 29d9607bb6
parent 8b95d387e6 a4f7cd940a
commit 29d9607bb6
2 changed files with 114 additions and 0 deletions
--- a/lottery_data_scraper/new_mexico.py
+++ b/lottery_data_scraper/new_mexico.py
@ -0,0 +1,113 @@
+import json
+import logging
+import os
+import re
+import traceback
+from xmlrpc import client
+
+from bs4 import BeautifulSoup as bs
+from lottery_data_scraper.schemas import GameSchema
+from lottery_data_scraper.util import fetch_html
+
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Root of the New Mexico Lottery site.
+# NOTE(review): not referenced by the functions shown in this module.
+BASE_URL = "https://www.nmlottery.com"
+# Scratcher listing page; main() passes this to get_games().
+INDEX_URL = "https://www.nmlottery.com/games/scratchers"
+# Browser-like request headers.
+# NOTE(review): fetch_html() is called with the URL only, so these headers
+# are not passed along by this module as shown -- confirm whether they are
+# still needed.
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
+}
+
+
+def get_games(site_url):
+    """
+    Takes the URL from the scratcher site
+    parses page for game ids and game info
+    returns and list of tuples with the id and game info for each game
+    """
+    html = fetch_html(site_url)
+    soup = bs(html, "html.parser")
+
+    games_html = soup.find_all("div", class_="filter-block")
+
+    ids = [
+        re.search("\d+", id.text).group(0)
+        for id in soup.find_all("p", class_="game-number")
+    ]
+
+    game_names = [name.text for name in soup.find_all("h3")]
+
+    return list(zip(ids, game_names, games_html))
+
+
+def process_game(game_info):
+    """
+    function takes game info: [game id, game_name, game_html_data]
+
+    parses info to find specific game data
+    ex name, game_id, price, odds, prizes, how to play, image_url
+
+    returns game object
+    """
+
+    game_html = game_info[2]
+
+    name = game_info[1]
+
+    game_id = game_info[0]
+
+    price = float(game_html.find("p", class_="price").text.replace("$", ""))
+
+    how_to_play = game_html.find("p", class_="how-to-play").find_next("span").text
+
+    prizes = [
+        {
+            "prize": row[0].strip(),
+            "value": price
+            if "prize ticket" in row[0].lower()
+            else float(row[0].replace("$", "").replace(",", "")),
+            "claimed": int(row[2].replace(",", "")) - int(row[3].replace(",", "")),
+            "available": int(row[3].replace(",", "")),
+            "total": int(row[2].replace(",", "")),
+            "odds": float(row[1].replace(",", "")),
+        }
+        for row in [
+            row.text.split("\n")[1:-1] for row in game_html.table.find_all("tr")[1:]
+        ]
+    ]
+
+    num_of_tix = int(prizes[0]["odds"] * prizes[0]["total"])
+
+    image_url = game_html.find("div", class_="scratcher-image").find_next("img")["src"]
+
+    game = {
+        "name": name,
+        "game_id": game_id,
+        "price": price,
+        "how_to_play": how_to_play,
+        "prizes": prizes,
+        "num_tx_initial": num_of_tix,
+        "state": "nm",
+        "image_urls": f'["{image_url}"]',
+    }
+
+    return game
+
+
+def main():
+    final_games = []
+    games = get_games(INDEX_URL)
+    for game in games:
+        try:
+            game = process_game(game)
+            final_games.append(game)
+        except Exception as e:
+            logger.warning(f"Unable to process game: {game[0]}-{game[1]}")
+            logger.warning(e)
+            traceback.print_exception(e)
+    return final_games
+
+
+if __name__ == "__main__":
+    games = main()
+    schema = GameSchema(many=True)
+    print(schema.dumps(games))
--- a/lottery_data_scraper/util.py
+++ b/lottery_data_scraper/util.py
@ -3,6 +3,7 @@ import os
 import requests
 from tempfile import gettempdir

+
 def fetch_html(url):
    """
    Helper to fetch and cache html responses.