adding new mexico to main
main
Taylor Hood 2 years ago
commit b64fe3a307

@ -0,0 +1,113 @@
import logging
import os
import re
from xmlrpc import client
import traceback
from bs4 import BeautifulSoup as bs
from lottery_data_scraper.schemas import GameSchema
from lottery_data_scraper.util import fetch_html
logger = logging.getLogger(__name__)
BASE_URL = "https://www.nmlottery.com"
INDEX_URL = "https://www.nmlottery.com/games/scratchers"
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
}
def get_games(site_url):
"""
Takes the URL from the scratcher site
parses page for game ids and game info
returns and list of tuples with the id and game info for each game
"""
html = fetch_html(site_url)
soup = bs(html, "html.parser")
games_html = soup.find_all("div", class_="filter-block")
ids = [
re.search("\d+", id.text).group(0)
for id in soup.find_all("p", class_="game-number")
]
game_names = [name.text for name in soup.find_all("h3")]
return list(zip(ids, game_names, games_html))
def process_game(game_info):
"""
function takes game info: [game id, game_name, game_html_data]
parses info to find specific game data
ex name, game_id, price, odds, prizes, how to play, image_url
returns game object
"""
game_html = game_info[2]
name = game_info[1]
game_id = game_info[0]
price = float(game_html.find("p", class_="price").text.replace("$", ""))
how_to_play = game_html.find("p", class_="how-to-play").find_next("span").text
prizes = [
{
"prize": row[0].strip(),
"value": price
if "prize ticket" in row[0].lower()
else float(row[0].replace("$", "").replace(",", "")),
"claimed": int(row[2].replace(",", "")) - int(row[3].replace(",", "")),
"available": int(row[3].replace(",", "")),
"total": int(row[2].replace(",", "")),
"odds": float(row[1].replace(",", "")),
}
for row in [
row.text.split("\n")[1:-1] for row in game_html.table.find_all("tr")[1:]
]
]
num_of_tix = int(prizes[0]["odds"] * prizes[0]["total"])
image_url = game_html.find("div", class_="scratcher-image").find_next("img")["src"]
game = {
"name": name,
"game_id": game_id,
"price": price,
"how_to_play": how_to_play,
"prizes": prizes,
"num_tx_initial": num_of_tix,
"state": "nm",
"image_urls": f'["{image_url}"]',
}
return game
def main():
final_games = []
games = get_games(INDEX_URL)
for game in games:
try:
game = process_game(game)
final_games.append(game)
except Exception as e:
logger.warning(f"Unable to process game: {game[0]}-{game[1]}")
logger.warning(e)
traceback.print_exception(e)
return final_games
if __name__ == "__main__":
games = main()
schema = GameSchema(many=True)
print(schema.dumps(games))

@ -3,6 +3,7 @@ import os
import requests import requests
from tempfile import gettempdir from tempfile import gettempdir
def fetch_html(url): def fetch_html(url):
""" """
Helper to fetch and cache html responses. Helper to fetch and cache html responses.

Loading…
Cancel
Save