Merge pull request #15 from owogawc/add-new-mexico

Add new mexico
main
Taylor Hood 2 years ago committed by GitHub
commit 29d9607bb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,113 @@
import logging
import os
import re
from xmlrpc import client
import traceback
from bs4 import BeautifulSoup as bs
from lottery_data_scraper.schemas import GameSchema
from lottery_data_scraper.util import fetch_html
logger = logging.getLogger(__name__)
BASE_URL = "https://www.nmlottery.com"
INDEX_URL = "https://www.nmlottery.com/games/scratchers"
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
}
def get_games(site_url):
"""
Takes the URL from the scratcher site
parses page for game ids and game info
returns and list of tuples with the id and game info for each game
"""
html = fetch_html(site_url)
soup = bs(html, "html.parser")
games_html = soup.find_all("div", class_="filter-block")
ids = [
re.search("\d+", id.text).group(0)
for id in soup.find_all("p", class_="game-number")
]
game_names = [name.text for name in soup.find_all("h3")]
return list(zip(ids, game_names, games_html))
def process_game(game_info):
"""
function takes game info: [game id, game_name, game_html_data]
parses info to find specific game data
ex name, game_id, price, odds, prizes, how to play, image_url
returns game object
"""
game_html = game_info[2]
name = game_info[1]
game_id = game_info[0]
price = float(game_html.find("p", class_="price").text.replace("$", ""))
how_to_play = game_html.find("p", class_="how-to-play").find_next("span").text
prizes = [
{
"prize": row[0].strip(),
"value": price
if "prize ticket" in row[0].lower()
else float(row[0].replace("$", "").replace(",", "")),
"claimed": int(row[2].replace(",", "")) - int(row[3].replace(",", "")),
"available": int(row[3].replace(",", "")),
"total": int(row[2].replace(",", "")),
"odds": float(row[1].replace(",", "")),
}
for row in [
row.text.split("\n")[1:-1] for row in game_html.table.find_all("tr")[1:]
]
]
num_of_tix = int(prizes[0]["odds"] * prizes[0]["total"])
image_url = game_html.find("div", class_="scratcher-image").find_next("img")["src"]
game = {
"name": name,
"game_id": game_id,
"price": price,
"how_to_play": how_to_play,
"prizes": prizes,
"num_tx_initial": num_of_tix,
"state": "nm",
"image_urls": f'["{image_url}"]',
}
return game
def main():
final_games = []
games = get_games(INDEX_URL)
for game in games:
try:
game = process_game(game)
final_games.append(game)
except Exception as e:
logger.warning(f"Unable to process game: {game[0]}-{game[1]}")
logger.warning(e)
traceback.print_exception(e)
return final_games
if __name__ == "__main__":
games = main()
schema = GameSchema(many=True)
print(schema.dumps(games))

@ -3,6 +3,7 @@ import os
import requests
from tempfile import gettempdir
def fetch_html(url):
"""
Helper to fetch and cache html responses.

Loading…
Cancel
Save