You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

114 lines
3.0 KiB
Python

import logging
import os
import re
from xmlrpc import client
import traceback
from bs4 import BeautifulSoup as bs
import requests
from lotto_site_parsers.util import save_image
from lotto_site_parsers.util import save_game
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Taken from the environment when set, otherwise a local default.
# NOTE(review): presumably the address of the game database service; it is
# not referenced anywhere else in this module -- confirm it is still needed.
DB_REPO_URI = os.getenv("DB_REPO_URI", "http://localhost:8989")

# Site root and the scratchers index page that main() scrapes.
BASE_URL = "https://www.nmlottery.com"
INDEX_URL = "https://www.nmlottery.com/games/scratchers"

# Headers sent with the page request and handed to save_image().
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
}
def get_games(site_url, timeout=30):
    """
    Fetch the scratcher index page and collect the games listed on it.

    Args:
        site_url: URL of the scratchers index page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of (game_id, game_name, game_html) tuples. game_id is the
        first run of digits found in a "game-number" paragraph, game_name is
        the text of an <h3> heading and game_html is a "filter-block" <div>
        element.

    Raises:
        requests.RequestException: on network failure or timeout.
        AttributeError: if a "game-number" paragraph contains no digits.
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    html = requests.get(site_url, headers=HEADERS, timeout=timeout).text
    soup = bs(html, "html.parser")
    games_html = soup.find_all("div", class_="filter-block")
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    game_ids = [
        re.search(r"\d+", number_tag.text).group(0)
        for number_tag in soup.find_all("p", class_="game-number")
    ]
    game_names = [heading.text for heading in soup.find_all("h3")]
    # NOTE(review): the three lists are collected page-wide and paired purely
    # by position (zip stops at the shortest). This assumes every game block
    # carries exactly one game number and one <h3>, and that no unrelated
    # <h3> precedes them on the page -- confirm against the live markup.
    return list(zip(game_ids, game_names, games_html))
def _parse_prize_row(cells, ticket_price):
    """
    Convert the text cells of one prize-table row into a prize dict.

    cells[0] is the prize text, cells[1] the odds, cells[2] the total count
    and cells[3] the remaining count. ticket_price is used as the value of
    any row whose prize text contains "prize ticket".
    """
    label = cells[0]
    if "prize ticket" in label.lower():
        value = ticket_price
    else:
        value = float(label.replace("$", "").replace(",", ""))
    total = int(cells[2].replace(",", ""))
    available = int(cells[3].replace(",", ""))
    odds = float(cells[1].replace(",", ""))
    return {
        "prize": label.strip(),
        "value": value,
        "claimed": total - available,
        "available": available,
        "total": total,
        "odds": odds,
    }


def process_game(game_info):
    """
    Build the game record for a single scratcher.

    game_info is the sequence (game id, game name, game html element). The
    html element is searched for the ticket price, the how-to-play text, the
    prize table and the ticket image; the image is stored through
    save_image().

    Returns a dict with the keys name, game_id, price, how_to_play, prizes,
    num_tx_initial, state and image_urls.
    """
    game_id, name, game_html = game_info[0], game_info[1], game_info[2]

    price_text = game_html.find("p", class_="price").text
    price = float(price_text.replace("$", ""))

    how_to_play_tag = game_html.find("p", class_="how-to-play")
    how_to_play = how_to_play_tag.find_next("span").text

    # The first <tr> is skipped; each remaining row's text is split on
    # newlines and the first and last pieces are dropped.
    prizes = []
    for table_row in game_html.table.find_all("tr")[1:]:
        cells = table_row.text.split("\n")[1:-1]
        prizes.append(_parse_prize_row(cells, price))

    # Initial ticket count is derived from the first prize row: odds * total.
    first_prize = prizes[0]
    num_of_tix = int(first_prize["odds"] * first_prize["total"])

    image_holder = game_html.find("div", class_="scratcher-image")
    image_url = image_holder.find_next("img")["src"]
    image_location = save_image("nm", game_id, image_url, headers=HEADERS)

    return {
        "name": name,
        "game_id": game_id,
        "price": price,
        "how_to_play": how_to_play,
        "prizes": prizes,
        "num_tx_initial": num_of_tix,
        "state": "nm",
        "image_urls": '["{}"]'.format(image_location),
    }
def main():
    """
    Scrape every game listed on the index page and save it.

    Games are handled independently: a failure while parsing or saving one
    game is logged and the loop moves on to the next game.
    """
    for game_info in get_games(INDEX_URL):
        try:
            # Keep the parsed game under its own name so game_info is still
            # the (id, name, html) tuple if save_game() raises; indexing the
            # parsed dict with [0] in the handler would raise KeyError and
            # abort the whole run.
            parsed_game = process_game(game_info)
            save_game(parsed_game)
        except Exception as e:
            logger.warning(f"Unable to process game: {game_info[0]}-{game_info[1]}")
            logger.warning(e)
            # print_exc() prints the exception being handled on every
            # Python 3 version; print_exception(e) with a single argument
            # requires Python 3.10+.
            traceback.print_exc()


if __name__ == "__main__":
    main()