You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
114 lines
3.0 KiB
Python
114 lines
3.0 KiB
Python
import logging
|
|
import os
|
|
import re
|
|
from xmlrpc import client
|
|
import traceback
|
|
|
|
from bs4 import BeautifulSoup as bs
|
|
import requests
|
|
|
|
|
|
from lotto_site_parsers.util import save_image
|
|
from lotto_site_parsers.util import save_game
|
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Read from the DB_REPO_URI environment variable; falls back to a local
# address when the variable is not set.
DB_REPO_URI = os.getenv("DB_REPO_URI", "http://localhost:8989")

# Root of the lottery site and the scratcher index page that gets scraped.
BASE_URL = "https://www.nmlottery.com"
INDEX_URL = "https://www.nmlottery.com/games/scratchers"

# Request headers sent with every HTTP call made by this module.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
}
|
|
|
|
|
|
def get_games(site_url, timeout=30):
    """
    Fetch the scratcher index page and collect the raw data for every game.

    Args:
        site_url: URL of the scratcher index page.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        A list of ``(game_id, game_name, game_html)`` tuples, where
        ``game_id`` is the first run of digits found in a ``p.game-number``
        element, ``game_name`` is the text of an ``h3`` element and
        ``game_html`` is a ``div.filter-block`` element.

    Raises:
        requests.RequestException: if the request fails or times out.
        AttributeError: if a ``p.game-number`` element contains no digits.
    """
    html = requests.get(site_url, headers=HEADERS, timeout=timeout).text
    soup = bs(html, "html.parser")

    games_html = soup.find_all("div", class_="filter-block")

    # Raw string so that "\d" is a regex digit class rather than an invalid
    # string escape.
    ids = [
        re.search(r"\d+", number_tag.text).group(0)
        for number_tag in soup.find_all("p", class_="game-number")
    ]

    game_names = [heading.text for heading in soup.find_all("h3")]

    # The three lists are gathered independently over the whole page and
    # paired by position; zip() stops at the shortest one.
    # NOTE(review): this assumes the page has exactly one h3 and one
    # game-number per filter-block, in the same order -- confirm against the
    # live markup.
    return list(zip(ids, game_names, games_html))
|
|
|
|
|
|
def process_game(game_info):
    """
    Turn one index-page entry into a game record.

    Args:
        game_info: sequence of ``[game_id, game_name, game_html_data]`` as
            produced by ``get_games``.

    Returns:
        A dict with the keys ``name``, ``game_id``, ``price``,
        ``how_to_play``, ``prizes``, ``num_tx_initial``, ``state`` and
        ``image_urls``.

    The game image is stored through ``save_image`` as part of processing.
    """
    game_id = game_info[0]
    name = game_info[1]
    game_html = game_info[2]

    price_text = game_html.find("p", class_="price").text
    price = float(price_text.replace("$", ""))

    how_to_play_label = game_html.find("p", class_="how-to-play")
    how_to_play = how_to_play_label.find_next("span").text

    # Skip the first table row, then break every remaining row into its
    # newline-separated cell texts, dropping the first and last fragments.
    # NOTE(review): presumably the first row is the header and the outer
    # fragments are empty strings -- confirm against the live markup.
    table_rows = game_html.table.find_all("tr")[1:]
    rows_as_cells = [table_row.text.split("\n")[1:-1] for table_row in table_rows]

    prizes = []
    for cells in rows_as_cells:
        label = cells[0]
        if "prize ticket" in label.lower():
            # A prize that is itself a ticket is valued at the ticket price.
            value = price
        else:
            value = float(label.replace("$", "").replace(",", ""))

        total = int(cells[2].replace(",", ""))
        available = int(cells[3].replace(",", ""))
        odds = float(cells[1].replace(",", ""))

        prizes.append(
            {
                "prize": label.strip(),
                "value": value,
                "claimed": total - available,
                "available": available,
                "total": total,
                "odds": odds,
            }
        )

    # Initial ticket count is derived from the first prize row: odds * total.
    first_prize = prizes[0]
    num_of_tix = int(first_prize["odds"] * first_prize["total"])

    image_holder = game_html.find("div", class_="scratcher-image")
    image_url = image_holder.find_next("img")["src"]
    image_location = save_image("nm", game_id, image_url, headers=HEADERS)

    return {
        "name": name,
        "game_id": game_id,
        "price": price,
        "how_to_play": how_to_play,
        "prizes": prizes,
        "num_tx_initial": num_of_tix,
        "state": "nm",
        "image_urls": '["{}"]'.format(image_location),
    }
|
|
|
|
|
|
def main():
    """
    Scrape the scratcher index page and save every game found on it.

    Each game is processed and saved independently: a failure on one game is
    logged, its traceback is printed to stderr, and the loop moves on to the
    next game.
    """
    games = get_games(INDEX_URL)
    for game_info in games:
        try:
            # Use a separate name for the processed record so that the
            # handler below can still read the id and name from the original
            # tuple if save_game() fails.
            game = process_game(game_info)
            save_game(game)
        except Exception as e:
            logger.warning(
                "Unable to process game: %s-%s", game_info[0], game_info[1]
            )
            logger.warning(e)
            # print_exc() prints the exception being handled and, unlike the
            # single-argument print_exception(e), also works before
            # Python 3.10.
            traceback.print_exc()
|
|
|
|
|
|
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|