Initial commit

main
anela 2 years ago
parent ad73070e6b
commit 164d4bec0e

@ -0,0 +1,113 @@
import logging
import os
import re
from xmlrpc import client
import traceback
from bs4 import BeautifulSoup as bs
import requests
from lotto_site_parsers.util import save_image
from lotto_site_parsers.util import save_game
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Read from the environment, falling back to a local default.
# NOTE(review): presumably the address of the DB repository service - confirm.
DB_REPO_URI = os.getenv("DB_REPO_URI", "http://localhost:8989")

# Site root and the scratcher index page that main() scrapes.
BASE_URL = "https://www.nmlottery.com"
INDEX_URL = "https://www.nmlottery.com/games/scratchers"

# Request headers sent with the page fetch and handed to save_image;
# the User-Agent is a desktop Firefox string.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:70.0) Gecko/20100101 Firefox/70.0",
}
def get_games(site_url, timeout=30):
    """
    Fetch the scratcher index page and collect the games listed on it.

    Args:
        site_url: URL of the scratcher index page.
        timeout: seconds to wait for the server before giving up; passed
            straight to ``requests.get`` so a stalled connection cannot
            hang the scraper forever.

    Returns:
        A list of ``(game_id, game_name, game_html)`` tuples where
        ``game_id`` is the first run of digits found in a
        ``<p class="game-number">`` element, ``game_name`` is the text of
        an ``<h3>`` element and ``game_html`` is a
        ``<div class="filter-block">`` element.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        AttributeError: a ``game-number`` element contains no digits.
    """
    response = requests.get(site_url, headers=HEADERS, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as the index.
    response.raise_for_status()
    soup = bs(response.text, "html.parser")

    games_html = soup.find_all("div", class_="filter-block")
    ids = [
        re.search(r"\d+", number_tag.text).group(0)
        for number_tag in soup.find_all("p", class_="game-number")
    ]
    game_names = [heading.text for heading in soup.find_all("h3")]

    # The three lists are gathered independently from the whole page and
    # paired by position; zip() stops at the shortest of them.
    return list(zip(ids, game_names, games_html))
def process_game(game_info):
    """
    Turn one scraped entry into a game record.

    game_info is indexed as [game id, game_name, game_html_data]; the
    html element is searched for the price, the how-to-play text, the
    prize table and the scratcher image.

    Returns a dict with the keys name, game_id, price, how_to_play,
    prizes, num_tx_initial, state and image_urls. Any lookup or number
    conversion that fails raises; nothing is caught here.
    """
    game_id, name, game_html = game_info[0], game_info[1], game_info[2]

    price_text = game_html.find("p", class_="price").text
    price = float(price_text.replace("$", ""))

    how_to_play_tag = game_html.find("p", class_="how-to-play")
    how_to_play = how_to_play_tag.find_next("span").text

    prizes = []
    # The first <tr> is skipped (NOTE(review): presumably the header row).
    for table_row in game_html.table.find_all("tr")[1:]:
        # Drop the first and last pieces of the newline split; the rest
        # are read as: prize, odds, total, available.
        cells = table_row.text.split("\n")[1:-1]
        label = cells[0]
        shown_prize = label.strip()

        # Rows whose prize text mentions "prize ticket" are valued at the
        # ticket price; every other row is parsed as a dollar amount.
        if "prize ticket" in label.lower():
            value = price
        else:
            value = float(label.replace("$", "").replace(",", ""))

        total = int(cells[2].replace(",", ""))
        available = int(cells[3].replace(",", ""))
        odds = float(cells[1].replace(",", ""))

        prizes.append(
            {
                "prize": shown_prize,
                "value": value,
                "claimed": total - available,
                "available": available,
                "total": total,
                "odds": odds,
            }
        )

    # Initial ticket count is derived from the first prize row only.
    first_prize = prizes[0]
    num_of_tix = int(first_prize["odds"] * first_prize["total"])

    image_holder = game_html.find("div", class_="scratcher-image")
    image_url = image_holder.find_next("img")["src"]
    image_location = save_image("nm", game_id, image_url, headers=HEADERS)

    return {
        "name": name,
        "game_id": game_id,
        "price": price,
        "how_to_play": how_to_play,
        "prizes": prizes,
        "num_tx_initial": num_of_tix,
        "state": "nm",
        "image_urls": '["{}"]'.format(image_location),
    }
def main():
    """
    Scrape the scratcher index at INDEX_URL and save every game found.

    Each game is processed and saved on its own: a failure is logged as a
    warning (with traceback) and the loop moves on to the next game.
    Errors raised while fetching the index itself are not caught.
    """
    for game_info in get_games(INDEX_URL):
        try:
            save_game(process_game(game_info))
        except Exception:
            # The scraped tuple keeps its own name so the id and name are
            # still available here even when save_game is what failed;
            # indexing the processed dict with [0] would raise KeyError
            # inside this handler and abort the whole run.
            # exc_info=True attaches the traceback on every Python 3
            # version (single-argument traceback.print_exception is
            # 3.10+ only).
            logger.warning(
                "Unable to process game: %s-%s",
                game_info[0],
                game_info[1],
                exc_info=True,
            )


if __name__ == "__main__":
    main()
Loading…
Cancel
Save