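# NOTE: the following lines are repository web-UI residue captured along with
# the file; they are not part of the program.
# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 120 lines
# 3.1 KiB
# Python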

import logging
import os
import re
from xmlrpc import client
import html2text
import requests
from selenium import webdriver
from bs4 import BeautifulSoup as bs
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Shared HTTP session; fetch_games() passes it to games().
s = requests.Session()
# HTML-to-text converter used by _how_to_play().
h = html2text.HTML2Text()
# Taken from the DB_REPO_URI environment variable, with a localhost default.
# In the code visible here only the commented-out save_game() refers to it
# (presumably an XML-RPC persistence endpoint -- confirm before re-enabling).
DB_REPO_URI = os.environ.get("DB_REPO_URI", "http://localhost:8989")
# Site root; not referenced by the code visible in this file.
BASE_URL = "https://www.mdlottery.com"
# Scratch-offs page; stored as the "url" field of every game built by games().
BASE_INDEX_URL = "https://www.mdlottery.com/games/scratch-offs/"
# Browser-like request headers; not referenced by the code visible in this file.
HEADERS = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0",
"Host": "www.mdlottery.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.5",
}
# URL that games() loads in the browser and parses for "ticket" list items.
INDEX_URL = "https://www.mdlottery.com/wp-admin/admin-ajax.php?action=jquery_shortcode&shortcode=scratch_offs"
def _name(game_div):
    """Return the text of the first descendant of *game_div* with class ``name``."""
    name_tag = game_div.find(class_="name")
    return name_tag.text
def _num(game_li):
    """Return the text of the element that directly follows the "Game: " label."""
    label = game_li.find(text="Game: ")
    following = label.next
    return following.text
def _price(game_li):
    """Return the ticket price as an int, read from the ``price`` element.

    Every "$" character is removed from the element text before conversion.
    """
    price_text = game_li.find(class_="price").text
    digits = price_text.replace("$", "")
    return int(digits)
def _odds(game_li):
    """Return the text of the ``probability`` element converted to a float."""
    return float(game_li.find(class_="probability").text)
def _num_tx(game_li):
    """Return the total prize count multiplied by the odds, truncated to an int.

    The prize count is the sum of ``available + claimed`` over every entry
    returned by ``_prizes``. Presumably the odds are a "1 in N" figure, making
    the product an estimate of tickets printed -- confirm against the site.
    """
    prize_count = 0
    for entry in _prizes(game_li):
        prize_count += entry["available"] + entry["claimed"]
    return int(prize_count * _odds(game_li))
def _parse_count(text):
    """Convert a ticket-count cell such as ``"1,200"`` or ``" 45 "`` to an int."""
    return int(text.replace(",", ""))


def _prizes(game_li):
    """Parse the prize table of one game into a list of dicts.

    The first ``<tr>`` of the table is skipped as the header row. For each
    remaining row the first cell is the prize label, the second cell is read
    as the starting count and the third as the remaining count.

    Returns:
        A list of dicts with the keys ``prize`` (cell text as found),
        ``value`` (prize as a float with "$" and "," removed),
        ``available`` (third cell) and ``claimed`` (second cell minus third).

    Raises:
        ValueError: if a prize or count cell is not numeric after cleanup.
        IndexError: if a row has some ``<td>`` cells but fewer than three.
    """
    table = game_li.find("table")
    prizes = []
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        if not cells:
            # Rows without any data cells (e.g. extra header rows built from
            # <th>) hold no prize information.
            continue
        prize = cells[0].text
        value = float(re.sub(r"[\$,]", "", prize))
        # Counts may carry thousands separators just like the prize amount.
        available = _parse_count(cells[2].text)
        claimed = _parse_count(cells[1].text) - available
        prizes.append(
            {"prize": prize, "value": value, "available": available, "claimed": claimed}
        )
    return prizes
def _how_to_play(game_li):
    """Return the ``how-to-play`` element rendered by the module's html2text converter."""
    instructions = game_li.find(class_="how-to-play")
    markup = str(instructions)
    return h.handle(markup)
def games(requests):
    """Scrape the scratch-off index and return one dict per listed game.

    INDEX_URL is loaded in a headless Firefox instance, the resulting page
    source is parsed with BeautifulSoup, and every ``<li class="ticket">``
    element becomes one game dict.

    Args:
        requests: Unused. Kept so existing callers (which pass a session
            positionally) keep working. Note that inside this function the
            name shadows the imported ``requests`` module.

    Returns:
        A list of dicts with the keys ``name``, ``game_id``, ``url``,
        ``how_to_play``, ``price``, ``state``, ``num_tx_initial`` and
        ``prizes``.
    """
    # Headless needed to run on server with no display
    options = webdriver.firefox.options.Options()
    # The ``headless`` attribute is deprecated and later removed in Selenium 4;
    # the command-line flag works on old and new releases alike.
    options.add_argument("-headless")
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(INDEX_URL)
        html = driver.page_source
    finally:
        # Always shut the browser down so no Firefox process is left behind,
        # even when loading the page fails.
        driver.quit()
    soup = bs(html, "lxml")
    game_lis = soup.find_all("li", class_="ticket")
    return [
        {
            "name": _name(game_li),
            "game_id": _num(game_li),
            "url": BASE_INDEX_URL,
            "how_to_play": _how_to_play(game_li),
            "price": _price(game_li),
            "state": "md",
            "num_tx_initial": _num_tx(game_li),
            "prizes": _prizes(game_li),
        }
        for game_li in game_lis
    ]
def fetch_games():
    """Return the first two games from ``games``, printing each one as it is collected."""
    first_two = games(s)[:2]
    collected = []
    for entry in first_two:
        print("fetch_games!", entry)
        collected += [entry]
    return collected
# def save_game(game):
# with client.ServerProxy(DB_REPO_URI) as c:
# logger.debug("Saving game: {} - {}".format(game["game_id"], game["name"]))
# c.persist([game])
def main():
    """Script entry point: fetch the games and print each one."""
    print('inside main')
    # logger.info("Saving games to {}".format(DB_REPO_URI))
    fetched = fetch_games()
    for record in fetched:
        print("main!", record)


# Run the scraper only when executed as a script, not on import.
if __name__ == "__main__":
    main()