Initial commit

main
anela 2 years ago
parent 2b8e352db7
commit 9e7a64d711

@ -0,0 +1,118 @@
import logging
import os
import re
from xmlrpc import client
import html2text
import requests
from selenium import webdriver
from bs4 import BeautifulSoup as bs
from lotto_site_parsers.util import save_image
# XML-RPC endpoint that save_game() sends scraped games to; can be overridden
# through the DB_REPO_URI environment variable.
DB_REPO_URI = os.environ.get("DB_REPO_URI", "http://localhost:8989")

# Maryland Lottery site locations.
BASE_URL = "https://www.mdlottery.com"
# Public scratch-offs page; stored as the "url" of every scraped game.
BASE_INDEX_URL = "https://www.mdlottery.com/games/scratch-offs/"
# admin-ajax endpoint that games() loads in the browser to obtain the listing.
INDEX_URL = "https://www.mdlottery.com/wp-admin/admin-ajax.php?action=jquery_shortcode&shortcode=scratch_offs"

# Browser-like request headers.
# NOTE(review): not referenced by the code visible in this section - confirm
# whether anything else still relies on them.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0",
    "Host": "www.mdlottery.com",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.5",
}

# Shared module-level helpers: logger, HTTP session, and HTML-to-text converter.
logger = logging.getLogger(__name__)
s = requests.Session()
h = html2text.HTML2Text()
def _name(game_div):
    """Return the text of the element with class ``name`` inside *game_div*."""
    name_tag = game_div.find(class_="name")
    return name_tag.text
def _num(game_li):
    """Return the game number shown next to the ``"Game: "`` label.

    The label string is located inside *game_li* and the text of the node
    that follows it (its ``.next``) is returned unchanged, as a string.
    """
    label = game_li.find(text="Game: ")
    following = label.next
    return following.text
def _price(game_li):
    """Return the ticket price as an int, read from the ``price`` element.

    Any ``$`` characters are removed before the conversion; a value that is
    still not an integer literal raises ``ValueError``.
    """
    price_text = game_li.find(class_="price").text
    without_dollar = price_text.replace("$", "")
    return int(without_dollar)
def _odds(game_li):
    """Return the text of the ``probability`` element converted to a float."""
    return float(game_li.find(class_="probability").text)
def _num_tx(game_li):
    """Return the ticket count derived from the prize table and the odds.

    The ``available`` and ``claimed`` counts of every prize tier are added up
    and the total is multiplied by the odds figure; the product is truncated
    to an int.
    NOTE(review): presumably the odds are expressed as "1 in N", so that
    winners * N approximates the number of tickets printed - confirm.
    """
    prize_count = 0
    for tier in _prizes(game_li):
        prize_count += tier["available"] + tier["claimed"]
    return int(prize_count * _odds(game_li))
def _prizes(game_li):
    """Parse the prize table of *game_li* into a list of dicts.

    The first ``<tr>`` of the table is skipped (treated as the header). Each
    remaining row yields a dict with:

    * ``prize``     - text of the first cell, unchanged
    * ``value``     - that text as a float, with ``$`` and ``,`` removed
    * ``available`` - third cell as an int
    * ``claimed``   - second cell as an int, minus ``available``
      (NOTE(review): the second cell is presumably the starting prize
      count - confirm against the page)
    """

    def parse_row(row):
        tds = row.find_all("td")
        label = tds[0].text
        amount = float(re.sub(r"[\$,]", "", label))
        remaining = int(tds[2].text)
        return {
            "prize": label,
            "value": amount,
            "available": remaining,
            "claimed": int(tds[1].text) - remaining,
        }

    body_rows = game_li.find("table").find_all("tr")[1:]
    return [parse_row(row) for row in body_rows]
def _how_to_play(game_li):
    """Return the ``how-to-play`` element of *game_li* run through html2text.

    The element is serialised with ``str()`` and handed to the module-level
    ``h`` converter.
    """
    section = game_li.find(class_="how-to-play")
    markup = str(section)
    return h.handle(markup)
def games(requests):
    """Scrape the scratch-off listing and return one dict per game.

    The listing at ``INDEX_URL`` is loaded in a headless Firefox instance,
    the resulting page source is parsed, and every ``<li class="ticket">``
    element is converted into a game dict.

    Args:
        requests: Not used by this function; kept so existing callers that
            pass a session keep working. (It shadows the ``requests`` module
            inside this function only.)

    Returns:
        A list of dicts with the keys ``name``, ``game_id``, ``url``,
        ``how_to_play``, ``price``, ``state``, ``num_tx_initial`` and
        ``prizes``.
    """
    # Headless needed to run on server with no display.
    # The flag is passed as a command-line argument because the
    # ``Options.headless`` setter was deprecated and later removed from
    # Selenium; assigning the attribute is silently ignored on new versions.
    options = webdriver.firefox.options.Options()
    options.add_argument("-headless")
    driver = webdriver.Firefox(options=options)
    try:
        driver.get(INDEX_URL)
        html = driver.page_source
    finally:
        # Always shut the browser down, otherwise every run (successful or
        # not) leaves a Firefox/geckodriver process behind.
        driver.quit()

    soup = bs(html, "lxml")
    return [
        {
            "name": _name(game_li),
            "game_id": _num(game_li),
            "url": BASE_INDEX_URL,
            "how_to_play": _how_to_play(game_li),
            "price": _price(game_li),
            "state": "md",
            "num_tx_initial": _num_tx(game_li),
            "prizes": _prizes(game_li),
        }
        for game_li in soup.find_all("li", class_="ticket")
    ]
def fetch_games():
    """Return a new list holding every game produced by ``games(s)``."""
    return list(games(s))
def save_game(game):
    """Send a single game dict to the XML-RPC service at ``DB_REPO_URI``.

    The game is wrapped in a one-element list and passed to the remote
    ``persist`` method; a debug line with its id and name is logged first.
    """
    with client.ServerProxy(DB_REPO_URI) as repo:
        game_id, name = game["game_id"], game["name"]
        logger.debug("Saving game: {} - {}".format(game_id, name))
        repo.persist([game])
def main():
    """Scrape all games and save them one by one to ``DB_REPO_URI``."""
    announcement = "Saving games to {}".format(DB_REPO_URI)
    logger.info(announcement)
    scraped = fetch_games()
    for record in scraped:
        save_game(record)


# Run the scraper only when the module is executed as a script.
if __name__ == "__main__":
    main()
Loading…
Cancel
Save