Initial commit
parent
2b8e352db7
commit
9e7a64d711
@ -0,0 +1,118 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from xmlrpc import client
|
||||
|
||||
import html2text
|
||||
import requests
|
||||
from selenium import webdriver
|
||||
from bs4 import BeautifulSoup as bs
|
||||
from lotto_site_parsers.util import save_image
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
s = requests.Session()
|
||||
h = html2text.HTML2Text()
|
||||
|
||||
DB_REPO_URI = os.environ.get("DB_REPO_URI", "http://localhost:8989")
|
||||
BASE_URL = "https://www.mdlottery.com"
|
||||
BASE_INDEX_URL = "https://www.mdlottery.com/games/scratch-offs/"
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0",
|
||||
"Host": "www.mdlottery.com",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"Accept-Encoding": "gzip, deflate, br",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
}
|
||||
INDEX_URL = "https://www.mdlottery.com/wp-admin/admin-ajax.php?action=jquery_shortcode&shortcode=scratch_offs"
|
||||
|
||||
|
||||
def _name(game_div):
|
||||
return game_div.find(class_="name").text
|
||||
|
||||
|
||||
def _num(game_li):
|
||||
return game_li.find(text="Game: ").next.text
|
||||
|
||||
|
||||
def _price(game_li):
|
||||
return int(game_li.find(class_="price").text.replace("$", ""))
|
||||
|
||||
|
||||
def _odds(game_li):
|
||||
odds = game_li.find(class_="probability").text
|
||||
return float(odds)
|
||||
|
||||
|
||||
def _num_tx(game_li):
|
||||
return int(sum(p["available"] + p["claimed"] for p in _prizes(game_li)) * _odds(game_li))
|
||||
|
||||
|
||||
def _prizes(game_li):
|
||||
table = game_li.find("table")
|
||||
rows = table.find_all("tr")[1:]
|
||||
prizes = []
|
||||
for row in rows:
|
||||
cells = row.find_all("td")
|
||||
prize = cells[0].text
|
||||
value = float(re.sub(r"[\$,]", "", prize))
|
||||
available = int(cells[2].text)
|
||||
claimed = int(cells[1].text) - available
|
||||
prizes.append(
|
||||
{"prize": prize, "value": value, "available": available, "claimed": claimed}
|
||||
)
|
||||
return prizes
|
||||
|
||||
|
||||
def _how_to_play(game_li):
|
||||
return h.handle(str(game_li.find(class_="how-to-play")))
|
||||
|
||||
|
||||
def games(requests):
|
||||
# Headless needed to run on server with no display
|
||||
options = webdriver.firefox.options.Options()
|
||||
options.headless = True
|
||||
driver = webdriver.Firefox(options=options)
|
||||
driver.get(INDEX_URL)
|
||||
html = driver.page_source
|
||||
soup = bs(html, "lxml")
|
||||
game_lis = soup.find_all("li", class_="ticket")
|
||||
games = [
|
||||
{
|
||||
"name": _name(game_li),
|
||||
"game_id": _num(game_li),
|
||||
"url": BASE_INDEX_URL,
|
||||
"how_to_play": _how_to_play(game_li),
|
||||
"price": _price(game_li),
|
||||
"state": "md",
|
||||
"num_tx_initial": _num_tx(game_li),
|
||||
"prizes": _prizes(game_li),
|
||||
}
|
||||
for game_li in game_lis
|
||||
]
|
||||
return games
|
||||
|
||||
|
||||
def fetch_games():
|
||||
result_games = []
|
||||
for game in games(s):
|
||||
result_games.append(game)
|
||||
return result_games
|
||||
|
||||
|
||||
def save_game(game):
|
||||
with client.ServerProxy(DB_REPO_URI) as c:
|
||||
logger.debug("Saving game: {} - {}".format(game["game_id"], game["name"]))
|
||||
c.persist([game])
|
||||
|
||||
|
||||
def main():
|
||||
logger.info("Saving games to {}".format(DB_REPO_URI))
|
||||
for game in fetch_games():
|
||||
save_game(game)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in New Issue