From a4f7cd940a1a136161741a2b91eb250eeda26e4d Mon Sep 17 00:00:00 2001 From: tdhood Date: Fri, 21 Apr 2023 14:11:09 -0700 Subject: [PATCH] removed maryland --- lottery_data_scraper/maryland.py | 119 ------------------------------- 1 file changed, 119 deletions(-) delete mode 100644 lottery_data_scraper/maryland.py diff --git a/lottery_data_scraper/maryland.py b/lottery_data_scraper/maryland.py deleted file mode 100644 index 5d49778..0000000 --- a/lottery_data_scraper/maryland.py +++ /dev/null @@ -1,119 +0,0 @@ -import logging -import os -import re -from xmlrpc import client - -import html2text -import requests -from selenium import webdriver -from bs4 import BeautifulSoup as bs - -logger = logging.getLogger(__name__) - -s = requests.Session() -h = html2text.HTML2Text() - -DB_REPO_URI = os.environ.get("DB_REPO_URI", "http://localhost:8989") -BASE_URL = "https://www.mdlottery.com" -BASE_INDEX_URL = "https://www.mdlottery.com/games/scratch-offs/" -HEADERS = { - "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0", - "Host": "www.mdlottery.com", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "Cache-Control": "no-cache", - "Connection": "keep-alive", - "Accept-Encoding": "gzip, deflate, br", - "Accept-Language": "en-US,en;q=0.5", -} -INDEX_URL = "https://www.mdlottery.com/wp-admin/admin-ajax.php?action=jquery_shortcode&shortcode=scratch_offs" - - -def _name(game_div): - return game_div.find(class_="name").text - - -def _num(game_li): - return game_li.find(text="Game: ").next.text - - -def _price(game_li): - return int(game_li.find(class_="price").text.replace("$", "")) - - -def _odds(game_li): - odds = game_li.find(class_="probability").text - return float(odds) - - -def _num_tx(game_li): - return int(sum(p["available"] + p["claimed"] for p in _prizes(game_li)) * _odds(game_li)) - - -def _prizes(game_li): - table = game_li.find("table") - rows = table.find_all("tr")[1:] - prizes = [] - for row in rows: - cells = row.find_all("td") - prize = cells[0].text - value = float(re.sub(r"[\$,]", "", prize)) - available = int(cells[2].text) - claimed = int(cells[1].text) - available - prizes.append( - {"prize": prize, "value": value, "available": available, "claimed": claimed} - ) - return prizes - - -def _how_to_play(game_li): - return h.handle(str(game_li.find(class_="how-to-play"))) - - -def games(requests): - # Headless needed to run on server with no display - options = webdriver.firefox.options.Options() - options.headless = True - driver = webdriver.Firefox(options=options) - driver.get(INDEX_URL) - html = driver.page_source - soup = bs(html, "lxml") - game_lis = soup.find_all("li", class_="ticket") - games = [ - { - "name": _name(game_li), - "game_id": _num(game_li), - "url": BASE_INDEX_URL, - "how_to_play": _how_to_play(game_li), - "price": _price(game_li), - "state": "md", - "num_tx_initial": _num_tx(game_li), - "prizes": _prizes(game_li), - } - for game_li in game_lis - ] - return games - - -def fetch_games(): - result_games = [] - for game in games(s)[:2]: - print("fetch_games!", game) - result_games.append(game) - return result_games - - -# def save_game(game): -# with client.ServerProxy(DB_REPO_URI) as c: -# logger.debug("Saving game: {} - {}".format(game["game_id"], game["name"])) -# c.persist([game]) - - -def main(): - print('inside main') - # logger.info("Saving games to {}".format(DB_REPO_URI)) - for game in fetch_games(): - print("main!", game) - - -if __name__ == "__main__": - main()