From bb1014f5416599f12ce68e0c3e7b31d59fc043c1 Mon Sep 17 00:00:00 2001 From: tdhood Date: Sat, 6 May 2023 15:55:45 -0700 Subject: [PATCH] fixed issue --- lottery_data_scraper/connecticut.py | 32 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/lottery_data_scraper/connecticut.py b/lottery_data_scraper/connecticut.py index cd4ba19..35ad1a1 100644 --- a/lottery_data_scraper/connecticut.py +++ b/lottery_data_scraper/connecticut.py @@ -8,7 +8,7 @@ from xmlrpc import client from bs4 import BeautifulSoup as bs import html2text import requests -from lottery_data_scraper.schemas import GameSchema +from lottery_data_scraper.schemas import GameSchema from lottery_data_scraper.util import fetch_html logger = logging.getLogger(__name__) @@ -28,7 +28,6 @@ headers = { } - def get_games_urls(url): html = fetch_html(url) soup = bs(html, "lxml") @@ -37,6 +36,7 @@ def get_games_urls(url): game_urls = list(map(lambda x: BASE + x.attrs["href"], game_hrefs)) return game_urls + def parse_game(game_url): # Each game page has two tables # Table 1: Ticket Price, Num_Tx_remaining, Odds @@ -46,22 +46,23 @@ def parse_game(game_url): game_soup = bs(game_html, "lxml") name = game_soup.find("h2").text - game_id = re.match(r"GAME #(\d*)",game_soup.find(class_="heading-sub-info").text).group(1) + game_id = re.match( + r"GAME #(\d*)", game_soup.find(class_="heading-sub-info").text + ).group(1) - #soup for table 1 + # soup for table 1 table_one = game_soup.find(class_="img-detail-block") price = int(re.search(r"Ticket Price:\$(\d*)", table_one.text).group(1)) - - num_tx_str = re.search(r"Total # of Tickets:([\d*][,\d*]+)", table_one.text).group(1) - num_tx_initial = int(num_tx_str.replace(",", "")) + num_tx_str = re.search(r"Total # of Tickets:([\d*][,\d*]+)", table_one.text).group( + 1 + ) + num_tx_initial = int(num_tx_str.replace(",", "")) - #soup for table 2 + # soup for table 2 table_two = game_soup.find(class_="unclaimed-prize-wrap") - prize_rows = ( - table_two.find("tbody").find_all("tr") - ) + prize_rows = table_two.find("tbody").find_all("tr") prizes = [] for row in prize_rows: prize, total, available = [r.text for r in row.find_all("td")] @@ -86,7 +87,7 @@ def parse_game(game_url): ) how_to_play_soup = game_soup.find(class_="play-text-wrap") - #remove heading and button tags + # remove heading and button tags how_to_play_soup.h3.extract() how_to_play_soup.a.extract() @@ -104,10 +105,11 @@ def parse_game(game_url): "prizes": prizes, "num_tx_initial": num_tx_initial, "how_to_play": how_to_play, - "image_urls": image_urls + "image_urls": image_urls, } return game + def main(): games_urls = get_games_urls(INDEX) games = [] @@ -116,7 +118,8 @@ def main(): game = parse_game(game) except Exception as e: logger.error("Unable to parse game {}.\n{}".format(game, e)) - games.append(game) + continue + games.append(game) return games @@ -124,4 +127,3 @@ if __name__ == "__main__": games = main() schema = GameSchema(many=True) print(schema.dumps(games)) -