From e20bad2d386492734bccb71c2761ff3e5b7a5cff Mon Sep 17 00:00:00 2001
From: Eric Ihli
Date: Thu, 20 Apr 2023 22:36:31 -0700
Subject: [PATCH 1/5] Add parsing for California

---
 lottery_data_scraper/california.py | 59 ++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 lottery_data_scraper/california.py

diff --git a/lottery_data_scraper/california.py b/lottery_data_scraper/california.py
new file mode 100644
index 0000000..b38fd38
--- /dev/null
+++ b/lottery_data_scraper/california.py
@@ -0,0 +1,59 @@
+import locale
+import logging
+import json
+import requests
+import html2text
+
+from lottery_data_scraper.schemas import GameSchema
+from lottery_data_scraper.util import fetch_html
+
+# Set local for currency conversion and formatting
+# because California only gives prize values and our schema
+# expects a string representation of the prize.
+# https://docs.python.org/3/library/locale.html
+locale.setlocale(locale.LC_ALL, 'en_US.utf8')
+
+logger = logging.getLogger(__name__)
+h = html2text.HTML2Text()
+
+BASE_URL = "https://www.calottery.com"
+SCRATCHER_URL = "https://www.calottery.com/api/games/scratchers"
+
+def num_tx_initial(game):
+    grand_prize = game["topPrizeTier"]
+    return grand_prize["odds"] * grand_prize["totalNumberOfPrizes"]
+
+
+def fetch_games():
+    response = json.loads(fetch_html(SCRATCHER_URL))
+    games = []
+    for game_ in response["games"]:
+        prizes = []
+        for prize_ in game_["prizeTiers"]:
+            prize = {
+                "available": prize_["numberOfPrizesPending"],
+                "claimed": prize_["numberOfPrizesCashed"],
+                "value": prize_["value"],
+                "prize": locale.currency(prize_["value"], grouping=True)[:-3] # -3 to drop the cents
+            }
+            prizes.append(prize)
+        game = {
+            "game_id": game_["gameNumber"],
+            "name": game_["name"],
+            "desription": h.handle(game_["description"]),
+            "image_urls": [game_["unScratchedImage"], game_["scratchedImage"]],
+            "how_to_play": h.handle(game_["howToPlay"]),
+            "num_tx_initial": sum(prize["available"] + prize["claimed"] for prize in prizes),
+            "price": game_["price"],
+            "prizes": prizes,
+            "state": "tx",
+            "url": BASE_URL + game_["productPage"],
+        }
+        games.append(game)
+    return games
+
+
+if __name__ == "__main__":
+    games = fetch_games()
+    schema = GameSchema(many=True)
+    print(schema.dumps(games))

From 1bd13cb7fc492c088bfc3f92ff1f349f3b857788 Mon Sep 17 00:00:00 2001
From: Eric Ihli
Date: Sun, 23 Apr 2023 15:47:03 -0700
Subject: [PATCH 2/5] Correctly calculate total number of prizes

---
 lottery_data_scraper/california.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lottery_data_scraper/california.py b/lottery_data_scraper/california.py
index b38fd38..aeb7027 100644
--- a/lottery_data_scraper/california.py
+++ b/lottery_data_scraper/california.py
@@ -1,6 +1,7 @@
 import locale
 import logging
 import json
+import operator
 import requests
 import html2text
 
@@ -37,13 +38,14 @@ def fetch_games():
                 "prize": locale.currency(prize_["value"], grouping=True)[:-3] # -3 to drop the cents
             }
             prizes.append(prize)
+        grand_prize = sorted(game_["prizeTiers"], key=operator.itemgetter("value"))[-1]
         game = {
             "game_id": game_["gameNumber"],
             "name": game_["name"],
             "desription": h.handle(game_["description"]),
             "image_urls": [game_["unScratchedImage"], game_["scratchedImage"]],
             "how_to_play": h.handle(game_["howToPlay"]),
-            "num_tx_initial": sum(prize["available"] + prize["claimed"] for prize in prizes),
+            "num_tx_initial": grand_prize["odds"] * grand_prize["totalNumberOfPrizes"],
             "price": game_["price"],
             "prizes": prizes,
             "state": "tx",

From db49b62a46c9e0ac4563c4fc5bdfa916c4035d41 Mon Sep 17 00:00:00 2001
From: Eric Ihli
Date: Sun, 23 Apr 2023 15:55:17 -0700
Subject: [PATCH 3/5] Add test for california

---
 tests/test_california.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 tests/test_california.py

diff --git a/tests/test_california.py b/tests/test_california.py
new file mode 100644
index 0000000..d757fd0
--- /dev/null
+++ b/tests/test_california.py
@@ -0,0 +1,12 @@
+import json
+import subprocess
+import unittest
+
+from lottery_data_scraper.util import fetch_html
+
+class TestCalifornia(unittest.TestCase):
+    def test_all(self):
+        result = subprocess.run(["python3", "-m", "lottery_data_scraper.california"], capture_output=True)
+        data = json.loads(result.stdout)
+        self.assertEqual(data[0]["game_id"], "1405", "Expected the first game to be PAC-MAN, #1405.")
+        self.assertEqual(data[0]["num_tx_initial"], 37080000, "Expected 37,080,000 tickets for PAC-MAN #1405.")

From 426906cadbf50f8d969804589797b013e0f84850 Mon Sep 17 00:00:00 2001
From: Eric Ihli
Date: Sun, 23 Apr 2023 15:56:47 -0700
Subject: [PATCH 4/5] Format with black and remove unused imports

---
 lottery_data_scraper/california.py |  8 +++++---
 tests/test_california.py           | 15 +++++++++++----
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/lottery_data_scraper/california.py b/lottery_data_scraper/california.py
index aeb7027..468308f 100644
--- a/lottery_data_scraper/california.py
+++ b/lottery_data_scraper/california.py
@@ -2,7 +2,6 @@ import locale
 import logging
 import json
 import operator
-import requests
 import html2text
 
 from lottery_data_scraper.schemas import GameSchema
@@ -12,7 +11,7 @@ from lottery_data_scraper.util import fetch_html
 # because California only gives prize values and our schema
 # expects a string representation of the prize.
 # https://docs.python.org/3/library/locale.html
-locale.setlocale(locale.LC_ALL, 'en_US.utf8')
+locale.setlocale(locale.LC_ALL, "en_US.utf8")
 
 logger = logging.getLogger(__name__)
 h = html2text.HTML2Text()
@@ -20,6 +19,7 @@ h = html2text.HTML2Text()
 BASE_URL = "https://www.calottery.com"
 SCRATCHER_URL = "https://www.calottery.com/api/games/scratchers"
 
+
 def num_tx_initial(game):
     grand_prize = game["topPrizeTier"]
     return grand_prize["odds"] * grand_prize["totalNumberOfPrizes"]
@@ -35,7 +35,9 @@ def fetch_games():
                 "available": prize_["numberOfPrizesPending"],
                 "claimed": prize_["numberOfPrizesCashed"],
                 "value": prize_["value"],
-                "prize": locale.currency(prize_["value"], grouping=True)[:-3] # -3 to drop the cents
+                "prize": locale.currency(prize_["value"], grouping=True)[
+                    :-3
+                ],  # -3 to drop the cents
             }
             prizes.append(prize)
         grand_prize = sorted(game_["prizeTiers"], key=operator.itemgetter("value"))[-1]
diff --git a/tests/test_california.py b/tests/test_california.py
index d757fd0..b4ec1e9 100644
--- a/tests/test_california.py
+++ b/tests/test_california.py
@@ -2,11 +2,18 @@ import json
 import subprocess
 import unittest
 
-from lottery_data_scraper.util import fetch_html
 
 class TestCalifornia(unittest.TestCase):
     def test_all(self):
-        result = subprocess.run(["python3", "-m", "lottery_data_scraper.california"], capture_output=True)
+        result = subprocess.run(
+            ["python3", "-m", "lottery_data_scraper.california"], capture_output=True
+        )
         data = json.loads(result.stdout)
-        self.assertEqual(data[0]["game_id"], "1405", "Expected the first game to be PAC-MAN, #1405.")
-        self.assertEqual(data[0]["num_tx_initial"], 37080000, "Expected 37,080,000 tickets for PAC-MAN #1405.")
+        self.assertEqual(
+            data[0]["game_id"], "1405", "Expected the first game to be PAC-MAN, #1405."
+        )
+        self.assertEqual(
+            data[0]["num_tx_initial"],
+            37080000,
+            "Expected 37,080,000 tickets for PAC-MAN #1405.",
+        )

From d83d9151a742a284367b82ee2ed1de1b6b711099 Mon Sep 17 00:00:00 2001
From: tdhood
Date: Mon, 1 May 2023 15:51:08 -0700
Subject: [PATCH 5/5] updated california

---
 lottery_data_scraper/california.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lottery_data_scraper/california.py b/lottery_data_scraper/california.py
index 468308f..e252838 100644
--- a/lottery_data_scraper/california.py
+++ b/lottery_data_scraper/california.py
@@ -47,7 +47,7 @@ def fetch_games():
             "desription": h.handle(game_["description"]),
             "image_urls": [game_["unScratchedImage"], game_["scratchedImage"]],
             "how_to_play": h.handle(game_["howToPlay"]),
-            "num_tx_initial": grand_prize["odds"] * grand_prize["totalNumberOfPrizes"],
+            "num_tx_initial": num_tx_initial(game_),
             "price": game_["price"],
             "prizes": prizes,
             "state": "tx",