68 lines
1.7 KiB
Python
68 lines
1.7 KiB
Python
import csv
|
|
import os
|
|
import pathlib
|
|
import typing
|
|
|
|
import playwright.sync_api
|
|
|
|
# Shape of the records handled by the loaders: a list of row dicts whose
# values are strings or floats.
ScoreList = typing.List[typing.Dict[str, typing.Union[str, float]]]

# Run the browser headless unless the DEBUG environment variable is set to a
# non-empty value, in which case the browser window is shown.
# (The previous expression evaluated to False on both branches.)
HEADLESS = not os.getenv("DEBUG")
|
|
|
|
|
|
class BaseLoader:
    """Base class for loaders that read table data from a web page into a CSV.

    Subclasses set ``PAGE_URL`` and ``FILENAME`` and implement
    ``_data_from_page``; ``update()`` then fetches the data and writes it out.
    """

    # URL opened in the browser by ``_get_page_data``.
    PAGE_URL: str
    # Output path without extension; ``write_data`` appends ``.csv``.
    FILENAME: str

    #################
    # Table Parsing #
    #################

    def _data_by_row(self, table):
        """Return the text of every ``td`` cell in *table*, grouped by ``tr``.

        A row that contains no ``td`` elements yields an empty list.
        """
        return [
            [td.inner_text() for td in tr.query_selector_all("td")]
            for tr in table.query_selector_all("tr")
        ]

    def _find_tables(self, page):
        """Wait until a ``table`` element exists on *page*, then return all of them."""
        page.wait_for_selector("table")
        return page.query_selector_all("table")

    def _column_from_table(self, table, index):
        """Return the cell at position *index* from each non-empty row of *table*.

        Rows without any ``td`` cells are skipped.

        Raises:
            IndexError: if a non-empty row has no cell at *index*.
        """
        return [row[index] for row in self._data_by_row(table) if row]

    ################
    # Base Helpers #
    ################

    def _data_from_page(self, page):
        """Extract the records from the loaded *page*.

        Subclasses must override this; the base implementation always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError

    def _get_page_data(self):
        """Open ``PAGE_URL`` in Chromium and return ``_data_from_page``'s result.

        The browser is launched with ``headless=HEADLESS``.
        """
        with playwright.sync_api.sync_playwright() as p:
            browser = p.chromium.launch(headless=HEADLESS)
            try:
                page = browser.new_page()
                page.goto(self.PAGE_URL)
                return self._data_from_page(page)
            finally:
                # Close the browser even when navigation or parsing fails.
                browser.close()

    ##################
    # Public Methods #
    ##################

    def get_data(self) -> "ScoreList":
        """Fetch and return the records for this loader's page."""
        return self._get_page_data()

    def write_data(self, data):
        """Write *data* to ``<FILENAME>.csv`` under a ``Team,Value`` header.

        Args:
            data: iterable of dicts keyed by ``"Team"`` and ``"Value"``.
        """
        # newline="" lets the csv module control line endings itself (without
        # it, Windows output gets "\r\r\n"); an explicit encoding keeps the
        # file independent of the machine's locale.
        target = pathlib.Path(f"{self.FILENAME}.csv")
        with target.open("w", newline="", encoding="utf-8") as fo:
            dw = csv.DictWriter(fo, fieldnames=["Team", "Value"])
            dw.writeheader()
            dw.writerows(data)

    def update(self):
        """Fetch the current data and write it to the CSV file."""
        return self.write_data(self.get_data())
|