Files
cfpresume/cfpresume/base.py
2022-07-24 16:26:21 -05:00

68 lines
1.7 KiB
Python

import csv
import os
import pathlib
import typing
import playwright.sync_api
# A list of rows, each mapping a column name to a string or float value.
ScoreList = typing.List[typing.Dict[str, typing.Union[str, float]]]

# Run the browser headless unless the DEBUG environment variable is set to a
# non-empty value. The previous expression returned False on both branches,
# so the browser window was always shown and DEBUG had no effect.
HEADLESS = not os.getenv("DEBUG")
class BaseLoader:
    """Load table data from a web page and write it out as a CSV file.

    Subclasses provide ``PAGE_URL`` and ``FILENAME`` and implement
    ``_data_from_page``; the public entry points are ``get_data``,
    ``write_data`` and ``update``.
    """

    # URL that ``_get_page_data`` navigates the browser page to.
    PAGE_URL: str
    # Output path without extension; ``write_data`` appends ".csv".
    FILENAME: str

    #################
    # Table Parsing #
    #################
    def _data_by_row(self, table):
        """Return the text of every ``td`` cell in ``table``, grouped by ``tr``.

        Rows that contain no ``td`` elements (for instance rows made only of
        ``th`` cells) are returned as empty lists.
        """
        return [
            [cell.inner_text() for cell in tr.query_selector_all("td")]
            for tr in table.query_selector_all("tr")
        ]

    def _find_tables(self, page):
        """Wait until a ``table`` element exists on ``page``, then return all of them."""
        page.wait_for_selector("table")
        return page.query_selector_all("table")

    def _column_from_table(self, table, index):
        """Return the cell at position ``index`` from every non-empty row of ``table``.

        Raises:
            IndexError: if a non-empty row has no cell at ``index``.
        """
        return [row[index] for row in self._data_by_row(table) if row]

    ################
    # Base Helpers #
    ################
    def _data_from_page(self, page):
        """Extract the data from a loaded ``page``; subclasses must override this."""
        raise NotImplementedError

    def _get_page_data(self):
        """Open ``PAGE_URL`` in Chromium and return ``_data_from_page``'s result."""
        with playwright.sync_api.sync_playwright() as p:
            browser = p.chromium.launch(headless=HEADLESS)
            try:
                page = browser.new_page()
                page.goto(self.PAGE_URL)
                return self._data_from_page(page)
            finally:
                # Close the browser even when navigation or parsing fails.
                browser.close()

    ##################
    # Public Methods #
    ##################
    # The return annotation is a string so the class definition does not
    # depend on the module-level alias being evaluated at definition time.
    def get_data(
        self,
    ) -> "ScoreList":
        """Return the data extracted from ``PAGE_URL``."""
        return self._get_page_data()

    def write_data(self, data):
        """Write ``data`` to ``<FILENAME>.csv`` with a ``Team,Value`` header.

        Args:
            data: iterable of dicts keyed by ``"Team"`` and ``"Value"``.

        Raises:
            ValueError: if a row contains a key other than the two field
                names (``csv.DictWriter``'s default behaviour).
        """
        # newline="" lets the csv module control line endings itself; without
        # it, platforms that translate "\n" emit "\r\r\n" after every row.
        with pathlib.Path(f"{self.FILENAME}.csv").open("w", newline="") as fo:
            writer = csv.DictWriter(fo, fieldnames=["Team", "Value"])
            writer.writeheader()
            writer.writerows(data)

    def update(self):
        """Fetch the data with ``get_data`` and persist it with ``write_data``."""
        return self.write_data(self.get_data())