Optimize download -> parse process: return the downloaded XML as an in-memory BytesIO instead of saving it to a download directory

This commit is contained in:
Arian Nasr
2025-12-11 04:34:06 -05:00
parent 25469a54b3
commit 0997005994
4 changed files with 16 additions and 16 deletions

1
.gitignore vendored
View File

@@ -3,3 +3,4 @@
.venv/ .venv/
usage_data.db usage_data.db
__pycache__/ __pycache__/
usage_data.db.bak

View File

@@ -1,9 +1,9 @@
from pathlib import Path
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
from schemas import DownloadParameters from schemas import DownloadParameters
from io import BytesIO
async def download_xml_files(params: DownloadParameters) -> Path: async def download_xml_files(params: DownloadParameters) -> BytesIO:
async with async_playwright() as p: async with async_playwright() as p:
browser = await p.chromium.launch(headless=True) browser = await p.chromium.launch(headless=True)
page = await browser.new_page() page = await browser.new_page()
@@ -60,7 +60,12 @@ async def download_xml_files(params: DownloadParameters) -> Path:
# Perform the action that initiates download # Perform the action that initiates download
await download_button.click() await download_button.click()
download = await download_info.value download = await download_info.value
download_path = params.output_dir / download.suggested_filename
await download.save_as(download_path) filepath = await download.path()
with open(filepath, 'rb') as f:
xml_file = BytesIO(f.read())
await download.delete()
await browser.close() await browser.close()
return download_path
return xml_file

13
main.py
View File

@@ -1,15 +1,14 @@
import shutil
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
from time import sleep from time import sleep
from datetime import datetime, timedelta from datetime import datetime, timedelta
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
from pathlib import Path
from greenbutton import parse from greenbutton import parse
from db_connector import insert_usage_data from db_connector import insert_usage_data
from sqlite3 import Connection from sqlite3 import Connection
from schemas import DatabaseRecord, DownloadParameters from schemas import DatabaseRecord, DownloadParameters
import asyncio import asyncio
from io import BytesIO
load_dotenv() load_dotenv()
@@ -17,7 +16,6 @@ account_name = os.getenv("ALECTRA_ACCOUNT_NAME")
account_number = os.getenv("ALECTRA_ACCOUNT_NUMBER") account_number = os.getenv("ALECTRA_ACCOUNT_NUMBER")
account_phone = os.getenv("ALECTRA_ACCOUNT_PHONE") account_phone = os.getenv("ALECTRA_ACCOUNT_PHONE")
db_path = os.getenv("USAGE_DB_PATH", "./usage_data.db") db_path = os.getenv("USAGE_DB_PATH", "./usage_data.db")
xml_download_dir = os.getenv("XML_DOWNLOAD_DIR", "./downloads")
def calculate_dates_for_retrieval(bill_start_date: str, bill_end_date: str) -> tuple[datetime, datetime, datetime, datetime]: def calculate_dates_for_retrieval(bill_start_date: str, bill_end_date: str) -> tuple[datetime, datetime, datetime, datetime]:
@@ -36,7 +34,7 @@ def calculate_dates_for_retrieval(bill_start_date: str, bill_end_date: str) -> t
return utc_retrieval_start_date, utc_retrieval_end_date, utc_bill_start_date, utc_bill_end_date return utc_retrieval_start_date, utc_retrieval_end_date, utc_bill_start_date, utc_bill_end_date
def process_xml_file(conn: Connection, xml_file: Path): def process_xml_file(conn: Connection, xml_file: BytesIO):
def get_correct_meter_reading(meter_readings): def get_correct_meter_reading(meter_readings):
for meterReading in meter_readings: for meterReading in meter_readings:
@@ -71,7 +69,6 @@ def get_dates_last_2_weeks() -> tuple[datetime, datetime]:
if __name__ == "__main__": if __name__ == "__main__":
while True: while True:
Path(xml_download_dir).mkdir(parents=True, exist_ok=True)
from download_xml import download_xml_files from download_xml import download_xml_files
from db_connector import connect_db, initialize_database from db_connector import connect_db, initialize_database
@@ -80,22 +77,20 @@ if __name__ == "__main__":
download_params = DownloadParameters( download_params = DownloadParameters(
start_date=start_date, start_date=start_date,
end_date=end_date, end_date=end_date,
output_dir=Path(xml_download_dir),
account_name=account_name, account_name=account_name,
account_number=account_number, account_number=account_number,
account_phone=account_phone account_phone=account_phone
) )
xml_file_path = asyncio.run(download_xml_files(download_params)) xml_file = asyncio.run(download_xml_files(download_params))
conn = connect_db(db_path) conn = connect_db(db_path)
initialize_database(conn) initialize_database(conn)
process_xml_file(conn, xml_file_path) process_xml_file(conn, xml_file)
conn.close() conn.close()
shutil.rmtree(xml_download_dir)
print(f"Processed data from {start_date} to {end_date}. Waiting for next cycle...") print(f"Processed data from {start_date} to {end_date}. Waiting for next cycle...")
sleep(4 * 60 * 60) sleep(4 * 60 * 60)

View File

@@ -1,11 +1,10 @@
from pydantic import BaseModel, DirectoryPath from pydantic import BaseModel
from datetime import datetime from datetime import datetime
class DownloadParameters(BaseModel): class DownloadParameters(BaseModel):
start_date: datetime start_date: datetime
end_date: datetime end_date: datetime
output_dir: DirectoryPath
account_name: str account_name: str
account_number: str account_number: str
account_phone: str account_phone: str