Optimize download -> parse process: return the downloaded XML as an in-memory BytesIO instead of saving it to a download directory
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,3 +3,4 @@
|
|||||||
.venv/
|
.venv/
|
||||||
usage_data.db
|
usage_data.db
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
usage_data.db.bak
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
from pathlib import Path
|
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
from schemas import DownloadParameters
|
from schemas import DownloadParameters
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
|
||||||
async def download_xml_files(params: DownloadParameters) -> Path:
|
async def download_xml_files(params: DownloadParameters) -> BytesIO:
|
||||||
async with async_playwright() as p:
|
async with async_playwright() as p:
|
||||||
browser = await p.chromium.launch(headless=True)
|
browser = await p.chromium.launch(headless=True)
|
||||||
page = await browser.new_page()
|
page = await browser.new_page()
|
||||||
@@ -60,7 +60,12 @@ async def download_xml_files(params: DownloadParameters) -> Path:
|
|||||||
# Perform the action that initiates download
|
# Perform the action that initiates download
|
||||||
await download_button.click()
|
await download_button.click()
|
||||||
download = await download_info.value
|
download = await download_info.value
|
||||||
download_path = params.output_dir / download.suggested_filename
|
|
||||||
await download.save_as(download_path)
|
filepath = await download.path()
|
||||||
|
with open(filepath, 'rb') as f:
|
||||||
|
xml_file = BytesIO(f.read())
|
||||||
|
|
||||||
|
await download.delete()
|
||||||
await browser.close()
|
await browser.close()
|
||||||
return download_path
|
|
||||||
|
return xml_file
|
||||||
13
main.py
13
main.py
@@ -1,15 +1,14 @@
|
|||||||
import shutil
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
import os
|
||||||
from time import sleep
|
from time import sleep
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
from pathlib import Path
|
|
||||||
from greenbutton import parse
|
from greenbutton import parse
|
||||||
from db_connector import insert_usage_data
|
from db_connector import insert_usage_data
|
||||||
from sqlite3 import Connection
|
from sqlite3 import Connection
|
||||||
from schemas import DatabaseRecord, DownloadParameters
|
from schemas import DatabaseRecord, DownloadParameters
|
||||||
import asyncio
|
import asyncio
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -17,7 +16,6 @@ account_name = os.getenv("ALECTRA_ACCOUNT_NAME")
|
|||||||
account_number = os.getenv("ALECTRA_ACCOUNT_NUMBER")
|
account_number = os.getenv("ALECTRA_ACCOUNT_NUMBER")
|
||||||
account_phone = os.getenv("ALECTRA_ACCOUNT_PHONE")
|
account_phone = os.getenv("ALECTRA_ACCOUNT_PHONE")
|
||||||
db_path = os.getenv("USAGE_DB_PATH", "./usage_data.db")
|
db_path = os.getenv("USAGE_DB_PATH", "./usage_data.db")
|
||||||
xml_download_dir = os.getenv("XML_DOWNLOAD_DIR", "./downloads")
|
|
||||||
|
|
||||||
def calculate_dates_for_retrieval(bill_start_date: str, bill_end_date: str) -> tuple[datetime, datetime, datetime, datetime]:
|
def calculate_dates_for_retrieval(bill_start_date: str, bill_end_date: str) -> tuple[datetime, datetime, datetime, datetime]:
|
||||||
|
|
||||||
@@ -36,7 +34,7 @@ def calculate_dates_for_retrieval(bill_start_date: str, bill_end_date: str) -> t
|
|||||||
|
|
||||||
return utc_retrieval_start_date, utc_retrieval_end_date, utc_bill_start_date, utc_bill_end_date
|
return utc_retrieval_start_date, utc_retrieval_end_date, utc_bill_start_date, utc_bill_end_date
|
||||||
|
|
||||||
def process_xml_file(conn: Connection, xml_file: Path):
|
def process_xml_file(conn: Connection, xml_file: BytesIO):
|
||||||
|
|
||||||
def get_correct_meter_reading(meter_readings):
|
def get_correct_meter_reading(meter_readings):
|
||||||
for meterReading in meter_readings:
|
for meterReading in meter_readings:
|
||||||
@@ -71,7 +69,6 @@ def get_dates_last_2_weeks() -> tuple[datetime, datetime]:
|
|||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
while True:
|
while True:
|
||||||
Path(xml_download_dir).mkdir(parents=True, exist_ok=True)
|
|
||||||
from download_xml import download_xml_files
|
from download_xml import download_xml_files
|
||||||
from db_connector import connect_db, initialize_database
|
from db_connector import connect_db, initialize_database
|
||||||
|
|
||||||
@@ -80,22 +77,20 @@ if __name__ == "__main__":
|
|||||||
download_params = DownloadParameters(
|
download_params = DownloadParameters(
|
||||||
start_date=start_date,
|
start_date=start_date,
|
||||||
end_date=end_date,
|
end_date=end_date,
|
||||||
output_dir=Path(xml_download_dir),
|
|
||||||
account_name=account_name,
|
account_name=account_name,
|
||||||
account_number=account_number,
|
account_number=account_number,
|
||||||
account_phone=account_phone
|
account_phone=account_phone
|
||||||
)
|
)
|
||||||
|
|
||||||
xml_file_path = asyncio.run(download_xml_files(download_params))
|
xml_file = asyncio.run(download_xml_files(download_params))
|
||||||
|
|
||||||
|
|
||||||
conn = connect_db(db_path)
|
conn = connect_db(db_path)
|
||||||
initialize_database(conn)
|
initialize_database(conn)
|
||||||
|
|
||||||
process_xml_file(conn, xml_file_path)
|
process_xml_file(conn, xml_file)
|
||||||
|
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
shutil.rmtree(xml_download_dir)
|
|
||||||
print(f"Processed data from {start_date} to {end_date}. Waiting for next cycle...")
|
print(f"Processed data from {start_date} to {end_date}. Waiting for next cycle...")
|
||||||
sleep(4 * 60 * 60)
|
sleep(4 * 60 * 60)
|
||||||
@@ -1,11 +1,10 @@
|
|||||||
from pydantic import BaseModel, DirectoryPath
|
from pydantic import BaseModel
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
class DownloadParameters(BaseModel):
|
class DownloadParameters(BaseModel):
|
||||||
start_date: datetime
|
start_date: datetime
|
||||||
end_date: datetime
|
end_date: datetime
|
||||||
output_dir: DirectoryPath
|
|
||||||
account_name: str
|
account_name: str
|
||||||
account_number: str
|
account_number: str
|
||||||
account_phone: str
|
account_phone: str
|
||||||
|
|||||||
Reference in New Issue
Block a user