New script: parse CaixaBank account movements

This commit is contained in:
Roger Oriol
2025-10-11 15:11:15 +02:00
parent b15550cefa
commit 77f3a31b02

View File

@@ -152,8 +152,7 @@ async def get_beancount_price_statements(r4_report: str) -> str:
result = None
async for message in query(
prompt="Convert this financial account movements table to "
f"beancount price statements:\n{
r4_report}",
f"beancount price statements:\n{r4_report}",
options=options
):
if isinstance(message, ResultMessage) and message.subtype == "success":
@@ -170,8 +169,8 @@ async def get_beancount_price_statements(r4_report: str) -> str:
def parse_response(beancount_statements: str):
"""
The input beancount statements might be inside a markdown beancount code block
or in plain text.
The input beancount statements might be inside a markdown beancount
code block or in plain text.
"""
import re
@@ -185,45 +184,122 @@ def parse_response(beancount_statements: str):
return content
def save_statements(beancount_statements: str):
def extract_last_balances_by_month(
    markdown_report: str,
) -> dict[tuple[str, str], tuple[str, str]]:
    """Return the latest dated balance of each month in a markdown table.

    Every line containing ``|`` is split on that character; rows yielding
    fewer than seven fields are ignored. The field at index 1 is searched
    for a ``YYYY-MM-DD`` date and the field at index 6 is taken as the
    balance, with commas and spaces removed. Rows without a date, without a
    balance, or whose cleaned balance is not a finite number are skipped
    (this also discards the header and separator rows).

    Args:
        markdown_report: Markdown text containing the movements table.

    Returns:
        A dict mapping ``(year, month)`` string tuples to
        ``(date, balance)`` string tuples, where ``date`` is the greatest
        ``YYYY-MM-DD`` date seen for that month and ``balance`` is the
        cleaned balance text of the first row carrying that date.
    """
    import math
    import re

    date_pattern = re.compile(r'(\d{4})-(\d{2})-(\d{2})')
    last_by_month: dict[tuple[str, str], tuple[str, str]] = {}

    for line in markdown_report.split('\n'):
        if '|' not in line:
            continue

        parts = line.split('|')
        if len(parts) < 7:
            continue

        date_match = date_pattern.search(parts[1])
        saldo_col = parts[6].strip()
        if not date_match or not saldo_col:
            continue

        # Commas are treated as thousands separators, as in the original.
        balance = saldo_col.replace(',', '').replace(' ', '')
        try:
            numeric_balance = float(balance)
        except ValueError:
            continue
        # float() also accepts "nan"/"inf", which are not usable amounts.
        if not math.isfinite(numeric_balance):
            continue

        year, month, day = date_match.groups()
        date = f"{year}-{month}-{day}"
        month_key = (year, month)

        # ISO dates compare correctly as strings. The strict ">" keeps the
        # first row encountered when several rows share the latest date.
        previous = last_by_month.get(month_key)
        if previous is None or date > previous[0]:
            last_by_month[month_key] = (date, balance)

    return last_by_month
def save_statements(
beancount_statements: str,
last_balances_by_month: dict
):
"""
The statements are saved in beancount files in
ledger/transactions/YYYY/MM.beancount.
Statements are sorted chronologically and split by month if they
span multiple months.
A balance assertion is added at the end of each month's file.
"""
import re
from pathlib import Path
from collections import defaultdict
if not beancount_statements.strip():
print("Warning: No valid statements to save")
return
# Extract date from first statement (format: YYYY-MM-DD price ...)
first_line = beancount_statements.strip().split('\n')[0]
date_match = re.match(r'^(\d{4})-(\d{2})-\d{2}', first_line)
lines = beancount_statements.strip().split('\n')
if not date_match:
print(f"Error: Could not extract date from first statement: {
first_line}")
return
transactions = []
current_transaction = []
year = date_match.group(1)
month = date_match.group(2)
for line in lines:
if re.match(r'^\d{4}-\d{2}-\d{2}', line):
if current_transaction:
transactions.append('\n'.join(current_transaction))
current_transaction = [line]
elif current_transaction:
current_transaction.append(line)
# Create directory structure if it doesn't exist
output_dir = Path(f"ledger/transactions/{year}")
output_dir.mkdir(parents=True, exist_ok=True)
if current_transaction:
transactions.append('\n'.join(current_transaction))
# Define output file path
output_file = output_dir / f"{month}.beancount"
transactions.sort(key=lambda t: re.match(
r'^(\d{4}-\d{2}-\d{2})', t).group(1))
# Append statements to file (create if doesn't exist)
with open(output_file, 'a') as f:
f.write(beancount_statements)
f.write('\n')
transactions_by_month = defaultdict(list)
for transaction in transactions:
date_match = re.match(r'^(\d{4})-(\d{2})-\d{2}', transaction)
if date_match:
year = date_match.group(1)
month = date_match.group(2)
key = (year, month)
transactions_by_month[key].append(transaction)
print(f"Saved price statements to {output_file}")
for (year, month), month_transactions in sorted(transactions_by_month.items()):
output_dir = Path(f"ledger/transactions/{year}")
output_dir.mkdir(parents=True, exist_ok=True)
output_file = output_dir / f"{month}.beancount"
existing_content = ""
if output_file.exists():
with open(output_file, 'r') as f:
existing_content = f.read()
with open(output_file, 'w') as f:
if existing_content:
f.write(existing_content)
if not existing_content.endswith('\n'):
f.write('\n')
f.write('\n'.join(month_transactions))
f.write('\n')
month_key = (year, month)
if month_key in last_balances_by_month:
last_date, last_balance = last_balances_by_month[month_key]
if last_date and last_balance:
f.write(f'\n{last_date} balance Assets:Liquid:Caixabank:Corrent {
last_balance} EUR\n')
print(f"Saved statements to {output_file}")
def convert_file_to_markdown(path: str):
@@ -248,7 +324,8 @@ async def main():
print(f"Final result: \n{beancount_statements}")
clean_beancount_statements = parse_response(beancount_statements)
save_statements(clean_beancount_statements)
last_balances_by_month = extract_last_balances_by_month(markdown_report)
save_statements(clean_beancount_statements, last_balances_by_month)
if __name__ == "__main__":