From 77f3a31b0234ed68913b1952b31c3b40584b44b3 Mon Sep 17 00:00:00 2001 From: Roger Oriol Date: Sat, 11 Oct 2025 15:11:15 +0200 Subject: [PATCH] new script parse caixabank account movements --- commands/parse-caixabank-account-movements | 135 ++++++++++++++++----- 1 file changed, 106 insertions(+), 29 deletions(-) diff --git a/commands/parse-caixabank-account-movements b/commands/parse-caixabank-account-movements index 0dc2ea5..78a65c8 100755 --- a/commands/parse-caixabank-account-movements +++ b/commands/parse-caixabank-account-movements @@ -152,8 +152,7 @@ async def get_beancount_price_statements(r4_report: str) -> str: result = None async for message in query( prompt="Convert this financial account movements table to " - f"beancount price statements:\n{ - r4_report}", + f"beancount price statements:\n{r4_report}", options=options ): if isinstance(message, ResultMessage) and message.subtype == "success": @@ -170,8 +169,8 @@ async def get_beancount_price_statements(r4_report: str) -> str: def parse_response(beancount_statements: str): """ - The input beancount statements might be inside a markdown beancount code block - or in plain text. + The input beancount statements might be inside a markdown beancount + code block or in plain text. """ import re @@ -185,45 +184,122 @@ def parse_response(beancount_statements: str): return content -def save_statements(beancount_statements: str): +def extract_last_balances_by_month(markdown_report: str) -> dict: """ - The statements are saved in a beancount file in ledger/transactions/YYYY/MM.beancount. - The year and month are extracted from the first beancount statement in the input. - The file is created if it doesn't exist or the statements are appended to the - end of the file if it already exists. + Extract the last transaction date and balance for each month from the markdown table. + Returns a dict with keys as (year, month) tuples and values as (date, balance) tuples. + """ + import re + from collections import defaultdict + + lines = markdown_report.split('\n') + + transactions_by_month = {} + + for line in lines: + if '|' not in line: + continue + + parts = line.split('|') + if len(parts) < 7: + continue + + fecha_col = parts[1].strip() + saldo_col = parts[6].strip() + + date_match = re.search(r'(\d{4})-(\d{2})-(\d{2})', fecha_col) + if date_match and saldo_col: + try: + balance = saldo_col.replace(',', '').replace(' ', '') + float(balance) + + year = date_match.group(1) + month = date_match.group(2) + date = f"{year}-{month}-{date_match.group(3)}" + month_key = (year, month) + + if month_key not in transactions_by_month or date > transactions_by_month[month_key][0]: + transactions_by_month[month_key] = (date, balance) + except ValueError: + continue + + return transactions_by_month + + +def save_statements( + beancount_statements: str, + last_balances_by_month: dict +): + """ + The statements are saved in beancount files in + ledger/transactions/YYYY/MM.beancount. + Statements are sorted chronologically and split by month if they + span multiple months. + A balance assertion is added at the end of each month's file. """ import re from pathlib import Path + from collections import defaultdict if not beancount_statements.strip(): print("Warning: No valid statements to save") return - # Extract date from first statement (format: YYYY-MM-DD price ...) - first_line = beancount_statements.strip().split('\n')[0] - date_match = re.match(r'^(\d{4})-(\d{2})-\d{2}', first_line) + lines = beancount_statements.strip().split('\n') - if not date_match: - print(f"Error: Could not extract date from first statement: { - first_line}") - return + transactions = [] + current_transaction = [] - year = date_match.group(1) - month = date_match.group(2) + for line in lines: + if re.match(r'^\d{4}-\d{2}-\d{2}', line): + if current_transaction: + transactions.append('\n'.join(current_transaction)) + current_transaction = [line] + elif current_transaction: + current_transaction.append(line) - # Create directory structure if it doesn't exist - output_dir = Path(f"ledger/transactions/{year}") - output_dir.mkdir(parents=True, exist_ok=True) + if current_transaction: + transactions.append('\n'.join(current_transaction)) - # Define output file path - output_file = output_dir / f"{month}.beancount" + transactions.sort(key=lambda t: re.match( + r'^(\d{4}-\d{2}-\d{2})', t).group(1)) - # Append statements to file (create if doesn't exist) - with open(output_file, 'a') as f: - f.write(beancount_statements) - f.write('\n') + transactions_by_month = defaultdict(list) + for transaction in transactions: + date_match = re.match(r'^(\d{4})-(\d{2})-\d{2}', transaction) + if date_match: + year = date_match.group(1) + month = date_match.group(2) + key = (year, month) + transactions_by_month[key].append(transaction) - print(f"Saved price statements to {output_file}") + for (year, month), month_transactions in sorted(transactions_by_month.items()): + output_dir = Path(f"ledger/transactions/{year}") + output_dir.mkdir(parents=True, exist_ok=True) + + output_file = output_dir / f"{month}.beancount" + + existing_content = "" + if output_file.exists(): + with open(output_file, 'r') as f: + existing_content = f.read() + + with open(output_file, 'w') as f: + if existing_content: + f.write(existing_content) + if not existing_content.endswith('\n'): + f.write('\n') + f.write('\n'.join(month_transactions)) + f.write('\n') + + month_key = (year, month) + if month_key in last_balances_by_month: + last_date, last_balance = last_balances_by_month[month_key] + if last_date and last_balance: + f.write(f'\n{last_date} balance Assets:Liquid:Caixabank:Corrent { + last_balance} EUR\n') + + print(f"Saved statements to {output_file}") def convert_file_to_markdown(path: str): @@ -248,7 +324,8 @@ async def main(): print(f"Final result: \n{beancount_statements}") clean_beancount_statements = parse_response(beancount_statements) - save_statements(clean_beancount_statements) + last_balances_by_month = extract_last_balances_by_month(markdown_report) + save_statements(clean_beancount_statements, last_balances_by_month) if __name__ == "__main__":