#!/usr/bin/env python3
"""Convert a Renta 4 (R4) XLSX movements statement into Beancount entries.

Pipeline implemented by ``main``:

1. Convert the XLSX file to markdown with docling.
2. Extract the "MOVIMIENTOS DEL PERIODO EN EUROS" table from the markdown.
3. Read the final balance and the date of the last movement from the table.
4. Optionally drop the rows dated before ``--from``.
5. Ask the Claude agent to translate the table into Beancount transactions.
6. Append the transactions to ``ledger/transactions/YYYY/MM.beancount`` and
   add a balance assertion after the last month's transactions.
"""
import argparse
import asyncio
import os
import re
from collections import defaultdict
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Optional

from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
from docling.document_converter import DocumentConverter

# DD/MM/YYYY date as it appears in the FECHA column of the statement.
_ROW_DATE_RE = re.compile(r'(\d{2})/(\d{2})/(\d{4})')
# A Beancount directive starts with an ISO date in column 0.
_TRANSACTION_START_RE = re.compile(r'^\d{4}-\d{2}-\d{2}')

GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: R4 Movements to Beancount Parser

You are a specialized financial transaction parser that converts R4 account movements into Beancount format.

## Input Format

You will receive a table with the following columns:
- **FECHA**: Transaction date (DD/MM/YYYY format)
- **CONCEPTO**: Transaction description
- **MOVIMIENTOS**: Amount (negative for expenses/debits, positive for income/credits)
- **SALDO**: Account balance after transaction

Example input:
```
| FECHA | CONCEPTO | MOVIMIENTOS | SALDO |
|------------|----------------------------------------------|-------------|------------|
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.32 |
| 11/10/2025 | TRANSF. RECIBIDA DE ROGER ORIOL PEREZ | 150 | -19971.3 |
```

Warning: The table might be incorrecly parsed and may appear split.
For example:
```
| MOVIMIENTOS DEL PERIODO EN EUROS |
|------------------------------------|
| FECHA |

| CONCEPTO | MOVIMIENTOS | SALDO |
|----------------------------------------------|---------------|----------|
| SALDO ANTERIOR | -19876.16 | -19876.2 |
| INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.3 |
| INTERESES TERCER TRIMESTRE DE 2025 | -175.23 | -20051.5 |
| SALDO FINAL | None | -19971.3 |

| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.32 |
|--------------|----------------------------------------------|---------|-------------|
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -175.23 | -20051.5 |
```

You should interpet the previous table like this:
```
| MOVIMIENTOS DEL PERIODO EN EUROS |
|------------------------------------|
| FECHA | CONCEPTO | MOVIMIENTOS | SALDO |
|-------------------------------------------------------------|---------------|----------|
| | SALDO ANTERIOR | -19876.16 | -19876.2 |
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.3 |
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -175.23 | -20051.5 |
| | SALDO FINAL | None | -19971.3 |
```

## Output Format

Convert each transaction into a Beancount entry with this structure:
```
YYYY-MM-DD * "Payee" "Description"
  Account1    AMOUNT EUR
  Account2
```

### Rules for Conversion

1. **Date**: Convert from DD/MM/YYYY to YYYY-MM-DD format
2. **Flag**: Always use `*` (cleared transaction)
3. **Payee**: Extract the main entity from "CONCEPTO" field
4. **Description**: Use the full "CONCEPTO" text as the description
5. **Amount**: Use the absolute value of "MOVIMIENTOS"
6. **Currency**: Always use EUR
7. **Source Account**: Always use `Liabilities:Credit:Renta4:PolissaCredit` as one of the accounts

### Account Classification

**Available Expense Accounts:**
- Expenses:R4:Comissions
- Expenses:R4:Interessos

**Available Income Accounts:**
- Income:Invest:R4:Dividends
- Income:Invest:R4:CapitalGains
- Income:Invest:R4:CapitalGains:Untaxable

**Available Tax Accounts:**
- Expenses:Taxes:BeneficisDividends
- Expenses:Taxes:BeneficisDividendsOrigen

**Transfer Account:**
- Assets:Liquid:Caixabank:Corrent

### Transaction Type Rules

1. **Commissions** (COMISION): Use `Expenses:R4:Comissions`
2. **Interest** (INTERESES): Use `Expenses:R4:Interessos`
3. **Dividends**: Use `Income:Invest:R4:Dividends`
4. **Capital Gains**: Use `Income:Invest:R4:CapitalGains` or `Income:Invest:R4:CapitalGains:Untaxable`
5. **Dividend Tax**: Use `Expenses:Taxes:BeneficisDividends` or `Expenses:Taxes:BeneficisDividendsOrigen`
6. **IVA**: Use `Expenses:R4:Comissions`
7. **Received Transfer from ROGER ORIOL PEREZ**:
   - Credit: `Liabilities:Credit:Renta4:PolissaCredit` with amount
   - Debit: `Assets:Liquid:Caixabank:Corrent`

### Special Cases

- For negative amounts in MOVIMIENTOS: debit the expense account, credit `Liabilities:Credit:Renta4:PolissaCredit`
- For positive amounts in MOVIMIENTOS: debit `Liabilities:Credit:Renta4:PolissaCredit`, credit the income account
- For transfers from ROGER ORIOL PEREZ: debit `Liabilities:Credit:Renta4:PolissaCredit`, credit `Assets:Liquid:Caixabank:Corrent`

## Examples

**Input:**
```
01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.32
```

**Output:**
```
2025-10-01 * "R4" "INTERESES TERCER TRIMESTRE DE 2025"
  Expenses:R4:Interessos 0.16 EUR
  Liabilities:Credit:Renta4:PolissaCredit
```

**Input:**
```
11/10/2025 | TRANSF. RECIBIDA DE ROGER ORIOL PEREZ | 150 | -19971.3
```

**Output:**
```
2025-10-11 * "ROGER ORIOL PEREZ" "TRANSF. RECIBIDA DE ROGER ORIOL PEREZ"
  Liabilities:Credit:Renta4:PolissaCredit 150.00 EUR
  Assets:Liquid:Caixabank:Corrent
```

## Output Requirements

- Process all transactions in the input table
- Skip rows with "SALDO ANTERIOR" or "SALDO FINAL" in CONCEPTO
- Maintain chronological order
- Ensure proper indentation (2 spaces for posting lines)
- Be consistent with account naming conventions
- Only output Beancount code, explanations are not needed.

## Your Task

Parse the provided R4 movements table and generate the corresponding Beancount statements. Output only the Beancount code.
"""


async def get_beancount_statements(markdown_report: str) -> str:
    """Ask the Claude agent to turn a movements table into Beancount code.

    Args:
        markdown_report: Markdown text of the movements table.

    Returns:
        The ``result`` of the last successful ``ResultMessage`` yielded by
        ``query``.

    Raises:
        ValueError: If no successful result carrying a string was received.
    """
    options = ClaudeAgentOptions(
        system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
        cwd=os.getcwd(),
    )
    prompt = (
        "Convert this R4 movements table to beancount statements:\n\n"
        f"{markdown_report}"
    )

    result = None
    async for message in query(prompt=prompt, options=options):
        if isinstance(message, ResultMessage) and message.subtype == "success":
            result = message.result
        else:
            # Every other message is echoed so the run can be followed.
            print(message)

    if isinstance(result, str):
        return result
    raise ValueError("Unable to get Beancount statements from the report!")


def parse_response(beancount_statements: str) -> str:
    """Return the Beancount code contained in the agent's response.

    The response may wrap the code in a markdown code block (optionally
    tagged ``beancount``) or be plain text. When a code block is present the
    content of the first one is returned; otherwise the input is returned
    unchanged.
    """
    code_block_pattern = r'```(?:beancount)?\n(.*?)```'
    match = re.search(code_block_pattern, beancount_statements, re.DOTALL)
    if match:
        return match.group(1)
    return beancount_statements


def extract_movements_table(markdown_report: str) -> str:
    """Extract the MOVIMIENTOS DEL PERIODO EN EUROS table from the markdown.

    Collection starts after the line containing the section title and stops
    at the first line containing "RESUMEN DE RESULTADOS". Only non-blank
    lines that contain ``|`` or start with ``FECHA`` are kept.

    Returns:
        The collected lines joined with newlines; an empty string when the
        section title is not found.
    """
    in_movements_section = False
    table_lines = []

    for line in markdown_report.split('\n'):
        if 'MOVIMIENTOS DEL PERIODO EN EUROS' in line:
            in_movements_section = True
            continue
        if not in_movements_section:
            continue
        # Test the terminator before the row test: the next section's title
        # can itself be rendered as a table row (i.e. contain '|'), in which
        # case it would otherwise be collected and the loop would never stop.
        if 'RESUMEN DE RESULTADOS' in line:
            break
        if line.strip() and ('|' in line or line.startswith('FECHA')):
            table_lines.append(line)

    return '\n'.join(table_lines)


def _parse_row_date(line: str) -> Optional[date]:
    """Return the DD/MM/YYYY date found in the first cell of a table row.

    Returns ``None`` when the line has no ``|``-delimited first cell, the
    cell holds no date, or the digits do not form a real calendar date.
    """
    parts = line.split('|')
    if len(parts) < 2:
        return None
    match = _ROW_DATE_RE.search(parts[1])
    if match is None:
        return None
    day, month, year = (int(group) for group in match.groups())
    try:
        return date(year, month, day)
    except ValueError:
        return None


def extract_balance_and_last_date(markdown_report: str) -> tuple[str, str]:
    """Extract the final balance and the date of the last transaction.

    The balance is taken from the last cell of the SALDO FINAL row (decimal
    commas are turned into dots and spaces removed). The date is the latest
    date found in the first column of any row.

    Returns:
        A ``(last_date, balance)`` tuple where ``last_date`` is formatted as
        YYYY-MM-DD. Either element is an empty string when not found.
    """
    balance = ""
    last_date = None

    for line in markdown_report.split('\n'):
        if '|' not in line:
            continue

        parts = line.split('|')
        if len(parts) >= 3 and 'SALDO FINAL' in line:
            # Rows end with a trailing '|', so the last cell is parts[-2].
            balance = parts[-2].strip().replace(',', '.').replace(' ', '')

        row_date = _parse_row_date(line)
        if row_date is not None and (last_date is None or row_date > last_date):
            last_date = row_date

    return (last_date.isoformat() if last_date else ""), balance


def _split_transactions(beancount_statements: str) -> list[str]:
    """Split Beancount text into one string per dated directive.

    Lines that precede the first dated line are discarded; every other line
    is attached to the directive that precedes it.
    """
    transactions = []
    current = []
    for line in beancount_statements.strip().split('\n'):
        if _TRANSACTION_START_RE.match(line):
            if current:
                transactions.append('\n'.join(current))
            current = [line]
        elif current:
            current.append(line)
    if current:
        transactions.append('\n'.join(current))
    return transactions


def _balance_assertion(last_date: str, balance: str) -> str:
    """Build the balance assertion text, or '' when it cannot be built.

    Beancount evaluates a balance assertion at the beginning of its date, so
    asserting the statement's final balance on the day of the last
    transaction would ignore that day's transactions. The assertion is
    therefore dated the day after ``last_date``.
    """
    if not (last_date and balance):
        return ""
    try:
        assertion_date = date.fromisoformat(last_date) + timedelta(days=1)
    except ValueError:
        print(f"Warning: Invalid last date '{last_date}'. "
              "Skipping balance assertion.")
        return ""
    return (
        f'\n{assertion_date.isoformat()} balance '
        f'Liabilities:Credit:Renta4:PolissaCredit {balance} EUR\n'
    )


def save_statements(beancount_statements: str, last_date: str, balance: str):
    """Append the statements to the monthly ledger files.

    The statements are saved in ``ledger/transactions/YYYY/MM.beancount``
    (relative to the current working directory). They are sorted
    chronologically and split by month if they span multiple months.
    Existing files are appended to, never rewritten.

    A balance assertion is added at the end of the last month's file. It is
    dated the day after ``last_date`` (see ``_balance_assertion``), so it
    may carry a date from the following month.

    Args:
        beancount_statements: Beancount transactions as plain text.
        last_date: Date of the last movement of the statement (YYYY-MM-DD).
        balance: Final balance of the statement; no assertion is written
            when this or ``last_date`` is empty.
    """
    transactions = _split_transactions(beancount_statements)
    if not transactions:
        print("Warning: No valid statements to save")
        return

    # Every transaction starts with its ISO date, so the first 10 characters
    # sort chronologically and the slices below give the year and month.
    transactions.sort(key=lambda transaction: transaction[:10])

    transactions_by_month = defaultdict(list)
    for transaction in transactions:
        transactions_by_month[(transaction[:4], transaction[5:7])].append(
            transaction)

    last_month_key = max(transactions_by_month)
    assertion = _balance_assertion(last_date, balance)

    for (year, month), month_transactions in sorted(
            transactions_by_month.items()):
        output_dir = Path("ledger/transactions") / year
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"{month}.beancount"

        chunks = []
        if output_file.exists():
            existing_content = output_file.read_text(encoding="utf-8")
            # Make sure the new entries start on their own line.
            if existing_content and not existing_content.endswith('\n'):
                chunks.append('\n')
        chunks.append('\n'.join(month_transactions))
        chunks.append('\n')
        if (year, month) == last_month_key:
            chunks.append(assertion)

        # Append instead of truncating and rewriting, so an interrupted run
        # cannot lose what the ledger file already contained.
        with open(output_file, 'a', encoding="utf-8") as f:
            f.write(''.join(chunks))

        print(f"Saved statements to {output_file}")


def filter_markdown_by_date(markdown_report: str, from_date: str) -> str:
    """Filter markdown table to only include rows with dates >= from_date.

    Lines without a parseable date in their first column (headers,
    separators, SALDO rows, non-table text) are always kept.

    Args:
        markdown_report: Markdown text containing the movements table.
        from_date: Lower bound in YYYY-MM-DD format. When empty or invalid
            the report is returned unchanged (a warning is printed for the
            invalid case).
    """
    if not from_date:
        return markdown_report

    try:
        filter_date = datetime.strptime(from_date, "%Y-%m-%d").date()
    except ValueError:
        print(f"Warning: Invalid date format '{from_date}'. "
              "Expected YYYY-MM-DD. Ignoring filter.")
        return markdown_report

    filtered_lines = []
    for line in markdown_report.split('\n'):
        row_date = _parse_row_date(line)
        if row_date is None or row_date >= filter_date:
            filtered_lines.append(line)

    return '\n'.join(filtered_lines)


def convert_file_to_markdown(path: str) -> str:
    """Convert the document at ``path`` to markdown using docling."""
    converter = DocumentConverter()
    result = converter.convert(path)
    return result.document.export_to_markdown()


async def main():
    """Parse the command line and run the XLSX-to-Beancount pipeline."""
    parser = argparse.ArgumentParser(
        description="Parse R4 movement statements from XLSX format")
    parser.add_argument("source", help="Path to the input XLSX file")
    parser.add_argument(
        "--from", dest="from_date",
        help="Filter transactions from this date (YYYY-MM-DD)")
    args = parser.parse_args()

    # Accept any capitalisation of the extension (e.g. ".XLSX").
    if not args.source.lower().endswith(".xlsx"):
        parser.error("Input file must have .xlsx format")

    markdown_report = convert_file_to_markdown(args.source)

    movements_table = extract_movements_table(markdown_report)
    if not movements_table:
        print("Error: Could not find MOVIMIENTOS DEL PERIODO EN EUROS table")
        return

    # Read balance and last date before filtering: they describe the whole
    # statement, not just the rows kept by --from.
    last_date, balance = extract_balance_and_last_date(movements_table)
    print(f"Extracted balance: {balance} on date: {last_date}")

    if args.from_date:
        movements_table = filter_markdown_by_date(
            movements_table, args.from_date)

    beancount_statements = await get_beancount_statements(movements_table)
    print(f"Final result: \n{beancount_statements}")

    clean_beancount_statements = parse_response(beancount_statements)
    save_statements(clean_beancount_statements, last_date, balance)


if __name__ == "__main__":
    asyncio.run(main())