#!/usr/bin/env python3
"""Convert a bank account movements XLSX export into Beancount transactions.

Pipeline:

1. The XLSX file is converted to a markdown table (docling).
2. The table is sent to Claude together with a system prompt describing the
   expected Beancount output.
3. The answer is stripped of markdown code fences and appended to
   ``ledger/transactions/YYYY/MM.beancount``.
"""
import argparse
import asyncio
import os
import re
from pathlib import Path

# The third-party packages (claude_agent_sdk, docling) are imported inside the
# functions that use them: this keeps ``--help`` and argument errors fast and
# lets the pure helpers below be imported without those packages installed.

# NOTE(review): rule 5 below asks for the absolute amount on the first posting.
# For income rows that puts a positive amount on an Income:* account, so the
# auto-balanced Assets posting becomes negative -- confirm that the generated
# income entries carry the intended sign.
GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: Personal Finances to Beancount Parser

You are a specialized financial transaction parser that converts bank account movements into Beancount format.

## Input Format

You will receive a table with the following columns:
- **Fecha**: Transaction date
- **Fecha Valor**: Value date
- **Movimiento**: Transaction description
- **Más datos**: Additional details (may be empty)
- **Importe**: Amount (negative for expenses, positive for income)
- **Saldo**: Account balance after transaction

Example input:
```
| Fecha | Fecha Valor | Movimiento | Más datos | Importe | Saldo |
2025-10-09 00:00:00 | 2025-10-09 00:00:00 | Nintendo CD148015 | | -69.99 | 10000.00
```

## Output Format

Convert each transaction into a Beancount entry with this structure:
```
YYYY-MM-DD * "Payee" "Description"
  ExpenseAccount  AMOUNT EUR
  Assets:Liquid:Caixabank:Corrent
```

### Rules for Conversion

1. **Date**: Use the "Fecha" field in YYYY-MM-DD format
2. **Flag**: Always use `*` (cleared transaction)
3. **Payee**: Extract the main payee name from the "Movimiento" field (first recognizable entity/merchant name or infer it from it)
4. **Description**: Use the full "Movimiento" text as the description
5. **Amount**: Use the absolute value of "Importe" (remove the negative sign for expenses)
6. **Currency**: Always use EUR
7. **Source Account**: Always use `Assets:Liquid:Caixabank:Corrent` as the second posting (the account is automatically debited)

### Expense Account Classification

You will be provided with a list of available expense accounts.
Analyze each transaction and classify it into the most appropriate account based on:
- The payee/merchant name
- The transaction description
- Common spending patterns

**Available Income Accounts:**
Income:Work:Zurich:Salari
Income:Work:Zurich:TicketsRestaurant
Income:Work:Zurich:TargetaTransport
Income:Work:Zurich:SeguroMedic
Income:Work:Zurich:Gimnas
Income:Work:Zurich:DZP
Income:Other:Caixabank:Transferencia
Income:Other:Caixabank:Bizum
Income:Savings:Caixabank:RentabilitatEstalvis
Income:Savings:TradeRepublic:RentabilitatEstalvis
Income:Invest:R4:Dividends
Income:Invest:R4:CapitalGains
Income:Invest:R4:CapitalGains:Untaxable
Income:Invest:DZP:CapitalGains
Income:Other:Devolucions

**Available Expense Accounts:**
Expenses:R4:Comissions
Expenses:R4:Interessos
Expenses:Caixabank:Comissions
Expenses:Taxes:IRPF
Expenses:Taxes:BeneficisDividends
Expenses:Taxes:BeneficisDividendsOrigen
Expenses:Taxes:ImpostCirculacio
Expenses:Insurance:Cotxe
Expenses:Lloguer
Expenses:FacturesUtilitats
Expenses:Internet
Expenses:Gasolina
Expenses:MantenimentCotxe
Expenses:Roba
Expenses:Educació
Expenses:Medic
Expenses:Vacances
Expenses:Perruqueria
Expenses:AmazonPrime
Expenses:CarnetJove
Expenses:Supermercat
Expenses:Gimnàs
Expenses:Parking
Expenses:Mobilitat
Expenses:MarcaPersonal
Expenses:MenjarFora
Expenses:Entreteniment
Expenses:Llar
Expenses:Higiene
Expenses:Donatiu
Expenses:Altres

### Transaction Type Detection

- **Expenses** (negative Importe): Post to an Expenses:* account
- **Income** (positive Importe): Post to an Income:* account

### Special Cases

- If a transaction is ambiguous, choose the most likely expense category
- For unknown merchants, use a generic account like `Expenses:Altres`
- Preserve reference numbers and transaction IDs in the description
- If "Más datos" contains relevant information, consider including it in the description

## Example

**Input:**
```
2025-10-09 00:00:00 | 2025-10-09 00:00:00 | Nintendo CD148015 | | -69.99 | 10000.00
```

**Output:**
```
2025-10-09 * "Nintendo" "Nintendo CD148015"
  Expenses:Entreteniment  69.99 EUR
  Assets:Liquid:Caixabank:Corrent
```

## Output Requirements

- Process all transactions in the input table
- Maintain chronological order
- Ensure proper indentation (2 spaces for posting lines)
- Do not include the balance information in the Beancount output
- Be consistent with account naming conventions
- Only output Beancount code, explanations are not needed.

## Your Task

Parse the provided account movements data tables and generate the corresponding Beancount transactions. Output only the Beancount code.
"""

# A fenced markdown block with any (or no) info string after the opening
# fence; group 1 is the text between the opening line and the closing fence.
_CODE_BLOCK_RE = re.compile(r"```[^\n]*\n(.*?)```", re.DOTALL)

# An ISO date at the start of a line; groups are (year, month).
_STATEMENT_DATE_RE = re.compile(r"^(\d{4})-(\d{2})-\d{2}", re.MULTILINE)


async def get_beancount_price_statements(r4_report: str) -> str:
    """Ask Claude to turn an account movements table into Beancount entries.

    The function and parameter names are kept for backward compatibility; the
    input is the markdown rendering of the account movements table.

    Args:
        r4_report: Markdown text containing the account movements table.

    Returns:
        The raw text of the successful result message (it may still be
        wrapped in a markdown code block).

    Raises:
        ValueError: If the conversation ends without a successful result
            message carrying a string.
    """
    from claude_agent_sdk import ClaudeAgentOptions, ResultMessage, query

    options = ClaudeAgentOptions(
        system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
        cwd=os.getcwd(),
    )

    result = None
    async for message in query(
        prompt="Convert this financial account movements table to "
        f"beancount transactions:\n{r4_report}",
        options=options,
    ):
        if isinstance(message, ResultMessage) and message.subtype == "success":
            result = message.result
        else:
            # Echo every other message so the run can be followed.
            print(message)

    if isinstance(result, str):
        return result
    raise ValueError("Unable to get Beancount statements from the report!")


def parse_response(beancount_statements: str) -> str:
    """Strip markdown code fences from the model answer.

    The statements may arrive as plain text or inside one or more fenced code
    blocks (with or without a language tag). When fenced blocks are present
    their contents are returned, joined by a newline; otherwise the input is
    returned unchanged.

    Args:
        beancount_statements: Raw text returned by the model.

    Returns:
        The Beancount text without the surrounding code fences.
    """
    blocks = _CODE_BLOCK_RE.findall(beancount_statements)
    if blocks:
        return "\n".join(blocks)
    return beancount_statements


def save_statements(beancount_statements: str) -> None:
    """Append the statements to ``ledger/transactions/YYYY/MM.beancount``.

    The year and month are taken from the first dated line of the input
    (format: ``YYYY-MM-DD * ...``). The directory and the file are created
    when missing; otherwise the statements are appended at the end of the
    file. Nothing is written, and a message is printed, when the input is
    blank or contains no dated line.

    Args:
        beancount_statements: Beancount text to store.
    """
    stripped = beancount_statements.strip()
    if not stripped:
        print("Warning: No valid statements to save")
        return

    # Use the first dated line rather than strictly the first line, so that a
    # leading comment or blank line does not prevent saving.
    date_match = _STATEMENT_DATE_RE.search(stripped)
    if date_match is None:
        first_line = stripped.split("\n")[0]
        print(
            f"Error: Could not extract date from first statement: {first_line}"
        )
        return
    year, month = date_match.groups()

    output_dir = Path("ledger/transactions") / year
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{month}.beancount"

    # Account names contain non-ASCII characters, so the encoding must not
    # depend on the platform default.
    with open(output_file, "a", encoding="utf-8") as f:
        f.write(beancount_statements)
        f.write("\n")

    print(f"Saved statements to {output_file}")


def convert_file_to_markdown(path: str) -> str:
    """Convert the document at *path* to markdown using docling.

    Args:
        path: Path of the document to convert.

    Returns:
        The markdown export of the converted document.
    """
    from docling.document_converter import DocumentConverter

    converter = DocumentConverter()
    result = converter.convert(path)
    return result.document.export_to_markdown()


async def main() -> None:
    """Command-line entry point: XLSX file -> Beancount file in the ledger."""
    parser = argparse.ArgumentParser(
        description="Convert a bank account movements XLSX export into "
        "Beancount transactions"
    )
    parser.add_argument("source", help="Path to the input XLSX file")
    args = parser.parse_args()

    # Validate before importing/running the (slow) converter.
    if not args.source.lower().endswith(".xlsx"):
        parser.error("Input file must have .xlsx format")
    if not Path(args.source).is_file():
        parser.error(f"Input file not found: {args.source}")

    markdown_report = convert_file_to_markdown(args.source)
    beancount_statements = await get_beancount_price_statements(
        markdown_report
    )
    print(f"Final result: \n{beancount_statements}")

    clean_beancount_statements = parse_response(beancount_statements)
    save_statements(clean_beancount_statements)


if __name__ == "__main__":
    asyncio.run(main())