From b15550cefaeb2466aed8e9115c8cdc9c8dfa5ab7 Mon Sep 17 00:00:00 2001
From: Roger Oriol
Date: Sat, 11 Oct 2025 14:22:51 +0200
Subject: [PATCH] 11 octubre

---
 commands/parse-caixabank-account-movements | 288 +++++++++++++++++++++
 ledger/transactions/2025/10.beancount      |  34 ++-
 2 files changed, 317 insertions(+), 5 deletions(-)
 create mode 100755 commands/parse-caixabank-account-movements

diff --git a/commands/parse-caixabank-account-movements b/commands/parse-caixabank-account-movements
new file mode 100755
index 0000000..0dc2ea5
--- /dev/null
+++ b/commands/parse-caixabank-account-movements
@@ -0,0 +1,288 @@
+#!/usr/bin/env python3
+"""Convert a CaixaBank account-movements XLSX export to Beancount entries.
+
+The spreadsheet is rendered to Markdown with docling, sent to Claude with a
+system prompt that lists the ledger's accounts, and the returned entries are
+appended to ledger/transactions/YYYY/MM.beancount.
+"""
+
+import argparse
+import asyncio
+import os
+import re
+from pathlib import Path
+
+from claude_agent_sdk import ClaudeAgentOptions, ResultMessage, query
+from docling.document_converter import DocumentConverter
+
+GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: Personal Finances to Beancount Parser
+
+You are a specialized financial transaction parser that converts bank account movements into Beancount format.
+
+## Input Format
+
+You will receive a table with the following columns:
+- **Fecha**: Transaction date
+- **Fecha Valor**: Value date
+- **Movimiento**: Transaction description
+- **Más datos**: Additional details (may be empty)
+- **Importe**: Amount (negative for expenses, positive for income)
+- **Saldo**: Account balance after transaction
+
+Example input:
+```
+| Fecha | Fecha Valor | Movimiento | Más datos | Importe | Saldo |
+2025-10-09 00:00:00 | 2025-10-09 00:00:00 | Nintendo CD148015 | | -69.99 | 10000.00
+```
+
+## Output Format
+
+Convert each transaction into a Beancount entry with this structure:
+
+```
+YYYY-MM-DD * "Payee" "Description"
+  ExpenseOrIncomeAccount AMOUNT EUR
+  Assets:Liquid:Caixabank:Corrent
+```
+
+### Rules for Conversion
+
+1. **Date**: Use the "Fecha" field in YYYY-MM-DD format
+2. **Flag**: Always use `*` (cleared transaction)
+3. **Payee**: Extract the main payee name from the "Movimiento" field (the first recognizable entity/merchant name, or infer one from the text)
+4. **Description**: Use the full "Movimiento" text as the description
+5. **Amount**: For expenses (negative "Importe"), use the absolute value. For income (positive "Importe"), write the amount as a negative number, because Income postings are negative in Beancount
+6. **Currency**: Always use EUR
+7. **Source Account**: Always use `Assets:Liquid:Caixabank:Corrent` as the second posting and leave its amount blank so Beancount balances it automatically
+
+### Account Classification
+
+Classify each transaction into the most appropriate account from the lists below, based on:
+- The payee/merchant name
+- The transaction description
+- Common spending patterns
+
+**Available Income Accounts:**
+Income:Work:Zurich:Salari
+Income:Work:Zurich:TicketsRestaurant
+Income:Work:Zurich:TargetaTransport
+Income:Work:Zurich:SeguroMedic
+Income:Work:Zurich:Gimnas
+Income:Work:Zurich:DZP
+Income:Other:Caixabank:Transferencia
+Income:Other:Caixabank:Bizum
+Income:Savings:Caixabank:RentabilitatEstalvis
+Income:Savings:TradeRepublic:RentabilitatEstalvis
+Income:Invest:R4:Dividends
+Income:Invest:R4:CapitalGains
+Income:Invest:R4:CapitalGains:Untaxable
+Income:Invest:DZP:CapitalGains
+Income:Other:Devolucions
+
+**Available Expense Accounts:**
+Expenses:R4:Comissions
+Expenses:R4:Interessos
+Expenses:Caixabank:Comissions
+Expenses:Taxes:IRPF
+Expenses:Taxes:BeneficisDividends
+Expenses:Taxes:BeneficisDividendsOrigen
+Expenses:Taxes:ImpostCirculacio
+Expenses:Insurance:Cotxe
+Expenses:Lloguer
+Expenses:FacturesUtilitats
+Expenses:Internet
+Expenses:Gasolina
+Expenses:MantenimentCotxe
+Expenses:Roba
+Expenses:Educació
+Expenses:Medic
+Expenses:Vacances
+Expenses:Perruqueria
+Expenses:AmazonPrime
+Expenses:CarnetJove
+Expenses:Supermercat
+Expenses:Gimnàs
+Expenses:Parking
+Expenses:Mobilitat
+Expenses:MarcaPersonal
+Expenses:MenjarFora
+Expenses:Entreteniment
+Expenses:Llar
+Expenses:Higiene
+Expenses:Donatiu
+Expenses:Altres
+
+### Transaction Type Detection
+
+- **Expenses** (negative Importe): Post to an Expenses:* account with a positive amount
+- **Income** (positive Importe): Post to an Income:* account with a negative amount
+
+### Special Cases
+
+- If a transaction is ambiguous, choose the most likely category
+- For unknown merchants, use a generic account like `Expenses:Altres`
+- Preserve reference numbers and transaction IDs in the description
+- If "Más datos" contains relevant information, consider including it in the description
+
+## Examples
+
+**Input:**
+```
+2025-10-07 00:00:00 | 2025-10-07 00:00:00 | BIZUM RECIBIDO | | 8.29 | 10069.99
+2025-10-09 00:00:00 | 2025-10-09 00:00:00 | Nintendo CD148015 | | -69.99 | 10000.00
+```
+
+**Output:**
+```
+2025-10-07 * "Bizum" "BIZUM RECIBIDO"
+  Income:Other:Caixabank:Bizum -8.29 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-09 * "Nintendo" "Nintendo CD148015"
+  Expenses:Entreteniment 69.99 EUR
+  Assets:Liquid:Caixabank:Corrent
+```
+
+## Output Requirements
+
+- Process all transactions in the input table
+- Maintain chronological order
+- Ensure proper indentation (2 spaces for posting lines)
+- Do not include the balance information in the Beancount output
+- Be consistent with account naming conventions
+- Only output Beancount code, explanations are not needed.
+
+## Your Task
+Parse the provided account movements data tables and generate the corresponding Beancount transactions. Output only the Beancount code.
+"""
+
+
+async def get_beancount_price_statements(r4_report: str) -> str:
+    """Ask Claude to convert a Markdown movements table into Beancount.
+
+    The name is historical: the prompt requests Beancount transactions,
+    not ``price`` directives.
+
+    Args:
+        r4_report: Markdown text of the account-movements table.
+
+    Returns:
+        The ``result`` of the last successful ``ResultMessage``.
+
+    Raises:
+        ValueError: If no successful result carrying a string was received.
+    """
+    options = ClaudeAgentOptions(
+        system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
+        cwd=os.getcwd(),
+    )
+    # Keep the replacement field on one line: a field that spans lines
+    # inside a single-quoted f-string only parses on Python 3.12+.
+    prompt = (
+        "Convert this financial account movements table to "
+        f"Beancount transactions:\n{r4_report}"
+    )
+
+    result = None
+    async for message in query(prompt=prompt, options=options):
+        if isinstance(message, ResultMessage) and message.subtype == "success":
+            result = message.result
+        else:
+            # Echo every other message so progress is visible on the console.
+            print(message)
+
+    if not isinstance(result, str):
+        raise ValueError(
+            "Unable to get Beancount transactions from the account movements!")
+    return result
+
+
+# Matches any fenced Markdown code block, whatever its language tag.
+_CODE_BLOCK = re.compile(r"```[^\n]*\n(.*?)```", re.DOTALL)
+
+
+def parse_response(beancount_statements: str) -> str:
+    """Strip Markdown code fences from the model's answer.
+
+    Args:
+        beancount_statements: Raw answer, either plain Beancount text or
+            Beancount wrapped in one or more fenced code blocks.
+
+    Returns:
+        The contents of every fenced block joined in order, or the input
+        unchanged when it contains no fenced block.
+    """
+    blocks = _CODE_BLOCK.findall(beancount_statements)
+    if not blocks:
+        return beancount_statements
+    # Keep all blocks: a long answer may be split across several fences.
+    return "\n".join(block.strip("\n") for block in blocks) + "\n"
+
+
+def save_statements(beancount_statements: str):
+    """Append the entries to ledger/transactions/YYYY/MM.beancount.
+
+    The year and month come from the date that starts the first entry, so
+    all entries go to that single file even if later ones fall in another
+    month. The path is relative to the current working directory; missing
+    directories and the file itself are created on demand.
+
+    Args:
+        beancount_statements: Beancount text whose first line starts with a
+            YYYY-MM-DD date. Nothing is written (only a message is
+            printed) when it is blank or has no leading date.
+    """
+    if not beancount_statements.strip():
+        print("Warning: No valid statements to save")
+        return
+
+    first_line = beancount_statements.strip().split("\n")[0]
+    date_match = re.match(r"^(\d{4})-(\d{2})-\d{2}", first_line)
+    if not date_match:
+        print(
+            f"Error: Could not extract date from first statement: {first_line}")
+        return
+
+    year, month = date_match.groups()
+    output_dir = Path("ledger/transactions") / year
+    output_dir.mkdir(parents=True, exist_ok=True)
+    output_file = output_dir / f"{month}.beancount"
+
+    # Account names contain non-ASCII characters (e.g. "Gimnàs"), so pin the
+    # encoding instead of relying on the platform default.
+    with open(output_file, "a", encoding="utf-8") as f:
+        f.write(beancount_statements)
+        f.write("\n")
+
+    print(f"Saved transactions to {output_file}")
+
+
+def convert_file_to_markdown(path: str) -> str:
+    """Convert the document at ``path`` to Markdown with docling."""
+    converter = DocumentConverter()
+    result = converter.convert(path)
+    return result.document.export_to_markdown()
+
+
+async def main():
+    """Convert the XLSX given on the command line and save the entries."""
+    parser = argparse.ArgumentParser(
+        description="Parse CaixaBank account movements from an XLSX export")
+    parser.add_argument("source", help="Path to the input XLSX file")
+    args = parser.parse_args()
+
+    # Compare case-insensitively so "EXPORT.XLSX" is accepted too.
+    if not args.source.lower().endswith(".xlsx"):
+        parser.error("Input file must have .xlsx format")
+
+    markdown_report = convert_file_to_markdown(args.source)
+    beancount_statements = await get_beancount_price_statements(
+        markdown_report
+    )
+    print(f"Final result: \n{beancount_statements}")
+
+    clean_beancount_statements = parse_response(beancount_statements)
+    save_statements(clean_beancount_statements)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/ledger/transactions/2025/10.beancount b/ledger/transactions/2025/10.beancount
index bafcbf8..c6cc2ad 100644
--- a/ledger/transactions/2025/10.beancount
+++ b/ledger/transactions/2025/10.beancount
@@ -37,9 +37,6 @@
 2025-10-02 * "Plusfresc" "Compra de la setmana"
   Expenses:Supermercat 39.82 EUR
   Assets:Liquid:Caixabank:Corrent
-2025-10-03 * "Finques Samsó" "Lloguer octubre"
-  Expenses:Lloguer 1146.20 EUR
-  Assets:Liquid:Caixabank:Corrent -1146.20 EUR
 2025-10-03 * "Google Play" "Suscripció Strong Pro"
   Expenses:Gimnàs 32.99 EUR
   Assets:Liquid:Caixabank:Corrent
@@ -49,8 +46,36 @@
 2025-10-04 * "Beltran Alimentación" "Aigua i paper de cuina"
   Expenses:Supermercat 5.65 EUR
   Assets:Liquid:Caixabank:Corrent
+2025-10-04 * "Finques Samso" "FINQ.SAMSO S.L Recibo de fincas, alquileres"
+  Expenses:Lloguer 1146.20 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-06 * "Wellhub" "Wellhub EU"
+  Expenses:Gimnàs 35.99 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-07 * "Plus Barcelona" "PLUS BARCELONA -"
+  Expenses:Supermercat 2.86 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-07 * "Amazon" "WWW.AMAZON.ES"
+  Expenses:Altres 8.29 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-07 * "Bizum" "BIZUM RECIBIDO"
+  Income:Other:Caixabank:Bizum -8.29 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-07 * "Plus Teodora Lama" "PLUS TEODORA LAMA"
+  Expenses:Supermercat 24.85 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-09 * "Comercializadora Gas Power" "COMERCIALIZADORA GAS POWER11877106871025"
+  Income:Other:Devolucions -11.96 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-09 * "Nintendo" "Pokemon Legends ZA DLC"
+  Expenses:Entreteniment 29.99 EUR
+  Assets:Liquid:Caixabank:Corrent
+2025-10-09 * "Nintendo" "Pokemon Legends ZA"
+  Expenses:Entreteniment 69.99 EUR
+  Assets:Liquid:Caixabank:Corrent
 
-2025-11-01 balance Assets:Liquid:Caixabank:Corrent 11891.04 EUR
+
+2025-11-01 balance Assets:Liquid:Caixabank:Corrent 11739.32 EUR
 2025-11-01 balance Assets:Liquid:R4:EUR 0 EUR
 2025-11-01 balance Assets:Invest:Fund:Vanguard:EMMK 14.99 VANEMMK
 2025-11-01 balance Assets:Invest:Fund:Vanguard:GL 755.40 VANGL
@@ -68,4 +93,3 @@
 2025-11-01 balance Assets:PersonalProperty:AltresPropietats 0 EUR
 2025-11-01 balance Liabilities:Credit:Caixabank:TargetaCredit 0 EUR
 2025-11-01 balance Liabilities:Credit:Renta4:PolissaCredit -19876.16 EUR
-