Files
contabilitat/commands/parse-edenred-account-movements

344 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
import os
import argparse
import asyncio
import re
from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
from docling.document_converter import DocumentConverter
GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: Edenred Transactions to Beancount Parser
You are a specialized financial transaction parser that converts Edenred account movements into Beancount format.
## Input Format
You will receive a table with the following columns:
- **Fecha**: Transaction date
- **Detalle movimiento**: Transaction description
- **Importe**: Amount (always negative for expenses)
You will also receive the source account to use for all transactions.
Example input:
```
| Producto: | Ticket Restaurant |
| Fecha | Detalle movimiento | Importe |
2025-10-09 00:00:00 | MCDONALD'S BARCELONA | 12,50
```
## Output Format
Convert each transaction into a Beancount entry with this structure:
```
YYYY-MM-DD * "Payee" "Description"
ExpenseAccount AMOUNT EUR
SourceAccount
```
### Rules for Conversion
1. **Date**: Use the "Fecha" field in YYYY-MM-DD format
2. **Flag**: Always use `*` (cleared transaction)
3. **Payee**: Extract the main payee name from the "Detalle movimiento" field (first recognizable entity/merchant name)
4. **Description**: Use the full "Detalle movimiento" text as the description
5. **Amount**: Use the absolute value of "Importe" (remove the negative sign)
6. **Currency**: Always use EUR
7. **Source Account**: Use the provided source account as the second posting (the account is automatically debited)
### Expense Account Classification
Analyze each transaction and classify it into the most appropriate account based on:
- The payee/merchant name
- The transaction description
- Common spending patterns
**Available Expense Accounts:**
Expenses:Supermercat
Expenses:MenjarFora
Expenses:Mobilitat
Expenses:Parking
Expenses:Gasolina
Expenses:Altres
### Guidelines
- Restaurants, cafes, food delivery: `Expenses:MenjarFora`
- Supermarkets, grocery stores: `Expenses:Supermercat`
- Public transport, taxi, ride-sharing: `Expenses:Mobilitat`
- Parking: `Expenses:Parking`
- Gas stations: `Expenses:Gasolina`
- Other/unknown: `Expenses:Altres`
## Example
**Input:**
```
Source Account: Assets:Benefits:Edenred:TicketsRestaurant
2025-10-09 00:00:00 | MCDONALD'S BARCELONA | 12,50
```
**Output:**
```
2025-10-09 * "MCDONALD'S" "MCDONALD'S BARCELONA"
Expenses:MenjarFora 12.50 EUR
Assets:Benefits:Edenred:TicketsRestaurant
```
## Output Requirements
- Process all transactions in the input table
- Maintain chronological order
- Ensure proper indentation (2 spaces for posting lines)
- Be consistent with account naming conventions
- Only output Beancount code, explanations are not needed.
## Your Task
Parse the provided account movements data tables and generate the corresponding Beancount statements. Output only the Beancount code.
"""
async def get_beancount_statements(markdown_report: str, source_account: str) -> str:
options = ClaudeAgentOptions(
system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
cwd=os.getcwd()
)
result = None
async for message in query(
prompt=f"Convert this Edenred account movements table to beancount statements.\n\n"
f"Source Account: {source_account}\n\n{markdown_report}",
options=options
):
if isinstance(message, ResultMessage) and message.subtype == "success":
result = message.result
else:
print(message)
if result is not None and isinstance(result, str):
return result
else:
raise ValueError(
"Unable to get Beancount statements from the report!")
def parse_response(beancount_statements: str):
"""
The input beancount statements might be inside a markdown beancount
code block or in plain text.
"""
code_block_pattern = r'```(?:beancount)?\n(.*?)```'
match = re.search(code_block_pattern, beancount_statements, re.DOTALL)
if match:
content = match.group(1)
else:
content = beancount_statements
return content
def extract_product_type(markdown_report: str) -> str:
"""
Extract the Producto field to determine which account to use.
Returns the appropriate Beancount account.
"""
lines = markdown_report.split('\n')
for line in lines:
if 'Producto:' in line or 'producto:' in line.lower():
if 'Ticket Restaurant' in line:
return 'Assets:Benefits:Edenred:TicketsRestaurant'
elif 'Edenred Movilidad' in line or 'Movilidad' in line:
return 'Assets:Benefits:Edenred:TargetaTransport'
print("Warning: Could not determine product type. Defaulting to TicketsRestaurant")
return 'Assets:Benefits:Edenred:TicketsRestaurant'
def extract_balance_and_last_date(markdown_report: str) -> tuple[str, str]:
"""
Extract the balance from the Saldo field and the date of the last transaction.
Returns (last_date, balance) tuple.
"""
lines = markdown_report.split('\n')
balance = ""
last_date = ""
for line in lines:
if '|' not in line:
continue
parts = line.split('|')
if len(parts) >= 3 and 'Saldo:' in parts[1]:
balance_str = parts[2].strip()
balance = balance_str.replace(',', '.').replace(' ', '')
if len(parts) >= 2:
fecha_col = parts[1].strip()
date_match = re.search(r'(\d{4})-(\d{2})-(\d{2})', fecha_col)
if date_match:
current_date = f"{date_match.group(1)}-{date_match.group(2)}-{date_match.group(3)}"
if not last_date or current_date > last_date:
last_date = current_date
return last_date, balance
def save_statements(beancount_statements: str, last_date: str, balance: str, source_account: str):
"""
The statements are saved in beancount files in
ledger/transactions/YYYY/MM.beancount.
Statements are sorted chronologically and split by month if they
span multiple months.
A balance assertion is added at the end of the last month's file.
"""
from pathlib import Path
from collections import defaultdict
if not beancount_statements.strip():
print("Warning: No valid statements to save")
return
lines = beancount_statements.strip().split('\n')
transactions = []
current_transaction = []
for line in lines:
if re.match(r'^\d{4}-\d{2}-\d{2}', line):
if current_transaction:
transactions.append('\n'.join(current_transaction))
current_transaction = [line]
elif current_transaction:
current_transaction.append(line)
if current_transaction:
transactions.append('\n'.join(current_transaction))
transactions.sort(key=lambda t: re.match(
r'^(\d{4}-\d{2}-\d{2})', t).group(1))
transactions_by_month = defaultdict(list)
for transaction in transactions:
date_match = re.match(r'^(\d{4})-(\d{2})-\d{2}', transaction)
if date_match:
year = date_match.group(1)
month = date_match.group(2)
key = (year, month)
transactions_by_month[key].append(transaction)
last_month_key = max(transactions_by_month.keys()) if transactions_by_month else None
for (year, month), month_transactions in sorted(transactions_by_month.items()):
output_dir = Path(f"ledger/transactions/{year}")
output_dir.mkdir(parents=True, exist_ok=True)
output_file = output_dir / f"{month}.beancount"
existing_content = ""
if output_file.exists():
with open(output_file, 'r') as f:
existing_content = f.read()
with open(output_file, 'w') as f:
if existing_content:
f.write(existing_content)
if not existing_content.endswith('\n'):
f.write('\n')
f.write('\n'.join(month_transactions))
f.write('\n')
if (year, month) == last_month_key and last_date and balance:
f.write(f'\n{last_date} balance {source_account} {balance} EUR\n')
print(f"Saved statements to {output_file}")
def filter_markdown_by_date(markdown_report: str, from_date: str) -> str:
"""
Filter markdown table to only include rows with dates >= from_date.
"""
from datetime import datetime
if not from_date:
return markdown_report
try:
filter_date = datetime.strptime(from_date, "%Y-%m-%d")
except ValueError:
print(f"Warning: Invalid date format '{
from_date}'. Expected YYYY-MM-DD. Ignoring filter.")
return markdown_report
lines = markdown_report.split('\n')
filtered_lines = []
for line in lines:
if '|' not in line:
filtered_lines.append(line)
continue
parts = line.split('|')
if len(parts) < 4:
filtered_lines.append(line)
continue
fecha_col = parts[1].strip()
date_match = re.search(r'(\d{4})-(\d{2})-(\d{2})', fecha_col)
if date_match:
line_date = datetime.strptime(f"{date_match.group(
1)}-{date_match.group(2)}-{date_match.group(3)}", "%Y-%m-%d")
if line_date >= filter_date:
filtered_lines.append(line)
else:
filtered_lines.append(line)
return '\n'.join(filtered_lines)
def convert_file_to_markdown(path: str):
converter = DocumentConverter()
result = converter.convert(path)
return result.document.export_to_markdown()
async def main():
parser = argparse.ArgumentParser(
description="Parse Edenred report from XLSX format")
parser.add_argument("source", help="Path to the input XLSX file")
parser.add_argument("--from", dest="from_date",
help="Filter transactions from this date (YYYY-MM-DD)")
args = parser.parse_args()
if not args.source.endswith(".xlsx"):
parser.error("Input file must have .xlsx format")
markdown_report = convert_file_to_markdown(args.source)
source_account = extract_product_type(markdown_report)
print(f"Detected source account: {source_account}")
last_date, balance = extract_balance_and_last_date(markdown_report)
print(f"Extracted balance: {balance} on date: {last_date}")
if args.from_date:
markdown_report = filter_markdown_by_date(
markdown_report, args.from_date)
beancount_statements = await get_beancount_statements(
markdown_report, source_account
)
print(f"Final result: \n{beancount_statements}")
clean_beancount_statements = parse_response(beancount_statements)
save_statements(clean_beancount_statements, last_date, balance, source_account)
if __name__ == "__main__":
asyncio.run(main())