new scripts parse edenred account movements and parse r4 movements
This commit is contained in:
@@ -302,6 +302,48 @@ def save_statements(
|
||||
print(f"Saved statements to {output_file}")
|
||||
|
||||
|
||||
def filter_markdown_by_date(markdown_report: str, from_date: str) -> str:
    """
    Filter a markdown table so only rows dated on or after ``from_date`` remain.

    Lines without a ``|``, rows with fewer than 7 ``|``-separated parts and
    rows whose first column holds no valid ``YYYY-MM-DD`` date are always
    kept, so headers, separators and surrounding text survive the filter.

    Args:
        markdown_report: Markdown text containing the table to filter.
        from_date: Inclusive lower bound in ``YYYY-MM-DD`` format. An empty
            value disables filtering; a malformed value prints a warning and
            disables filtering.

    Returns:
        The markdown text with the rows dated before ``from_date`` removed.
    """
    import re
    from datetime import datetime

    if not from_date:
        return markdown_report

    try:
        filter_date = datetime.strptime(from_date, "%Y-%m-%d")
    except ValueError:
        print(f"Warning: Invalid date format '{from_date}'. Expected YYYY-MM-DD. Ignoring filter.")
        return markdown_report

    date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
    filtered_lines = []

    for line in markdown_report.split('\n'):
        parts = line.split('|')
        # A line without '|' splits into a single part, so this one check
        # also covers plain text lines.
        if len(parts) < 7:
            filtered_lines.append(line)
            continue

        date_match = date_pattern.search(parts[1])
        if not date_match:
            filtered_lines.append(line)
            continue

        try:
            line_date = datetime.strptime(date_match.group(0), "%Y-%m-%d")
        except ValueError:
            # Digits that look like a date but are not one (e.g. 2025-13-45)
            # must not abort the whole run; keep the row untouched.
            filtered_lines.append(line)
            continue

        if line_date >= filter_date:
            filtered_lines.append(line)

    return '\n'.join(filtered_lines)
|
||||
|
||||
|
||||
def convert_file_to_markdown(path: str):
|
||||
converter = DocumentConverter()
|
||||
result = converter.convert(path)
|
||||
@@ -312,12 +354,17 @@ async def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Parse R4 report from XLSX format")
|
||||
parser.add_argument("source", help="Path to the input XLSX file")
|
||||
parser.add_argument("--from", dest="from_date", help="Filter transactions from this date (YYYY-MM-DD)")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.source.endswith(".xlsx"):
|
||||
parser.error("Input file must have .xlsx format")
|
||||
|
||||
markdown_report = convert_file_to_markdown(args.source)
|
||||
|
||||
if args.from_date:
|
||||
markdown_report = filter_markdown_by_date(markdown_report, args.from_date)
|
||||
|
||||
beancount_statements = await get_beancount_price_statements(
|
||||
markdown_report
|
||||
)
|
||||
|
||||
343
commands/parse-edenred-account-movements
Executable file
343
commands/parse-edenred-account-movements
Executable file
@@ -0,0 +1,343 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import asyncio
|
||||
import re
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
||||
GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: Edenred Transactions to Beancount Parser
|
||||
|
||||
You are a specialized financial transaction parser that converts Edenred account movements into Beancount format.
|
||||
|
||||
## Input Format
|
||||
|
||||
You will receive a table with the following columns:
|
||||
- **Fecha**: Transaction date
|
||||
- **Detalle movimiento**: Transaction description
|
||||
- **Importe**: Amount (always negative for expenses)
|
||||
|
||||
You will also receive the source account to use for all transactions.
|
||||
|
||||
Example input:
|
||||
```
|
||||
| Producto: | Ticket Restaurant |
|
||||
|
||||
| Fecha | Detalle movimiento | Importe |
|
||||
2025-10-09 00:00:00 | MCDONALD'S BARCELONA | 12,50
|
||||
```
|
||||
|
||||
## Output Format
|
||||
|
||||
Convert each transaction into a Beancount entry with this structure:
|
||||
|
||||
```
|
||||
YYYY-MM-DD * "Payee" "Description"
|
||||
ExpenseAccount AMOUNT EUR
|
||||
SourceAccount
|
||||
```
|
||||
|
||||
### Rules for Conversion
|
||||
|
||||
1. **Date**: Use the "Fecha" field in YYYY-MM-DD format
|
||||
2. **Flag**: Always use `*` (cleared transaction)
|
||||
3. **Payee**: Extract the main payee name from the "Detalle movimiento" field (first recognizable entity/merchant name)
|
||||
4. **Description**: Use the full "Detalle movimiento" text as the description
|
||||
5. **Amount**: Use the absolute value of "Importe" (remove the negative sign)
|
||||
6. **Currency**: Always use EUR
|
||||
7. **Source Account**: Use the provided source account as the second posting (the account is automatically debited)
|
||||
|
||||
### Expense Account Classification
|
||||
|
||||
Analyze each transaction and classify it into the most appropriate account based on:
|
||||
- The payee/merchant name
|
||||
- The transaction description
|
||||
- Common spending patterns
|
||||
|
||||
**Available Expense Accounts:**
|
||||
Expenses:Supermercat
|
||||
Expenses:MenjarFora
|
||||
Expenses:Mobilitat
|
||||
Expenses:Parking
|
||||
Expenses:Gasolina
|
||||
Expenses:Altres
|
||||
|
||||
### Guidelines
|
||||
|
||||
- Restaurants, cafes, food delivery: `Expenses:MenjarFora`
|
||||
- Supermarkets, grocery stores: `Expenses:Supermercat`
|
||||
- Public transport, taxi, ride-sharing: `Expenses:Mobilitat`
|
||||
- Parking: `Expenses:Parking`
|
||||
- Gas stations: `Expenses:Gasolina`
|
||||
- Other/unknown: `Expenses:Altres`
|
||||
|
||||
## Example
|
||||
|
||||
**Input:**
|
||||
```
|
||||
Source Account: Assets:Benefits:Edenred:TicketsRestaurant
|
||||
|
||||
2025-10-09 00:00:00 | MCDONALD'S BARCELONA | 12,50
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
2025-10-09 * "MCDONALD'S" "MCDONALD'S BARCELONA"
|
||||
Expenses:MenjarFora 12.50 EUR
|
||||
Assets:Benefits:Edenred:TicketsRestaurant
|
||||
```
|
||||
|
||||
## Output Requirements
|
||||
|
||||
- Process all transactions in the input table
|
||||
- Maintain chronological order
|
||||
- Ensure proper indentation (2 spaces for posting lines)
|
||||
- Be consistent with account naming conventions
|
||||
- Only output Beancount code, explanations are not needed.
|
||||
|
||||
## Your Task
|
||||
Parse the provided account movements data tables and generate the corresponding Beancount statements. Output only the Beancount code.
|
||||
"""
|
||||
|
||||
|
||||
async def get_beancount_statements(markdown_report: str, source_account: str) -> str:
    """
    Ask the Claude agent to turn the Edenred movements table into Beancount statements.

    Args:
        markdown_report: Markdown text of the account movements report.
        source_account: Account name sent along with the table in the prompt.

    Returns:
        The text carried by the successful ``ResultMessage``.

    Raises:
        ValueError: If no successful string result was received.
    """
    agent_options = ClaudeAgentOptions(
        system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
        cwd=os.getcwd(),
    )
    user_prompt = (
        "Convert this Edenred account movements table to beancount statements.\n\n"
        f"Source Account: {source_account}\n\n{markdown_report}"
    )

    final_text = None
    async for message in query(prompt=user_prompt, options=agent_options):
        succeeded = isinstance(message, ResultMessage) and message.subtype == "success"
        if not succeeded:
            # Everything that is not the successful result is echoed to stdout.
            print(message)
            continue
        final_text = message.result

    if not isinstance(final_text, str):
        raise ValueError(
            "Unable to get Beancount statements from the report!")
    return final_text
|
||||
|
||||
|
||||
def parse_response(beancount_statements: str):
    """
    Strip markdown code fences from the model response.

    The statements may arrive as plain text or wrapped in one or more fenced
    code blocks. Fences with any info string (``beancount``, ``text``, none)
    are recognised, and the contents of all blocks are joined with newlines
    so statements split across several blocks are not lost.

    Args:
        beancount_statements: Raw text returned by the model.

    Returns:
        The fenced content when at least one fenced block is present,
        otherwise the input unchanged.
    """
    fenced_blocks = re.findall(
        r'```[^\n`]*\n(.*?)```', beancount_statements, re.DOTALL)
    if fenced_blocks:
        return '\n'.join(fenced_blocks)
    return beancount_statements
|
||||
|
||||
|
||||
def extract_product_type(markdown_report: str) -> str:
    """
    Map the report's ``Producto:`` line to the Beancount source account.

    Both the ``Producto:`` marker and the product name are matched
    case-insensitively. Scanning continues past ``Producto:`` lines that name
    no known product.

    Args:
        markdown_report: Markdown text of the Edenred report.

    Returns:
        ``Assets:Benefits:Edenred:TicketsRestaurant`` for "Ticket Restaurant",
        ``Assets:Benefits:Edenred:TargetaTransport`` for "Movilidad", and the
        TicketsRestaurant account (after printing a warning) when no product
        could be identified.
    """
    for line in markdown_report.split('\n'):
        lowered = line.lower()
        if 'producto:' not in lowered:
            continue
        if 'ticket restaurant' in lowered:
            return 'Assets:Benefits:Edenred:TicketsRestaurant'
        # "Edenred Movilidad" contains "Movilidad", so one check covers both.
        if 'movilidad' in lowered:
            return 'Assets:Benefits:Edenred:TargetaTransport'

    print("Warning: Could not determine product type. Defaulting to TicketsRestaurant")
    return 'Assets:Benefits:Edenred:TicketsRestaurant'
|
||||
|
||||
|
||||
def _normalize_amount(raw: str) -> str:
    """Rewrite a localized amount such as ``1.234,56`` as ``1234.56``."""
    amount = raw.replace(' ', '')
    if ',' in amount and '.' in amount:
        # With both separators present, the right-most one is the decimal
        # mark and the other one only groups thousands.
        if amount.rfind(',') > amount.rfind('.'):
            amount = amount.replace('.', '')
        else:
            amount = amount.replace(',', '')
    return amount.replace(',', '.')


def extract_balance_and_last_date(markdown_report: str) -> tuple[str, str]:
    """
    Extract the balance from the ``Saldo:`` row and the latest transaction date.

    Only lines containing ``|`` are inspected. The balance is taken from the
    cell following the ``Saldo:`` cell; the date is the greatest
    ``YYYY-MM-DD`` value found in the first column of any row.

    Args:
        markdown_report: Markdown text of the Edenred report.

    Returns:
        A ``(last_date, balance)`` tuple. Either element is an empty string
        when it could not be found.
    """
    balance = ""
    last_date = ""

    for line in markdown_report.split('\n'):
        if '|' not in line:
            continue

        # A line containing '|' always splits into at least two parts.
        parts = line.split('|')

        if len(parts) >= 3 and 'Saldo:' in parts[1]:
            balance = _normalize_amount(parts[2].strip())

        date_match = re.search(r'\d{4}-\d{2}-\d{2}', parts[1])
        if date_match:
            current_date = date_match.group(0)
            # ISO dates sort chronologically as plain strings.
            if not last_date or current_date > last_date:
                last_date = current_date

    return last_date, balance
|
||||
|
||||
|
||||
def save_statements(beancount_statements: str, last_date: str, balance: str, source_account: str):
    """
    Append the statements to ``ledger/transactions/YYYY/MM.beancount``.

    Statements are sorted chronologically and split by month if they span
    multiple months. Each month's transactions are appended to that month's
    file, which is created when missing. A balance assertion for
    ``source_account`` is added at the end of the last month's file.

    The assertion is dated the day after ``last_date``: Beancount evaluates a
    balance assertion at the start of its date, so dating it on the last
    transaction day would leave that day's postings out of the check.

    Args:
        beancount_statements: Beancount transactions as plain text.
        last_date: Date of the last transaction in ``YYYY-MM-DD`` format, or
            an empty string to skip the balance assertion.
        balance: Expected balance after the last transaction, or an empty
            string to skip the balance assertion.
        source_account: Account the balance assertion is written for.
    """
    from collections import defaultdict
    from datetime import date, timedelta
    from pathlib import Path

    if not beancount_statements.strip():
        print("Warning: No valid statements to save")
        return

    date_prefix = re.compile(r'^(\d{4})-(\d{2})-\d{2}')

    # A transaction starts on a line beginning with a date and owns every
    # following line up to the next such line. Lines before the first dated
    # line are dropped.
    grouped_lines = []
    for line in beancount_statements.strip().split('\n'):
        if date_prefix.match(line):
            grouped_lines.append([line])
        elif grouped_lines:
            grouped_lines[-1].append(line)

    if not grouped_lines:
        print("Warning: No valid statements to save")
        return

    # Every transaction starts with its 10-character ISO date, which sorts
    # chronologically as a string; the sort is stable within a day.
    transactions = sorted(
        ('\n'.join(group) for group in grouped_lines),
        key=lambda transaction: transaction[:10],
    )

    transactions_by_month = defaultdict(list)
    for transaction in transactions:
        year, month = date_prefix.match(transaction).groups()
        transactions_by_month[(year, month)].append(transaction)

    balance_entry = ""
    if last_date and balance:
        try:
            assertion_date = date.fromisoformat(last_date) + timedelta(days=1)
        except ValueError:
            print(f"Warning: Invalid last transaction date '{last_date}'. Skipping balance assertion.")
        else:
            balance_entry = f'\n{assertion_date.isoformat()} balance {source_account} {balance} EUR\n'

    last_month_key = max(transactions_by_month)

    for (year, month), month_transactions in sorted(transactions_by_month.items()):
        output_dir = Path("ledger/transactions") / year
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"{month}.beancount"

        # The existing file is only inspected, never rewritten, so a failure
        # while writing cannot destroy entries that were already there.
        needs_separator = False
        if output_file.exists():
            existing_content = output_file.read_text(encoding="utf-8")
            needs_separator = bool(existing_content) and not existing_content.endswith('\n')

        # Explicit UTF-8: the ledger holds non-ASCII payees and accounts.
        with open(output_file, 'a', encoding="utf-8") as f:
            if needs_separator:
                f.write('\n')
            f.write('\n'.join(month_transactions))
            f.write('\n')

            if (year, month) == last_month_key and balance_entry:
                f.write(balance_entry)

        print(f"Saved statements to {output_file}")
|
||||
|
||||
|
||||
def filter_markdown_by_date(markdown_report: str, from_date: str) -> str:
    """
    Filter a markdown table so only rows dated on or after ``from_date`` remain.

    Lines without a ``|``, rows with fewer than 4 ``|``-separated parts and
    rows whose first column holds no valid ``YYYY-MM-DD`` date are always
    kept, so headers, separators and surrounding text survive the filter.

    Args:
        markdown_report: Markdown text containing the table to filter.
        from_date: Inclusive lower bound in ``YYYY-MM-DD`` format. An empty
            value disables filtering; a malformed value prints a warning and
            disables filtering.

    Returns:
        The markdown text with the rows dated before ``from_date`` removed.
    """
    from datetime import datetime

    if not from_date:
        return markdown_report

    try:
        filter_date = datetime.strptime(from_date, "%Y-%m-%d")
    except ValueError:
        # Kept on one line: a replacement field split across lines inside a
        # single-quoted f-string only parses on Python 3.12+.
        print(f"Warning: Invalid date format '{from_date}'. Expected YYYY-MM-DD. Ignoring filter.")
        return markdown_report

    date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
    filtered_lines = []

    for line in markdown_report.split('\n'):
        parts = line.split('|')
        # A line without '|' splits into a single part, so this one check
        # also covers plain text lines.
        if len(parts) < 4:
            filtered_lines.append(line)
            continue

        date_match = date_pattern.search(parts[1])
        if not date_match:
            filtered_lines.append(line)
            continue

        try:
            line_date = datetime.strptime(date_match.group(0), "%Y-%m-%d")
        except ValueError:
            # Digits that look like a date but are not one (e.g. 2025-02-30)
            # must not abort the whole run; keep the row untouched.
            filtered_lines.append(line)
            continue

        if line_date >= filter_date:
            filtered_lines.append(line)

    return '\n'.join(filtered_lines)
|
||||
|
||||
|
||||
def convert_file_to_markdown(path: str):
    """
    Convert the file at ``path`` with docling's ``DocumentConverter`` and
    return the converted document exported as markdown.
    """
    conversion = DocumentConverter().convert(path)
    document = conversion.document
    return document.export_to_markdown()
|
||||
|
||||
|
||||
async def main():
    """
    Command-line entry point: convert an Edenred XLSX report into Beancount
    statements and save them to the ledger.

    The source account, balance and last transaction date are read from the
    full report before the optional ``--from`` date filter is applied.
    """
    parser = argparse.ArgumentParser(
        description="Parse Edenred report from XLSX format")
    parser.add_argument("source", help="Path to the input XLSX file")
    parser.add_argument(
        "--from",
        dest="from_date",
        help="Filter transactions from this date (YYYY-MM-DD)",
    )
    cli_args = parser.parse_args()

    source_path = cli_args.source
    if not source_path.endswith(".xlsx"):
        parser.error("Input file must have .xlsx format")

    report = convert_file_to_markdown(source_path)

    account = extract_product_type(report)
    print(f"Detected source account: {account}")

    last_date, balance = extract_balance_and_last_date(report)
    print(f"Extracted balance: {balance} on date: {last_date}")

    if cli_args.from_date:
        report = filter_markdown_by_date(report, cli_args.from_date)

    raw_response = await get_beancount_statements(report, account)
    print(f"Final result: \n{raw_response}")

    save_statements(parse_response(raw_response), last_date, balance, account)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
403
commands/parse-r4-movements
Executable file
403
commands/parse-r4-movements
Executable file
@@ -0,0 +1,403 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import asyncio
|
||||
import re
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
||||
GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: R4 Movements to Beancount Parser
|
||||
|
||||
You are a specialized financial transaction parser that converts R4 account movements into Beancount format.
|
||||
|
||||
## Input Format
|
||||
|
||||
You will receive a table with the following columns:
|
||||
- **FECHA**: Transaction date (DD/MM/YYYY format)
|
||||
- **CONCEPTO**: Transaction description
|
||||
- **MOVIMIENTOS**: Amount (negative for expenses/debits, positive for income/credits)
|
||||
- **SALDO**: Account balance after transaction
|
||||
|
||||
Example input:
|
||||
```
|
||||
| FECHA | CONCEPTO | MOVIMIENTOS | SALDO |
|
||||
|------------|----------------------------------------------|-------------|------------|
|
||||
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.32 |
|
||||
| 11/10/2025 | TRANSF. RECIBIDA DE ROGER ORIOL PEREZ | 150 | -19971.3 |
|
||||
```
|
||||
|
||||
Warning: The table might be incorrectly parsed and may appear split. For example:
|
||||
```
|
||||
| MOVIMIENTOS DEL PERIODO EN EUROS |
|
||||
|------------------------------------|
|
||||
| FECHA |
|
||||
|
||||
| CONCEPTO | MOVIMIENTOS | SALDO |
|
||||
|----------------------------------------------|---------------|----------|
|
||||
| SALDO ANTERIOR | -19876.16 | -19876.2 |
|
||||
| INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.3 |
|
||||
| INTERESES TERCER TRIMESTRE DE 2025 | -175.23 | -20051.5 |
|
||||
| SALDO FINAL | None | -19971.3 |
|
||||
|
||||
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.32 |
|
||||
|--------------|----------------------------------------------|---------|-------------|
|
||||
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -175.23 | -20051.5 |
|
||||
```
|
||||
You should interpret the previous table like this:
|
||||
```
|
||||
| MOVIMIENTOS DEL PERIODO EN EUROS |
|
||||
|------------------------------------|
|
||||
| FECHA | CONCEPTO | MOVIMIENTOS | SALDO |
|
||||
|-------------------------------------------------------------|---------------|----------|
|
||||
| | SALDO ANTERIOR | -19876.16 | -19876.2 |
|
||||
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.3 |
|
||||
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -175.23 | -20051.5 |
|
||||
| | SALDO FINAL | None | -19971.3 |
|
||||
```
|
||||
|
||||
## Output Format
|
||||
|
||||
Convert each transaction into a Beancount entry with this structure:
|
||||
|
||||
```
|
||||
YYYY-MM-DD * "Payee" "Description"
|
||||
Account1 AMOUNT EUR
|
||||
Account2
|
||||
```
|
||||
|
||||
### Rules for Conversion
|
||||
|
||||
1. **Date**: Convert from DD/MM/YYYY to YYYY-MM-DD format
|
||||
2. **Flag**: Always use `*` (cleared transaction)
|
||||
3. **Payee**: Extract the main entity from "CONCEPTO" field
|
||||
4. **Description**: Use the full "CONCEPTO" text as the description
|
||||
5. **Amount**: Use the absolute value of "MOVIMIENTOS"
|
||||
6. **Currency**: Always use EUR
|
||||
7. **Source Account**: Always use `Liabilities:Credit:Renta4:PolissaCredit` as one of the accounts
|
||||
|
||||
### Account Classification
|
||||
|
||||
**Available Expense Accounts:**
|
||||
- Expenses:R4:Comissions
|
||||
- Expenses:R4:Interessos
|
||||
|
||||
**Available Income Accounts:**
|
||||
- Income:Invest:R4:Dividends
|
||||
- Income:Invest:R4:CapitalGains
|
||||
- Income:Invest:R4:CapitalGains:Untaxable
|
||||
|
||||
**Available Tax Accounts:**
|
||||
- Expenses:Taxes:BeneficisDividends
|
||||
- Expenses:Taxes:BeneficisDividendsOrigen
|
||||
|
||||
**Transfer Account:**
|
||||
- Assets:Liquid:Caixabank:Corrent
|
||||
|
||||
### Transaction Type Rules
|
||||
|
||||
1. **Commissions** (COMISION): Use `Expenses:R4:Comissions`
|
||||
2. **Interest** (INTERESES): Use `Expenses:R4:Interessos`
|
||||
3. **Dividends**: Use `Income:Invest:R4:Dividends`
|
||||
4. **Capital Gains**: Use `Income:Invest:R4:CapitalGains` or `Income:Invest:R4:CapitalGains:Untaxable`
|
||||
5. **Dividend Tax**: Use `Expenses:Taxes:BeneficisDividends` or `Expenses:Taxes:BeneficisDividendsOrigen`
|
||||
6. **IVA**: Use `Expenses:R4:Comissions`
|
||||
7. **Received Transfer from ROGER ORIOL PEREZ**:
|
||||
- Credit: `Liabilities:Credit:Renta4:PolissaCredit` with amount
|
||||
- Debit: `Assets:Liquid:Caixabank:Corrent`
|
||||
|
||||
### Special Cases
|
||||
|
||||
- For negative amounts in MOVIMIENTOS: debit the expense account, credit `Liabilities:Credit:Renta4:PolissaCredit`
|
||||
- For positive amounts in MOVIMIENTOS: debit `Liabilities:Credit:Renta4:PolissaCredit`, credit the income account
|
||||
- For transfers from ROGER ORIOL PEREZ: debit `Liabilities:Credit:Renta4:PolissaCredit`, credit `Assets:Liquid:Caixabank:Corrent`
|
||||
|
||||
## Examples
|
||||
|
||||
**Input:**
|
||||
```
|
||||
01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.32
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
2025-10-01 * "R4" "INTERESES TERCER TRIMESTRE DE 2025"
|
||||
Expenses:R4:Interessos 0.16 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
```
|
||||
|
||||
**Input:**
|
||||
```
|
||||
11/10/2025 | TRANSF. RECIBIDA DE ROGER ORIOL PEREZ | 150 | -19971.3
|
||||
```
|
||||
|
||||
**Output:**
|
||||
```
|
||||
2025-10-11 * "ROGER ORIOL PEREZ" "TRANSF. RECIBIDA DE ROGER ORIOL PEREZ"
|
||||
Liabilities:Credit:Renta4:PolissaCredit 150.00 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
```
|
||||
|
||||
## Output Requirements
|
||||
|
||||
- Process all transactions in the input table
|
||||
- Skip rows with "SALDO ANTERIOR" or "SALDO FINAL" in CONCEPTO
|
||||
- Maintain chronological order
|
||||
- Ensure proper indentation (2 spaces for posting lines)
|
||||
- Be consistent with account naming conventions
|
||||
- Only output Beancount code, explanations are not needed.
|
||||
|
||||
## Your Task
|
||||
Parse the provided R4 movements table and generate the corresponding Beancount statements. Output only the Beancount code.
|
||||
"""
|
||||
|
||||
|
||||
async def get_beancount_statements(markdown_report: str) -> str:
    """
    Ask the Claude agent to turn the R4 movements table into Beancount statements.

    Args:
        markdown_report: Markdown text of the movements table to convert.

    Returns:
        The text carried by the successful ``ResultMessage``.

    Raises:
        ValueError: If no successful string result was received.
    """
    options = ClaudeAgentOptions(
        system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
        cwd=os.getcwd()
    )

    # Built up front on a single line: a replacement field split across lines
    # inside a single-quoted f-string only parses on Python 3.12+.
    prompt = f"Convert this R4 movements table to beancount statements:\n\n{markdown_report}"

    result = None
    async for message in query(prompt=prompt, options=options):
        if isinstance(message, ResultMessage) and message.subtype == "success":
            result = message.result
        else:
            # Everything that is not the successful result is echoed to stdout.
            print(message)

    if isinstance(result, str):
        return result
    raise ValueError(
        "Unable to get Beancount statements from the report!")
|
||||
|
||||
|
||||
def parse_response(beancount_statements: str):
    """
    Strip markdown code fences from the model response.

    The statements may arrive as plain text or wrapped in one or more fenced
    code blocks. Fences with any info string (``beancount``, ``text``, none)
    are recognised, and the contents of all blocks are joined with newlines
    so statements split across several blocks are not lost.

    Args:
        beancount_statements: Raw text returned by the model.

    Returns:
        The fenced content when at least one fenced block is present,
        otherwise the input unchanged.
    """
    fenced_blocks = re.findall(
        r'```[^\n`]*\n(.*?)```', beancount_statements, re.DOTALL)
    if fenced_blocks:
        return '\n'.join(fenced_blocks)
    return beancount_statements
|
||||
|
||||
|
||||
def extract_movements_table(markdown_report: str) -> str:
    """
    Extract the MOVIMIENTOS DEL PERIODO EN EUROS table from the markdown.

    Collection starts after the line containing the section title and stops
    at the first line containing ``RESUMEN DE RESULTADOS``, whether that
    marker appears as plain text or inside a table row. Within the section,
    non-blank lines that contain ``|`` or start with ``FECHA`` are kept.

    Args:
        markdown_report: Markdown text of the full R4 report.

    Returns:
        The collected table lines joined with newlines, or an empty string
        when the section is not found.
    """
    in_movements_section = False
    table_lines = []

    for line in markdown_report.split('\n'):
        if 'MOVIMIENTOS DEL PERIODO EN EUROS' in line:
            in_movements_section = True
            continue

        if not in_movements_section:
            continue

        # Test the terminator first: when it is rendered as a table row it
        # also contains '|' and would otherwise be collected as table content.
        if 'RESUMEN DE RESULTADOS' in line:
            break

        if line.strip() and ('|' in line or line.startswith('FECHA')):
            table_lines.append(line)

    return '\n'.join(table_lines)
|
||||
|
||||
|
||||
def _normalize_amount(raw: str) -> str:
    """Rewrite a localized amount such as ``-19,971.30`` as ``-19971.30``."""
    amount = raw.replace(' ', '')
    if ',' in amount and '.' in amount:
        # With both separators present, the right-most one is the decimal
        # mark and the other one only groups thousands.
        if amount.rfind(',') > amount.rfind('.'):
            amount = amount.replace('.', '')
        else:
            amount = amount.replace(',', '')
    return amount.replace(',', '.')


def extract_balance_and_last_date(markdown_report: str) -> tuple[str, str]:
    """
    Extract the final balance from the SALDO FINAL row and the latest transaction date.

    Only lines containing ``|`` are inspected. The balance is the last cell
    of the row containing ``SALDO FINAL``; the date is the greatest
    ``DD/MM/YYYY`` value found in the first column of any row, returned in
    ``YYYY-MM-DD`` format.

    Args:
        markdown_report: Markdown text of the movements table.

    Returns:
        A ``(last_date, balance)`` tuple. Either element is an empty string
        when it could not be found.
    """
    balance = ""
    last_date = ""

    for line in markdown_report.split('\n'):
        if '|' not in line:
            continue

        # A line containing '|' always splits into at least two parts.
        parts = line.split('|')

        if len(parts) >= 3 and 'SALDO FINAL' in line:
            # A trailing pipe leaves an empty final element after the split,
            # so the last cell is then the second-to-last part.
            has_trailing_pipe = line.rstrip().endswith('|')
            last_cell = parts[-2] if has_trailing_pipe else parts[-1]
            balance = _normalize_amount(last_cell.strip())

        date_match = re.search(r'(\d{2})/(\d{2})/(\d{4})', parts[1])
        if date_match:
            day, month, year = date_match.groups()
            current_date = f"{year}-{month}-{day}"
            # ISO dates sort chronologically as plain strings.
            if not last_date or current_date > last_date:
                last_date = current_date

    return last_date, balance
|
||||
|
||||
|
||||
def save_statements(beancount_statements: str, last_date: str, balance: str):
    """
    Append the statements to ``ledger/transactions/YYYY/MM.beancount``.

    Statements are sorted chronologically and split by month if they span
    multiple months. Each month's transactions are appended to that month's
    file, which is created when missing. A balance assertion for
    ``Liabilities:Credit:Renta4:PolissaCredit`` is added at the end of the
    last month's file.

    The assertion is dated the day after ``last_date``: Beancount evaluates a
    balance assertion at the start of its date, so dating it on the last
    transaction day would leave that day's postings out of the check.

    Args:
        beancount_statements: Beancount transactions as plain text.
        last_date: Date of the last transaction in ``YYYY-MM-DD`` format, or
            an empty string to skip the balance assertion.
        balance: Expected balance after the last transaction, or an empty
            string to skip the balance assertion.
    """
    from collections import defaultdict
    from datetime import date, timedelta
    from pathlib import Path

    if not beancount_statements.strip():
        print("Warning: No valid statements to save")
        return

    date_prefix = re.compile(r'^(\d{4})-(\d{2})-\d{2}')

    # A transaction starts on a line beginning with a date and owns every
    # following line up to the next such line. Lines before the first dated
    # line are dropped.
    grouped_lines = []
    for line in beancount_statements.strip().split('\n'):
        if date_prefix.match(line):
            grouped_lines.append([line])
        elif grouped_lines:
            grouped_lines[-1].append(line)

    if not grouped_lines:
        print("Warning: No valid statements to save")
        return

    # Every transaction starts with its 10-character ISO date, which sorts
    # chronologically as a string; the sort is stable within a day.
    transactions = sorted(
        ('\n'.join(group) for group in grouped_lines),
        key=lambda transaction: transaction[:10],
    )

    transactions_by_month = defaultdict(list)
    for transaction in transactions:
        year, month = date_prefix.match(transaction).groups()
        transactions_by_month[(year, month)].append(transaction)

    balance_entry = ""
    if last_date and balance:
        try:
            assertion_date = date.fromisoformat(last_date) + timedelta(days=1)
        except ValueError:
            print(f"Warning: Invalid last transaction date '{last_date}'. Skipping balance assertion.")
        else:
            # Single-line f-string: a replacement field split across lines
            # only parses on Python 3.12+.
            balance_entry = f'\n{assertion_date.isoformat()} balance Liabilities:Credit:Renta4:PolissaCredit {balance} EUR\n'

    last_month_key = max(transactions_by_month)

    for (year, month), month_transactions in sorted(transactions_by_month.items()):
        output_dir = Path("ledger/transactions") / year
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"{month}.beancount"

        # The existing file is only inspected, never rewritten, so a failure
        # while writing cannot destroy entries that were already there.
        needs_separator = False
        if output_file.exists():
            existing_content = output_file.read_text(encoding="utf-8")
            needs_separator = bool(existing_content) and not existing_content.endswith('\n')

        # Explicit UTF-8: the ledger holds non-ASCII payees and accounts.
        with open(output_file, 'a', encoding="utf-8") as f:
            if needs_separator:
                f.write('\n')
            f.write('\n'.join(month_transactions))
            f.write('\n')

            if (year, month) == last_month_key and balance_entry:
                f.write(balance_entry)

        print(f"Saved statements to {output_file}")
|
||||
|
||||
|
||||
def filter_markdown_by_date(markdown_report: str, from_date: str) -> str:
    """
    Filter a markdown table so only rows dated on or after ``from_date`` remain.

    Lines without a ``|`` and rows whose first column holds no valid
    ``DD/MM/YYYY`` date are always kept, so headers, separators, summary rows
    and surrounding text survive the filter.

    Args:
        markdown_report: Markdown text containing the table to filter.
        from_date: Inclusive lower bound in ``YYYY-MM-DD`` format. An empty
            value disables filtering; a malformed value prints a warning and
            disables filtering.

    Returns:
        The markdown text with the rows dated before ``from_date`` removed.
    """
    from datetime import datetime

    if not from_date:
        return markdown_report

    try:
        filter_date = datetime.strptime(from_date, "%Y-%m-%d")
    except ValueError:
        # Kept on one line: a replacement field split across lines inside a
        # single-quoted f-string only parses on Python 3.12+.
        print(f"Warning: Invalid date format '{from_date}'. Expected YYYY-MM-DD. Ignoring filter.")
        return markdown_report

    date_pattern = re.compile(r'\d{2}/\d{2}/\d{4}')
    filtered_lines = []

    for line in markdown_report.split('\n'):
        if '|' not in line:
            filtered_lines.append(line)
            continue

        # A line containing '|' always splits into at least two parts, so
        # the first column is always available here.
        fecha_col = line.split('|')[1]
        date_match = date_pattern.search(fecha_col)
        if not date_match:
            filtered_lines.append(line)
            continue

        try:
            line_date = datetime.strptime(date_match.group(0), "%d/%m/%Y")
        except ValueError:
            # Digits that look like a date but are not one (e.g. 31/02/2025)
            # must not abort the whole run; keep the row untouched.
            filtered_lines.append(line)
            continue

        if line_date >= filter_date:
            filtered_lines.append(line)

    return '\n'.join(filtered_lines)
|
||||
|
||||
|
||||
def convert_file_to_markdown(path: str):
    """
    Convert the file at ``path`` with docling's ``DocumentConverter`` and
    return the converted document exported as markdown.
    """
    conversion = DocumentConverter().convert(path)
    document = conversion.document
    return document.export_to_markdown()
|
||||
|
||||
|
||||
async def main():
    """
    Command-line entry point: convert an R4 movements XLSX statement into
    Beancount statements and save them to the ledger.

    The balance and last transaction date are read from the full movements
    table before the optional ``--from`` date filter is applied.

    Raises:
        SystemExit: With a non-zero status when the movements table cannot
            be found, so shell callers can detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Parse R4 movement statements from XLSX format")
    parser.add_argument("source", help="Path to the input XLSX file")
    parser.add_argument("--from", dest="from_date",
                        help="Filter transactions from this date (YYYY-MM-DD)")
    args = parser.parse_args()

    if not args.source.endswith(".xlsx"):
        parser.error("Input file must have .xlsx format")

    markdown_report = convert_file_to_markdown(args.source)

    movements_table = extract_movements_table(markdown_report)

    if not movements_table:
        # Exit through SystemExit instead of a plain return: the message goes
        # to stderr and the process status is 1 rather than 0.
        raise SystemExit(
            "Error: Could not find MOVIMIENTOS DEL PERIODO EN EUROS table")

    last_date, balance = extract_balance_and_last_date(movements_table)
    print(f"Extracted balance: {balance} on date: {last_date}")

    if args.from_date:
        movements_table = filter_markdown_by_date(
            movements_table, args.from_date)

    beancount_statements = await get_beancount_statements(movements_table)
    print(f"Final result: \n{beancount_statements}")

    clean_beancount_statements = parse_response(beancount_statements)
    save_statements(clean_beancount_statements, last_date, balance)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -31,6 +31,12 @@
|
||||
Assets:Liquid:Caixabank:Corrent 530 EUR
|
||||
Expenses:Lloguer -530 EUR
|
||||
Equity:LloguerMiquel 530 EUR
|
||||
2025-10-01 * "R4" "INTERESES TERCER TRIMESTRE DE 2025"
|
||||
Expenses:R4:Interessos 0.16 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
2025-10-01 * "R4" "INTERESES TERCER TRIMESTRE DE 2025"
|
||||
Expenses:R4:Interessos 175.23 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
2025-10-02 * "Aigues de Barcelona" "Factura Aigua"
|
||||
Expenses:FacturesUtilitats 47.95 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
@@ -49,9 +55,18 @@
|
||||
2025-10-04 * "Finques Samso" "FINQ.SAMSO S.L Recibo de fincas, alquileres"
|
||||
Expenses:Lloguer 1146.20 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
2025-10-04 * "OKASHII OSONA" "OKASHII OSONA - C. DUES SOLES 6, VIC"
|
||||
Expenses:MenjarFora 49.50 EUR
|
||||
Assets:Benefits:Edenred:TicketsRestaurant
|
||||
2025-10-06 * "Wellhub" "Wellhub EU"
|
||||
Expenses:Gimnàs 35.99 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
2025-10-06 * "365 T173" "365 T173"
|
||||
Expenses:MenjarFora 2.30 EUR
|
||||
Assets:Benefits:Edenred:TicketsRestaurant
|
||||
2025-10-07 * "DON ZANGANO" "DON ZANGANO"
|
||||
Expenses:MenjarFora 5.10 EUR
|
||||
Assets:Benefits:Edenred:TicketsRestaurant
|
||||
2025-10-07 * "Plus Barcelona" "PLUS BARCELONA -"
|
||||
Expenses:Supermercat 2.86 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
@@ -64,18 +79,47 @@
|
||||
2025-10-07 * "Plus Teodora Lama" "PLUS TEODORA LAMA"
|
||||
Expenses:Supermercat 24.85 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
2025-10-07 * "AL PASATORE" "AL PASATORE"
|
||||
Expenses:MenjarFora 12.50 EUR
|
||||
Assets:Benefits:Edenred:TicketsRestaurant
|
||||
2025-10-07 * "BLT" "BLT - GV. AUGUSTA BJ 187, BARCELONA"
|
||||
Expenses:MenjarFora 1.55 EUR
|
||||
Assets:Benefits:Edenred:TicketsRestaurant
|
||||
2025-10-09 * "Comercializadora Gas Power" "COMERCIALIZADORA GAS POWER11877106871025"
|
||||
Income:Other:Devolucions -11.96 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
2025-10-09 * "R4" "COMISION CUSTODIA INTERNACIONAL JUL-SEP 2025"
|
||||
Expenses:R4:Comissions 18.43 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
2025-10-09 * "R4" "COMISION CUSTODIA IIC JUL-SEP 2025"
|
||||
Expenses:R4:Comissions 30.18 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
2025-10-09 * "R4" "IVA CUSTODIAS JUL-SEP 2025"
|
||||
Expenses:R4:Comissions 10.21 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
2025-10-09 * "R4" "COMISION MANTENIMIENTO JUL-SEP 2025"
|
||||
Expenses:R4:Comissions 9.00 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
2025-10-09 * "R4" "IVA MANTENIMIENTO JUL-SEP 2025"
|
||||
Expenses:R4:Comissions 1.89 EUR
|
||||
Liabilities:Credit:Renta4:PolissaCredit
|
||||
2025-10-09 * "Nintendo" "Pokemon Legends ZA DLC"
|
||||
Expenses:Entreteniment 29.99 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
2025-10-09 * "Nintendo" "Pokemon Legends ZA"
|
||||
Expenses:Entreteniment 69.99 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
2025-10-09 * "Aerobus" "Anada i tornada aerobus"
|
||||
Expenses:Mobilitat 12.85 EUR
|
||||
Assets:Benefits:Edenred:TargetaTransport
|
||||
2025-10-09 * "Burger King" "Sopar aeroport"
|
||||
Expenses:MenjarFora 14.41 EUR
|
||||
Assets:Benefits:Edenred:TicketsRestaurant
|
||||
2025-10-11 * "ROGER ORIOL PEREZ" "TRANSF. RECIBIDA DE ROGER ORIOL PEREZ"
|
||||
Liabilities:Credit:Renta4:PolissaCredit 150.00 EUR
|
||||
Assets:Liquid:Caixabank:Corrent
|
||||
|
||||
|
||||
2025-11-01 balance Assets:Liquid:Caixabank:Corrent 11739.32 EUR
|
||||
2025-11-01 balance Assets:Liquid:Caixabank:Corrent 11589.32 EUR
|
||||
2025-11-01 balance Assets:Liquid:R4:EUR 0 EUR
|
||||
2025-11-01 balance Assets:Invest:Fund:Vanguard:EMMK 14.99 VANEMMK
|
||||
2025-11-01 balance Assets:Invest:Fund:Vanguard:GL 755.40 VANGL
|
||||
@@ -83,8 +127,8 @@
|
||||
2025-11-01 balance Assets:Invest:ETF:IWVL 430 IWVL
|
||||
2025-11-01 balance Assets:Invest:Fixed:R4RF 1518.57004 R4RF
|
||||
2025-11-01 balance Assets:Invest:ETF:XDEQ 264 XDEQ
|
||||
2025-11-01 balance Assets:Benefits:Edenred:TicketsRestaurant 231.55 EUR
|
||||
2025-11-01 balance Assets:Benefits:Edenred:TargetaTransport 165.85 EUR
|
||||
2025-11-01 balance Assets:Benefits:Edenred:TicketsRestaurant 146.19 EUR
|
||||
2025-11-01 balance Assets:Benefits:Edenred:TargetaTransport 153.00 EUR
|
||||
2025-11-01 balance Assets:Benefits:DZP:PPEZurich 3833.46 EUR
|
||||
2025-11-01 balance Assets:PersonalProperty:VivendaPrincipal 0 EUR
|
||||
2025-11-01 balance Assets:PersonalProperty:Cotxe 10000 EUR
|
||||
@@ -92,4 +136,4 @@
|
||||
2025-11-01 balance Assets:PersonalProperty:MetallsPreciosos 0 EUR
|
||||
2025-11-01 balance Assets:PersonalProperty:AltresPropietats 0 EUR
|
||||
2025-11-01 balance Liabilities:Credit:Caixabank:TargetaCredit 0 EUR
|
||||
2025-11-01 balance Liabilities:Credit:Renta4:PolissaCredit -19876.16 EUR
|
||||
2025-11-01 balance Liabilities:Credit:Renta4:PolissaCredit -19971.26 EUR
|
||||
|
||||
Reference in New Issue
Block a user