180 lines
6.4 KiB
Plaintext
180 lines
6.4 KiB
Plaintext
|
|
#!/usr/bin/env python3
|
||
|
|
import argparse
|
||
|
|
import asyncio
|
||
|
|
from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
|
||
|
|
from docling.document_converter import DocumentConverter
|
||
|
|
|
||
|
|
GET_BEANCOUNT_STATEMENTS_PROMPT = """# Financial Data to Beancount Converter
|
||
|
|
|
||
|
|
You are an expert in converting markdown financial data tables to beancount price statements.
|
||
|
|
|
||
|
|
## Task
|
||
|
|
Parse markdown tables containing financial portfolio data and convert them to Beancount price statements.
|
||
|
|
|
||
|
|
## Input Format
|
||
|
|
You will receive markdown tables with the following sections:
|
||
|
|
- **ETFs**: Contains ETF holdings with descriptions, markets, currency, shares, and valuations
|
||
|
|
- **Fondos de Inversión** (Investment Funds): Contains fund holdings with descriptions, manager, currency, units, and valuations
|
||
|
|
- **Saldo en EUR**: Contains cash balances (should be ignored for price statements)
|
||
|
|
|
||
|
|
The date appears in the format `DD/MM/YYYY HH:MM:SS` near the top of each section.
|
||
|
|
|
||
|
|
## Commodity Mapping
|
||
|
|
Map the Spanish fund/ETF descriptions to the following commodity codes:
|
||
|
|
|
||
|
|
- "VANGUARD GLOBAL STOCK INDEX (EUR) ACC" → VANGL
|
||
|
|
- "VANGUARD GB SMALL-CAP IDX \"INV\" (EUR)" → VANSMCAP
|
||
|
|
- "ISHARES EDGE MSCI WORLD VALUE" → IWVL
|
||
|
|
- "X MSCI WORLD QUALITY ETF 1C" → XDEQ
|
||
|
|
- "RENTA 4 RENTA FIJA EURO, FI CLASE A" → R4RF
|
||
|
|
- "VANGUARD EM MK ST IX \"INV\" (EUR)" → VANEMMK
|
||
|
|
|
||
|
|
## Output Format
|
||
|
|
Generate Beancount price statements in this format:
|
||
|
|
YYYY-MM-DD price COMMODITY PRICE CURRENCY
|
||
|
|
|
||
|
|
## Rules
|
||
|
|
1. Extract the date from the table (format: DD/MM/YYYY) and convert to ISO format (YYYY-MM-DD)
|
||
|
|
2. For each ETF or Fund entry:
|
||
|
|
- Match the "Descripción" field to the commodity mapping
|
||
|
|
- Extract the price from "Precio actual" (ETFs) or "Valor liquidativo" (Funds)
|
||
|
|
- Use the currency specified in the "Divisa" column
|
||
|
|
3. Ignore the "Saldo en EUR" (cash balance) section - do not create price statements for cash
|
||
|
|
4. Only output Beancount statements, no explanations or additional text
|
||
|
|
5. If a description doesn't match any commodity in the mapping, skip it or ask for clarification
|
||
|
|
|
||
|
|
## Example
|
||
|
|
Given this input data dated 04/10/2025 for "VANGUARD GLOBAL STOCK INDEX (EUR) ACC" that costs 54.12 EUR per participation:
|
||
|
|
|
||
|
|
Output:
|
||
|
|
2025-10-04 price VANGL 54.12 EUR
|
||
|
|
|
||
|
|
## Your Task
|
||
|
|
Parse the provided financial data tables and generate the corresponding Beancount price statements. Output only the Beancount code.
|
||
|
|
"""
|
||
|
|
|
||
|
|
|
||
|
|
def convert_file_to_markdown(path: str):
|
||
|
|
converter = DocumentConverter()
|
||
|
|
result = converter.convert(path)
|
||
|
|
return result.document.export_to_markdown()
|
||
|
|
|
||
|
|
|
||
|
|
async def get_beancount_price_statements(r4_report: str) -> str:
|
||
|
|
options = ClaudeAgentOptions(
|
||
|
|
system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
|
||
|
|
cwd="/home/roger/Projectes/contabilitat-2024/"
|
||
|
|
)
|
||
|
|
|
||
|
|
result = None
|
||
|
|
async for message in query(
|
||
|
|
prompt=f"Convert this financial statement to beancount price statements:\n{
|
||
|
|
r4_report}",
|
||
|
|
options=options
|
||
|
|
):
|
||
|
|
if isinstance(message, ResultMessage) and message.subtype == "success":
|
||
|
|
result = message.result
|
||
|
|
else:
|
||
|
|
print(message)
|
||
|
|
|
||
|
|
if result is not None and isinstance(result, str):
|
||
|
|
return result
|
||
|
|
else:
|
||
|
|
raise ValueError(
|
||
|
|
"Unable to get Beancount price statements from the report!")
|
||
|
|
|
||
|
|
|
||
|
|
def parse_price_statements(beancount_statements: str):
|
||
|
|
"""
|
||
|
|
The input beancount statements might be inside a markdown beancount code block
|
||
|
|
or in plain text. After parsing them, we check that each line meets the
|
||
|
|
format YYYY-MM-DD price COMMODITY PRICE CURRENCY.
|
||
|
|
All lines that don't meet the criteria are dropped and are logged as errors.
|
||
|
|
"""
|
||
|
|
import re
|
||
|
|
|
||
|
|
# Extract content from markdown code block if present
|
||
|
|
code_block_pattern = r'```(?:beancount)?\n(.*?)```'
|
||
|
|
match = re.search(code_block_pattern, beancount_statements, re.DOTALL)
|
||
|
|
if match:
|
||
|
|
content = match.group(1)
|
||
|
|
else:
|
||
|
|
content = beancount_statements
|
||
|
|
|
||
|
|
# Pattern for valid beancount price statement: YYYY-MM-DD price COMMODITY PRICE CURRENCY
|
||
|
|
price_pattern = r'^\d{4}-\d{2}-\d{2}\s+price\s+[A-Z0-9]+\s+[\d.]+\s+[A-Z]+$'
|
||
|
|
|
||
|
|
valid_statements = []
|
||
|
|
for line in content.strip().split('\n'):
|
||
|
|
line = line.strip()
|
||
|
|
if not line:
|
||
|
|
continue
|
||
|
|
if re.match(price_pattern, line):
|
||
|
|
valid_statements.append(line)
|
||
|
|
else:
|
||
|
|
print(f"Warning: Dropping invalid line: {line}")
|
||
|
|
|
||
|
|
return '\n'.join(valid_statements)
|
||
|
|
|
||
|
|
|
||
|
|
def save_price_statements(beancount_statements: str):
|
||
|
|
"""
|
||
|
|
The statements are saved in a beancount file in ledger/trading/YYYY/MM.beancount.
|
||
|
|
The year and month are extracted from the first beancount statement in the input.
|
||
|
|
The file is created if it doesn't exist or the statements are appended to the
|
||
|
|
end of the file if it already exists.
|
||
|
|
"""
|
||
|
|
import re
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
if not beancount_statements.strip():
|
||
|
|
print("Warning: No valid statements to save")
|
||
|
|
return
|
||
|
|
|
||
|
|
# Extract date from first statement (format: YYYY-MM-DD price ...)
|
||
|
|
first_line = beancount_statements.strip().split('\n')[0]
|
||
|
|
date_match = re.match(r'^(\d{4})-(\d{2})-\d{2}', first_line)
|
||
|
|
|
||
|
|
if not date_match:
|
||
|
|
print(f"Error: Could not extract date from first statement: {
|
||
|
|
first_line}")
|
||
|
|
return
|
||
|
|
|
||
|
|
year = date_match.group(1)
|
||
|
|
month = date_match.group(2)
|
||
|
|
|
||
|
|
# Create directory structure if it doesn't exist
|
||
|
|
output_dir = Path(f"ledger/trading/{year}")
|
||
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||
|
|
|
||
|
|
# Define output file path
|
||
|
|
output_file = output_dir / f"{month}.beancount"
|
||
|
|
|
||
|
|
# Append statements to file (create if doesn't exist)
|
||
|
|
with open(output_file, 'a') as f:
|
||
|
|
f.write(beancount_statements)
|
||
|
|
f.write('\n')
|
||
|
|
|
||
|
|
print(f"Saved price statements to {output_file}")
|
||
|
|
|
||
|
|
|
||
|
|
async def main():
|
||
|
|
parser = argparse.ArgumentParser(
|
||
|
|
description="Parse R4 report from XLSX format")
|
||
|
|
parser.add_argument("source", help="Path to the input XLSX file")
|
||
|
|
args = parser.parse_args()
|
||
|
|
|
||
|
|
if not args.source.endswith(".xlsx"):
|
||
|
|
parser.error("Input file must have .xlsx format")
|
||
|
|
|
||
|
|
markdown_report = convert_file_to_markdown(args.source)
|
||
|
|
beancount_statements = await get_beancount_price_statements(markdown_report)
|
||
|
|
print(f"Final result: \n{beancount_statements}")
|
||
|
|
|
||
|
|
clean_beancount_statements = parse_price_statements(beancount_statements)
|
||
|
|
save_price_statements(clean_beancount_statements)
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
|
||
|
|
asyncio.run(main())
|