parse-r4-report script to parse r4 investment prices
This commit is contained in:
179
commands/parse-r4-report
Normal file
179
commands/parse-r4-report
Normal file
@@ -0,0 +1,179 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import asyncio
|
||||
from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
||||
GET_BEANCOUNT_STATEMENTS_PROMPT = """# Financial Data to Beancount Converter
|
||||
|
||||
You are an expert in converting markdown financial data tables to beancount price statements.
|
||||
|
||||
## Task
|
||||
Parse markdown tables containing financial portfolio data and convert them to Beancount price statements.
|
||||
|
||||
## Input Format
|
||||
You will receive markdown tables with the following sections:
|
||||
- **ETFs**: Contains ETF holdings with descriptions, markets, currency, shares, and valuations
|
||||
- **Fondos de Inversión** (Investment Funds): Contains fund holdings with descriptions, manager, currency, units, and valuations
|
||||
- **Saldo en EUR**: Contains cash balances (should be ignored for price statements)
|
||||
|
||||
The date appears in the format `DD/MM/YYYY HH:MM:SS` near the top of each section.
|
||||
|
||||
## Commodity Mapping
|
||||
Map the Spanish fund/ETF descriptions to the following commodity codes:
|
||||
|
||||
- "VANGUARD GLOBAL STOCK INDEX (EUR) ACC" → VANGL
|
||||
- "VANGUARD GB SMALL-CAP IDX \"INV\" (EUR)" → VANSMCAP
|
||||
- "ISHARES EDGE MSCI WORLD VALUE" → IWVL
|
||||
- "X MSCI WORLD QUALITY ETF 1C" → XDEQ
|
||||
- "RENTA 4 RENTA FIJA EURO, FI CLASE A" → R4RF
|
||||
- "VANGUARD EM MK ST IX \"INV\" (EUR)" → VANEMMK
|
||||
|
||||
## Output Format
|
||||
Generate Beancount price statements in this format:
|
||||
YYYY-MM-DD price COMMODITY PRICE CURRENCY
|
||||
|
||||
## Rules
|
||||
1. Extract the date from the table (format: DD/MM/YYYY) and convert to ISO format (YYYY-MM-DD)
|
||||
2. For each ETF or Fund entry:
|
||||
- Match the "Descripción" field to the commodity mapping
|
||||
- Extract the price from "Precio actual" (ETFs) or "Valor liquidativo" (Funds)
|
||||
- Use the currency specified in the "Divisa" column
|
||||
3. Ignore the "Saldo en EUR" (cash balance) section - do not create price statements for cash
|
||||
4. Only output Beancount statements, no explanations or additional text
|
||||
5. If a description doesn't match any commodity in the mapping, skip it or ask for clarification
|
||||
|
||||
## Example
|
||||
Given this input data dated 04/10/2025 for "VANGUARD GLOBAL STOCK INDEX (EUR) ACC" that costs 54.12 EUR per participation:
|
||||
|
||||
Output:
|
||||
2025-10-04 price VANGL 54.12 EUR
|
||||
|
||||
## Your Task
|
||||
Parse the provided financial data tables and generate the corresponding Beancount price statements. Output only the Beancount code.
|
||||
"""
|
||||
|
||||
|
||||
def convert_file_to_markdown(path: str):
|
||||
converter = DocumentConverter()
|
||||
result = converter.convert(path)
|
||||
return result.document.export_to_markdown()
|
||||
|
||||
|
||||
async def get_beancount_price_statements(r4_report: str) -> str:
|
||||
options = ClaudeAgentOptions(
|
||||
system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
|
||||
cwd="/home/roger/Projectes/contabilitat-2024/"
|
||||
)
|
||||
|
||||
result = None
|
||||
async for message in query(
|
||||
prompt=f"Convert this financial statement to beancount price statements:\n{
|
||||
r4_report}",
|
||||
options=options
|
||||
):
|
||||
if isinstance(message, ResultMessage) and message.subtype == "success":
|
||||
result = message.result
|
||||
else:
|
||||
print(message)
|
||||
|
||||
if result is not None and isinstance(result, str):
|
||||
return result
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unable to get Beancount price statements from the report!")
|
||||
|
||||
|
||||
def parse_price_statements(beancount_statements: str):
|
||||
"""
|
||||
The input beancount statements might be inside a markdown beancount code block
|
||||
or in plain text. After parsing them, we check that each line meets the
|
||||
format YYYY-MM-DD price COMMODITY PRICE CURRENCY.
|
||||
All lines that don't meet the criteria are dropped and are logged as errors.
|
||||
"""
|
||||
import re
|
||||
|
||||
# Extract content from markdown code block if present
|
||||
code_block_pattern = r'```(?:beancount)?\n(.*?)```'
|
||||
match = re.search(code_block_pattern, beancount_statements, re.DOTALL)
|
||||
if match:
|
||||
content = match.group(1)
|
||||
else:
|
||||
content = beancount_statements
|
||||
|
||||
# Pattern for valid beancount price statement: YYYY-MM-DD price COMMODITY PRICE CURRENCY
|
||||
price_pattern = r'^\d{4}-\d{2}-\d{2}\s+price\s+[A-Z0-9]+\s+[\d.]+\s+[A-Z]+$'
|
||||
|
||||
valid_statements = []
|
||||
for line in content.strip().split('\n'):
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
if re.match(price_pattern, line):
|
||||
valid_statements.append(line)
|
||||
else:
|
||||
print(f"Warning: Dropping invalid line: {line}")
|
||||
|
||||
return '\n'.join(valid_statements)
|
||||
|
||||
|
||||
def save_price_statements(beancount_statements: str):
|
||||
"""
|
||||
The statements are saved in a beancount file in ledger/trading/YYYY/MM.beancount.
|
||||
The year and month are extracted from the first beancount statement in the input.
|
||||
The file is created if it doesn't exist or the statements are appended to the
|
||||
end of the file if it already exists.
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
if not beancount_statements.strip():
|
||||
print("Warning: No valid statements to save")
|
||||
return
|
||||
|
||||
# Extract date from first statement (format: YYYY-MM-DD price ...)
|
||||
first_line = beancount_statements.strip().split('\n')[0]
|
||||
date_match = re.match(r'^(\d{4})-(\d{2})-\d{2}', first_line)
|
||||
|
||||
if not date_match:
|
||||
print(f"Error: Could not extract date from first statement: {
|
||||
first_line}")
|
||||
return
|
||||
|
||||
year = date_match.group(1)
|
||||
month = date_match.group(2)
|
||||
|
||||
# Create directory structure if it doesn't exist
|
||||
output_dir = Path(f"ledger/trading/{year}")
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Define output file path
|
||||
output_file = output_dir / f"{month}.beancount"
|
||||
|
||||
# Append statements to file (create if doesn't exist)
|
||||
with open(output_file, 'a') as f:
|
||||
f.write(beancount_statements)
|
||||
f.write('\n')
|
||||
|
||||
print(f"Saved price statements to {output_file}")
|
||||
|
||||
|
||||
async def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Parse R4 report from XLSX format")
|
||||
parser.add_argument("source", help="Path to the input XLSX file")
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.source.endswith(".xlsx"):
|
||||
parser.error("Input file must have .xlsx format")
|
||||
|
||||
markdown_report = convert_file_to_markdown(args.source)
|
||||
beancount_statements = await get_beancount_price_statements(markdown_report)
|
||||
print(f"Final result: \n{beancount_statements}")
|
||||
|
||||
clean_beancount_statements = parse_price_statements(beancount_statements)
|
||||
save_price_statements(clean_beancount_statements)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
6
ledger/trading/2025/10.beancount
Normal file
6
ledger/trading/2025/10.beancount
Normal file
@@ -0,0 +1,6 @@
|
||||
2025-10-04 price IWVL 47.49 EUR
|
||||
2025-10-04 price XDEQ 67.88 EUR
|
||||
2025-10-04 price VANGL 54.12 EUR
|
||||
2025-10-04 price R4RF 15.58 EUR
|
||||
2025-10-04 price VANSMCAP 371.37 EUR
|
||||
2025-10-04 price VANEMMK 240.14 EUR
|
||||
@@ -1,2 +1,2 @@
|
||||
include "2024/*"
|
||||
include "*/*"
|
||||
include "prices.beancount"
|
||||
@@ -7,16 +7,18 @@ requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"babel==2.13.1",
|
||||
"beancount==2.3.6",
|
||||
"beautifulsoup4==4.12.2",
|
||||
"beautifulsoup4==4.12.3",
|
||||
"blinker==1.7.0",
|
||||
"bottle==0.12.25",
|
||||
"budget-report==0.4",
|
||||
"cachetools==5.3.2",
|
||||
"certifi==2023.11.17",
|
||||
"certifi==2024.7.4",
|
||||
"chardet==5.2.0",
|
||||
"charset-normalizer==3.3.2",
|
||||
"cheroot==10.0.0",
|
||||
"claude-agent-sdk>=0.1.0",
|
||||
"click==8.1.7",
|
||||
"docling==2.55.1",
|
||||
"fava==1.26.2",
|
||||
"fava-plugins==1.0",
|
||||
"flask==3.0.0",
|
||||
@@ -48,7 +50,7 @@ dependencies = [
|
||||
"python-dateutil==2.8.2",
|
||||
"python-magic==0.4.27",
|
||||
"pytz==2023.3.post1",
|
||||
"requests==2.31.0",
|
||||
"requests==2.32.2",
|
||||
"rsa==4.9",
|
||||
"simplejson==3.19.2",
|
||||
"six==1.16.0",
|
||||
|
||||
Reference in New Issue
Block a user