#!/usr/bin/env python3

import os
import argparse
import asyncio
import re
from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
from docling.document_converter import DocumentConverter

# System prompt passed to the agent by get_beancount_statements(). It
# describes the R4 movements table (including the split layout the Markdown
# export can produce) and the Beancount output expected in return.
# Throughout the prompt "debit" names the posting that carries the positive
# amount, as in the worked examples; rule 7 follows that same convention.
GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: R4 Movements to Beancount Parser

You are a specialized financial transaction parser that converts R4 account movements into Beancount format.

## Input Format

You will receive a table with the following columns:
- **FECHA**: Transaction date (DD/MM/YYYY format)
- **CONCEPTO**: Transaction description
- **MOVIMIENTOS**: Amount (negative for expenses/debits, positive for income/credits)
- **SALDO**: Account balance after transaction

Example input:
```
| FECHA      | CONCEPTO                                     | MOVIMIENTOS | SALDO      |
|------------|----------------------------------------------|-------------|------------|
| 01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025           | -0.16       | -19876.32  |
| 11/10/2025 | TRANSF. RECIBIDA DE ROGER ORIOL PEREZ        | 150         | -19971.3   |
```

Warning: The table might be incorrectly parsed and may appear split. For example:
```
| MOVIMIENTOS DEL PERIODO EN EUROS   |
|------------------------------------|
| FECHA                              |

| CONCEPTO                                     | MOVIMIENTOS   |    SALDO |
|----------------------------------------------|---------------|----------|
| SALDO ANTERIOR                               | -19876.16     | -19876.2 |
| INTERESES TERCER TRIMESTRE DE 2025           | -0.16         | -19876.3 |
| INTERESES TERCER TRIMESTRE DE 2025           | -175.23       | -20051.5 |
| SALDO FINAL                                  | None          | -19971.3 |

| 01/10/2025   | INTERESES TERCER TRIMESTRE DE 2025           |   -0.16 |   -19876.32 |
|--------------|----------------------------------------------|---------|-------------|
| 01/10/2025   | INTERESES TERCER TRIMESTRE DE 2025           | -175.23 |    -20051.5 |
```
You should interpret the previous table like this:
```
| MOVIMIENTOS DEL PERIODO EN EUROS   |
|------------------------------------|
| FECHA        | CONCEPTO                                     | MOVIMIENTOS   |    SALDO |
|-------------------------------------------------------------|---------------|----------|
|              | SALDO ANTERIOR                               | -19876.16     | -19876.2 |
| 01/10/2025   | INTERESES TERCER TRIMESTRE DE 2025           | -0.16         | -19876.3 |
| 01/10/2025   | INTERESES TERCER TRIMESTRE DE 2025           | -175.23       | -20051.5 |
|              | SALDO FINAL                                  | None          | -19971.3 |
```

## Output Format

Convert each transaction into a Beancount entry with this structure:

```
YYYY-MM-DD * "Payee" "Description"
  Account1  AMOUNT EUR
  Account2
```

### Rules for Conversion

1. **Date**: Convert from DD/MM/YYYY to YYYY-MM-DD format
2. **Flag**: Always use `*` (cleared transaction)
3. **Payee**: Extract the main entity from "CONCEPTO" field
4. **Description**: Use the full "CONCEPTO" text as the description
5. **Amount**: Use the absolute value of "MOVIMIENTOS"
6. **Currency**: Always use EUR
7. **Source Account**: Always use `Liabilities:Credit:Renta4:PolissaCredit` as one of the accounts

### Account Classification

**Available Expense Accounts:**
- Expenses:R4:Comissions
- Expenses:R4:Interessos

**Available Income Accounts:**
- Income:Invest:R4:Dividends
- Income:Invest:R4:CapitalGains
- Income:Invest:R4:CapitalGains:Untaxable

**Available Tax Accounts:**
- Expenses:Taxes:BeneficisDividends
- Expenses:Taxes:BeneficisDividendsOrigen

**Transfer Account:**
- Assets:Liquid:Caixabank:Corrent

### Transaction Type Rules

1. **Commissions** (COMISION): Use `Expenses:R4:Comissions`
2. **Interest** (INTERESES): Use `Expenses:R4:Interessos`
3. **Dividends**: Use `Income:Invest:R4:Dividends`
4. **Capital Gains**: Use `Income:Invest:R4:CapitalGains` or `Income:Invest:R4:CapitalGains:Untaxable`
5. **Dividend Tax**: Use `Expenses:Taxes:BeneficisDividends` or `Expenses:Taxes:BeneficisDividendsOrigen`
6. **IVA**: Use `Expenses:R4:Comissions`
7. **Received Transfer from ROGER ORIOL PEREZ**:
   - Debit: `Liabilities:Credit:Renta4:PolissaCredit` with amount
   - Credit: `Assets:Liquid:Caixabank:Corrent`

### Special Cases

- For negative amounts in MOVIMIENTOS: debit the expense account, credit `Liabilities:Credit:Renta4:PolissaCredit`
- For positive amounts in MOVIMIENTOS: debit `Liabilities:Credit:Renta4:PolissaCredit`, credit the income account
- For transfers from ROGER ORIOL PEREZ: debit `Liabilities:Credit:Renta4:PolissaCredit`, credit `Assets:Liquid:Caixabank:Corrent`

## Examples

**Input:**
```
01/10/2025 | INTERESES TERCER TRIMESTRE DE 2025 | -0.16 | -19876.32
```

**Output:**
```
2025-10-01 * "R4" "INTERESES TERCER TRIMESTRE DE 2025"
  Expenses:R4:Interessos  0.16 EUR
  Liabilities:Credit:Renta4:PolissaCredit
```

**Input:**
```
11/10/2025 | TRANSF. RECIBIDA DE ROGER ORIOL PEREZ | 150 | -19971.3
```

**Output:**
```
2025-10-11 * "ROGER ORIOL PEREZ" "TRANSF. RECIBIDA DE ROGER ORIOL PEREZ"
  Liabilities:Credit:Renta4:PolissaCredit  150.00 EUR
  Assets:Liquid:Caixabank:Corrent
```

## Output Requirements

- Process all transactions in the input table
- Skip rows with "SALDO ANTERIOR" or "SALDO FINAL" in CONCEPTO
- Maintain chronological order
- Ensure proper indentation (2 spaces for posting lines)
- Be consistent with account naming conventions
- Only output Beancount code, explanations are not needed.

## Your Task
Parse the provided R4 movements table and generate the corresponding Beancount statements. Output only the Beancount code.
"""


async def get_beancount_statements(markdown_report: str) -> str:
    """
        Ask the agent to turn the movements table into Beancount statements.

        The table is sent through query() with
        GET_BEANCOUNT_STATEMENTS_PROMPT as system prompt. The result of the
        last ResultMessage whose subtype is "success" is kept; every other
        message is printed.

        Raises ValueError when no string result was received.
    """
    user_prompt = (
        "Convert this R4 movements table to beancount statements:\n\n"
        f"{markdown_report}"
    )
    agent_options = ClaudeAgentOptions(
        system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
        cwd=os.getcwd(),
    )

    answer = None
    async for message in query(prompt=user_prompt, options=agent_options):
        succeeded = (
            isinstance(message, ResultMessage)
            and message.subtype == "success"
        )
        if not succeeded:
            # Anything that is not the successful result is only echoed.
            print(message)
            continue
        answer = message.result

    if not isinstance(answer, str):
        raise ValueError(
            "Unable to get Beancount statements from the report!")
    return answer


def parse_response(beancount_statements: str) -> str:
    """
        Strip Markdown code fences from the model response.

        The input beancount statements might be inside one or more fenced
        code blocks (with any language tag, or none) or in plain text.

        Returns the contents of the fenced blocks joined by a newline, or
        the input unchanged when it contains no fenced block.
    """
    # Accept any info string after the opening fence. With an unrecognised
    # tag (e.g. ```text) the response would otherwise be treated as plain
    # text and its fence lines would reach the statement parser.
    fenced_blocks = re.findall(
        r'```[^\n`]*\n(.*?)```', beancount_statements, re.DOTALL)

    if not fenced_blocks:
        return beancount_statements

    # Keep every block, not only the first one, so no statement is lost when
    # the response is split across several fences.
    return '\n'.join(fenced_blocks)


def extract_movements_table(markdown_report: str) -> str:
    """
        Extract the MOVIMIENTOS DEL PERIODO EN EUROS table from the markdown.

        Collects every table line (one containing '|' or starting with
        'FECHA') found after a line mentioning the section title, and stops
        at the first following line mentioning RESUMEN DE RESULTADOS.

        Returns the collected lines joined by newlines; the string is empty
        when nothing was collected.
    """
    table_lines = []
    in_movements_section = False

    for line in markdown_report.split('\n'):
        if 'MOVIMIENTOS DEL PERIODO EN EUROS' in line:
            # The title line itself is never part of the extracted table.
            in_movements_section = True
            continue

        if not in_movements_section:
            continue

        # Test the terminator before collecting: the next section title may
        # itself be rendered as a table row (with '|') and must end the
        # scan instead of being collected along with what follows it.
        if 'RESUMEN DE RESULTADOS' in line:
            break

        if '|' in line or line.startswith('FECHA'):
            table_lines.append(line)

    return '\n'.join(table_lines)


def extract_balance_and_last_date(markdown_report: str) -> tuple[str, str]:
    """
        Extract the final balance from SALDO FINAL row and the date of the
        last transaction.

        Only table rows (lines containing '|') are inspected. The balance is
        read from the last cell of a SALDO FINAL row, with ',' turned into
        '.' and spaces removed; a cell that is not a plain signed number
        (for example 'None') is ignored. Dates are read from the first cell
        (DD/MM/YYYY) and the most recent one is returned as YYYY-MM-DD.

        Returns (last_date, balance) tuple; either element is an empty
        string when it could not be found.
    """
    date_pattern = re.compile(r'(\d{2})/(\d{2})/(\d{4})')
    # The balance is later written verbatim into a ledger directive, so only
    # a bare signed decimal number is accepted.
    number_pattern = re.compile(r'[+-]?\d+(?:\.\d+)?')

    balance = ""
    last_date = ""

    for line in markdown_report.split('\n'):
        if '|' not in line:
            continue

        parts = line.split('|')

        if len(parts) >= 3 and 'SALDO FINAL' in line:
            # parts[-1] is whatever follows the closing '|', so the last
            # cell of the row is parts[-2].
            candidate = parts[-2].strip().replace(',', '.').replace(' ', '')
            if number_pattern.fullmatch(candidate):
                balance = candidate

        date_match = date_pattern.search(parts[1].strip())
        if date_match:
            day, month, year = date_match.groups()
            current_date = f"{year}-{month}-{day}"
            # YYYY-MM-DD strings order chronologically when compared as
            # text, and any date is greater than the initial empty string.
            if current_date > last_date:
                last_date = current_date

    return last_date, balance


def save_statements(beancount_statements: str, last_date: str, balance: str):
    """
        The statements are saved in beancount files in
        ledger/transactions/YYYY/MM.beancount (relative to the current
        working directory), appending to files that already exist.
        Statements are sorted chronologically and split by month if they
        span multiple months.
        A balance assertion is added at the end of the last month's file
        when both last_date (YYYY-MM-DD, the date of the last transaction)
        and balance are given. It is dated the day after last_date, because
        Beancount evaluates a balance assertion at the beginning of its
        date, i.e. before that day's transactions.
    """
    from collections import defaultdict
    from datetime import date, timedelta
    from pathlib import Path

    date_prefix = re.compile(r'^(\d{4})-(\d{2})-\d{2}')

    # A dated line opens a transaction and the lines after it belong to that
    # transaction; anything before the first dated line is dropped.
    grouped_lines = []
    for line in beancount_statements.strip().split('\n'):
        if date_prefix.match(line):
            grouped_lines.append([line])
        elif grouped_lines:
            grouped_lines[-1].append(line)

    if not grouped_lines:
        print("Warning: No valid statements to save")
        return

    transactions = ['\n'.join(chunk) for chunk in grouped_lines]
    # Every transaction starts with its YYYY-MM-DD date, so the first ten
    # characters are a chronological sort key.
    transactions.sort(key=lambda transaction: transaction[:10])

    transactions_by_month = defaultdict(list)
    for transaction in transactions:
        year, month = date_prefix.match(transaction).groups()
        transactions_by_month[(year, month)].append(transaction)

    last_month_key = max(transactions_by_month)

    balance_directive = ""
    if last_date and balance:
        try:
            assertion_date = date.fromisoformat(last_date) + timedelta(days=1)
        except ValueError:
            # Leave the assertion out rather than write an unparsable
            # directive into the ledger.
            print(f"Warning: Invalid last date '{last_date}'. "
                  "Skipping balance assertion.")
        else:
            balance_directive = (
                f'\n{assertion_date.isoformat()} balance '
                f'Liabilities:Credit:Renta4:PolissaCredit  {balance} EUR\n'
            )

    for (year, month), month_transactions in sorted(
            transactions_by_month.items()):
        output_dir = Path(f"ledger/transactions/{year}")
        output_dir.mkdir(parents=True, exist_ok=True)

        output_file = output_dir / f"{month}.beancount"

        # Only the last byte matters here; reading bytes avoids decoding a
        # file whose encoding is not known.
        existing = output_file.read_bytes() if output_file.exists() else b""
        needs_newline = bool(existing) and not existing.endswith(b'\n')

        # Append instead of truncating and rewriting, so existing content
        # survives a failure in the middle of the write.
        with open(output_file, 'a', encoding='utf-8') as f:
            if needs_newline:
                f.write('\n')
            f.write('\n'.join(month_transactions))
            f.write('\n')

            if (year, month) == last_month_key:
                f.write(balance_directive)

        print(f"Saved statements to {output_file}")


def filter_markdown_by_date(markdown_report: str, from_date: str) -> str:
    """
        Filter markdown table to only include rows with dates >= from_date.

        from_date is expected as YYYY-MM-DD. When it is empty the report is
        returned unchanged; when it is malformed a warning is printed and
        the report is returned unchanged as well.

        Lines that are not table rows, and rows whose first cell holds no
        valid DD/MM/YYYY date, are always kept.
    """
    from datetime import datetime

    if not from_date:
        return markdown_report

    try:
        filter_date = datetime.strptime(from_date, "%Y-%m-%d")
    except ValueError:
        print(f"Warning: Invalid date format '{from_date}'. "
              "Expected YYYY-MM-DD. Ignoring filter.")
        return markdown_report

    date_pattern = re.compile(r'\d{2}/\d{2}/\d{4}')
    filtered_lines = []

    for line in markdown_report.split('\n'):
        row_date = None

        if '|' in line:
            # The date, when present, sits in the first cell of the row.
            date_match = date_pattern.search(line.split('|')[1])
            if date_match:
                try:
                    row_date = datetime.strptime(
                        date_match.group(), "%d/%m/%Y")
                except ValueError:
                    # Looks like a date but is not a real calendar day
                    # (e.g. 31/02/2025): treat the row as undated and keep
                    # it instead of aborting the whole run.
                    row_date = None

        if row_date is None or row_date >= filter_date:
            filtered_lines.append(line)

    return '\n'.join(filtered_lines)


def convert_file_to_markdown(path: str):
    """
        Convert the file at path with a fresh DocumentConverter and return
        the Markdown export of the converted document.
    """
    conversion = DocumentConverter().convert(path)
    document = conversion.document
    return document.export_to_markdown()


async def main():
    """
        Command-line entry point.

        Converts the given XLSX file to Markdown, extracts the movements
        table, reads the final balance and last transaction date from it,
        optionally drops rows older than --from, asks the agent for the
        Beancount statements and saves them.

        Exits with a non-zero status when the source is not an .xlsx file
        or when the movements table cannot be found.
    """
    parser = argparse.ArgumentParser(
        description="Parse R4 movement statements from XLSX format")
    parser.add_argument("source", help="Path to the input XLSX file")
    parser.add_argument("--from", dest="from_date",
                        help="Filter transactions from this date (YYYY-MM-DD)")
    args = parser.parse_args()

    # Compare case-insensitively so that e.g. "REPORT.XLSX" is accepted too.
    if not args.source.lower().endswith(".xlsx"):
        parser.error("Input file must have .xlsx format")

    markdown_report = convert_file_to_markdown(args.source)

    movements_table = extract_movements_table(markdown_report)

    if not movements_table:
        # SystemExit with a message prints it to stderr and exits with
        # status 1, so callers can tell that nothing was imported.
        raise SystemExit(
            "Error: Could not find MOVIMIENTOS DEL PERIODO EN EUROS table")

    # Read balance and last date before filtering: they describe the whole
    # statement, not only the rows kept by --from.
    last_date, balance = extract_balance_and_last_date(movements_table)
    print(f"Extracted balance: {balance} on date: {last_date}")

    if args.from_date:
        movements_table = filter_markdown_by_date(
            movements_table, args.from_date)

    beancount_statements = await get_beancount_statements(movements_table)
    print(f"Final result: \n{beancount_statements}")

    clean_beancount_statements = parse_response(beancount_statements)
    save_statements(clean_beancount_statements, last_date, balance)


# Run the async entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())