import re
from pathlib import Path

# Location of the generated index page that this script patches in place.
path = Path(r'd:\kreuzberg\pipeline_claim\index.html')

# Load the whole page as text; undecodable bytes are substituted rather than
# raising, so the script keeps going on a partially corrupt file.
with path.open(mode='r', encoding='utf-8', errors='replace') as handle:
    text = handle.read()

# Locate every doc-card "open" anchor so an "Open File" link can be added after
# it. Group 1 is the complete <a>...</a> element; group 2 is the value of the
# file= query parameter inside its data-file attribute, from which the actual
# file path is derived.
# NOTE: re.MULTILINE only changes how ^ and $ behave; the pattern uses neither,
# so the flag does not affect matching here.
pattern = re.compile(
    r'(<a class="doc-link doc-open"[^>]*'
    r'data-file="doc_viewer\.html\?file=([^"]+)"'
    r'[^>]*>[^<]*</a>)',
    re.MULTILINE,
)

def repl(match):
    """Return the matched doc-open anchor followed by a new "Open File" anchor.

    Args:
        match: A regex match whose group 1 is the complete original ``<a>``
            element and whose group 2 is the value of the ``file=`` query
            parameter taken from that element's ``data-file`` attribute.

    Returns:
        The original anchor, then a newline and eight spaces of indentation,
        then an anchor whose ``href`` is the file parameter with any trailing
        page parameter removed.
    """
    link = match.group(1)
    file_param = match.group(2)
    # Strip the page param if present. Inside an HTML attribute the separator
    # can be written raw ('&') or entity-escaped ('&amp;'); both spellings are
    # cut off, otherwise '&amp;page=N' would end up inside the generated href.
    for separator in ('&amp;page=', '&page='):
        file_param = file_param.partition(separator)[0]
    open_link = f'<a class="doc-link doc-open-file" href="{file_param}" target="_blank" rel="noopener">Open File</a>'
    return link + '\n        ' + open_link

# Only add if not already present, so re-running the script never inserts a
# second copy of the links.
added = 0
if 'doc-open-file' not in text:
    # subn returns the number of replacements, so the outcome can be reported
    # accurately.
    text, added = pattern.subn(repl, text)

# Rewrite the file and report success only when links were actually inserted;
# otherwise leave the file on disk untouched and say so.
if added:
    path.write_text(text, encoding='utf-8')
    print(f'added Open File links: {added}')
else:
    print('no Open File links added (already present or no matching doc links)')
