#!/usr/bin/env python3
"""
Metropoli BBS - PCBIndex to HTML Converter (Retro Edition)
==========================================================
This script generates static HTML index files for a directory tree, mimicking
the look and feel of a DOS-era BBS file listing.
Features:
- Recursive directory traversal (bottom-up).
- Parses legacy PCBoard file descriptions (_INDEX_ / 00_INDEX.TXT).
- Extracts and cleans descriptions from FILE_ID.DIZ in unpacked archives.
(Reads from .src folder, links to web folder).
- Unified "Retro Text" pipeline:
- Strips ANSI codes and PCB Color Codes (@Xnn) at the byte level.
- Detects encodings: SF7 (Finnish 7-bit), CP437 (DOS), UTF-8, Latin1.
- Transcodes everything to valid UTF-8 HTML.
- STRICT Whitespace Handling:
- Removes trailing whitespace.
- Removes vertical padding (empty lines at start/end).
- PRESERVES leading whitespace (indentation) for ASCII art.
- Responsive retro styling:
- Uses embedded "Perfect DOS" web font (locally hosted in /retrofonts/).
- Dynamic width (110ch vs 132ch) based on content length.
- Forces 110ch width for specific curated directories.
- Advanced breadcrumb navigation for unpacked trees (Archive boundary detection).
"""
import os
import re
import html
import datetime
import urllib.parse
import fnmatch
import math
import stat
# =============================================================================
# CONFIGURATION
# =============================================================================
ROOT_DIR = "/home/ftp"
# Destination for clickable links (HTML versions generated by viewer)
UNPACKED_WEB_ROOT = "/home/ftp/unpacked"
# Source for reading metadata (Raw files)
UNPACKED_SRC_ROOT = "/home/ftp/unpacked.src"
# Files and directories to exclude from the generated index.
# CASE INSENSITIVE matching.
EXCLUDED_PATTERNS = {
".*", # Hidden files
"_INDEX*", # PCBoard Index source files
"INDEX.*",
"00_INDEX.TXT",
"CORE", # Linux core dumps
"*.desc", # Internal description files
"mpolilogo*.*", # Website assets
"starportlogo*", # Website assets (Starport variant)
"index-style",
"favicon.ico",
"files",
"sitemap*.txt",
"robots.txt",
"pub",
"DESCRIPT.ION", # 4DOS descriptions
"_index*",
"index.html", # The file we are generating
"index.htm",
"retrofonts", # Exclude font directory
"unpacked.src" # Exclude raw source of unpacked files from indexing
}
# Mapping file extensions to their BBS-era archiver commands.
# Spaces here are preserved in the HTML via 'white-space: pre'.
ARCHIVER_MAP = {
'.ZIP': '[ PKUNZIP ]',
'.ARJ': '[ ARJ x ]',
'.LZH': '[ LHARC e ]',
'.LHA': '[ LHARC e ]',
'.RAR': '[ UNRAR ]',
'.ZOO': '[ ZOO x ]',
'.ARC': '[ ARC e ]',
'.PAK': '[ PAK e ]',
'.EXE': '[ UNPACK ]'
}
# 1990-01-01 00:00:00 Timestamp.
# Used as a fallback for missing dates or to clamp "future" dates.
FIXED_TIMESTAMP = 631152000.0
# Cutoff Year for Date Sanity Check.
# Files newer than this (e.g., generated logs) get clamped to 1990 styling.
MAX_VALID_YEAR = 2015
# =============================================================================
# RETRO TEXT PROCESSING LOGIC
# =============================================================================
# Regex to identify ANSI escape sequences (colors, cursor moves).
ANSI_BYTES_REGEX = re.compile(rb'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
# Regex to identify PCBoard Color Codes (e.g., @X0A, @X1F).
# Matches @X followed by two hex digits. Case insensitive.
PCB_BYTES_REGEX = re.compile(rb'@X[0-9A-F]{2}', re.IGNORECASE)
# Standard Word Regex (2+ letters) used for text density analysis.
WORD_REGEX = re.compile(rb'\b[a-zA-Z]{2,}\b')
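# Illustrative example (hypothetical byte string): applying both substitutions turns
#   b'\x1b[1;33mHELLO @X0AWORLD\x1b[0m'  into  b'HELLO WORLD'.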
def is_binary_safe(content):
"""
Determines if a byte string is likely binary garbage rather than text.
Checks for high density of control codes (excluding tabs/newlines).
"""
if not content: return False
# Check for bytes < 9 (excluding Null sometimes used in text padding)
# and 14-26 (Shift Out, DLE, etc).
bad = sum(1 for b in content if (b < 9 and b != 0) or (14 <= b <= 26))
return bad > (len(content) * 0.10)
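# Example for is_binary_safe (hypothetical inputs): a run of raw control bytes such as
# b'\x01\x02\x03\x04' exceeds the 10% threshold and returns True; ordinary text with
# tabs and newlines returns False.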
def detect_sf7(raw_bytes):
"""
Detects Finnish SF7 (ISO 646-FI) encoding.
SF7 uses 7-bit chars (e.g. '{') to represent Scandis.
If high-bit chars (>127) exist, it is strictly NOT SF7.
"""
total_len = len(raw_bytes)
if total_len == 0: return False
# SAFETY 1: SF7 is a 7-bit encoding. If we see 8-bit bytes (CP437 blocks, Latin1 chars),
# it cannot be SF7. This protects CP437 art and Latin1 text.
if any(b > 127 for b in raw_bytes):
return False
    # SAFETY 2: Word Density.
    # Skipped for short descriptions (under 100 bytes), where the check is unreliable:
    # "Very good!" would score high, but a bare "[]" would score low.
if total_len > 100:
total_words_list = WORD_REGEX.findall(raw_bytes)
total_words = len(total_words_list)
if (total_words / total_len) < 0.05: return False
# Heuristic: Ratio of '{' (ä) to normal letters.
sf7_marker_count = raw_bytes.count(b'{')
letter_count = sum(1 for b in raw_bytes if (65 <= b <= 90) or (97 <= b <= 122))
if letter_count == 0: return False
# Threshold: '{' must represent > 2.5% of all letters.
return (sf7_marker_count / letter_count) > 0.025
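# Example for detect_sf7 (hypothetical SF7 text): b'T{ss{ on k{yt|ss{ skandeja'
# returns True: no 8-bit bytes, and '{' makes up well over 2.5% of the letters.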
def detect_encoding(raw_bytes, source_filename=""):
"""
Determines the likely encoding of the byte stream.
Prioritizes: SF7 -> ASCII -> UTF-8 -> CP437 -> Latin1.
"""
# 0. Special Case: 00_INDEX.TXT is known to be CP437 (or ASCII).
# Never SF7.
is_00_index = "00_INDEX" in source_filename.upper()
# 1. Check for SF7
# Skip if source is 00_INDEX or if detection fails
if not is_00_index and detect_sf7(raw_bytes):
return 'SF7'
# 2. Check for Pure ASCII (7-bit clean)
if not any(b > 127 for b in raw_bytes): return 'ASCII'
# 3. Check for valid UTF-8
try:
raw_bytes.decode('utf-8')
return 'UTF-8'
except UnicodeDecodeError: pass
# 4. Fallback: Distinguish CP437 (DOS Art) from Latin1 (Windows/Unix)
cp437_score = 0
latin1_score = 0
# If it's 00_INDEX.TXT, we bias heavily towards CP437
if is_00_index:
cp437_score += 100
    for b in raw_bytes:
        if 0x80 <= b <= 0x9F: cp437_score += 5   # CP437 accented letters (C1 controls in Latin1)
        elif 0xB0 <= b <= 0xDF: cp437_score += 1 # CP437 shading and box-drawing blocks
        # Scandi letters in Latin1. Note: the uppercase codes (0xC4, 0xD6, 0xC5) fall inside
        # the 0xB0-0xDF range above, so only lowercase ä/ö/å (0xE4, 0xF6, 0xE5) reach this branch.
        elif b in [0xC4, 0xD6, 0xC5, 0xE4, 0xF6, 0xE5]: latin1_score += 3
        elif b == 0xA2: cp437_score += 2          # 0xA2 is 'ó' in CP437
return 'CP437' if cp437_score >= latin1_score else 'LATIN1'
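# Example for detect_encoding (hypothetical bytes): b'\xc9\xcd\xbb' (CP437 box drawing)
# fails UTF-8 decoding and scores 3 to 0 for CP437, so the result is 'CP437'.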
def transcode_text(raw_bytes, encoding):
"""
Converts the raw bytes from the detected source encoding into a clean UTF-8 string.
"""
enc = encoding.lower()
if enc == 'sf7':
text = raw_bytes.decode('ascii', errors='replace')
# Map SF7 chars to Unicode
return text.translate(str.maketrans({
'[': 'Ä', '\\': 'Ö', ']': 'Å',
'{': 'ä', '|': 'ö', '}': 'å',
'~': 'ü'
}))
elif enc == 'cp437': return raw_bytes.decode('cp437', errors='replace')
elif enc == 'utf-8': return raw_bytes.decode('utf-8', errors='replace')
elif enc == 'ascii': return raw_bytes.decode('ascii', errors='replace')
else: return raw_bytes.decode('latin1', errors='replace')
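# Example for transcode_text (hypothetical SF7 bytes):
#   transcode_text(b'K{yt|ss{', 'SF7') -> 'Käytössä'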
def process_retro_text(raw_bytes, source_filename=""):
"""
Master function to clean and convert description text.
1. Strips ANSI & PCB codes (bytes) and DOS EOF.
2. Detects encoding & Transcodes (UTF-8).
3. Trims whitespace (safe for ASCII art).
4. Escapes HTML.
"""
if not raw_bytes: return ""
# Strip ANSI escape sequences
clean_bytes = ANSI_BYTES_REGEX.sub(b'', raw_bytes)
# Strip PCBoard Color Codes (@Xnn)
clean_bytes = PCB_BYTES_REGEX.sub(b'', clean_bytes)
# Strip DOS EOF Char (0x1A / 26) - Common in old text files
clean_bytes = clean_bytes.replace(b'\x1a', b'')
# Safety check
if is_binary_safe(clean_bytes): return "[Binary Data]"
encoding = detect_encoding(clean_bytes, source_filename=source_filename)
utf8_text = transcode_text(clean_bytes, encoding)
# Whitespace Cleanup:
# 1. Right-Strip EVERY line (Remove trailing whitespace)
# We do NOT lstrip() here to preserve indentation/centering.
lines = [line.rstrip() for line in utf8_text.splitlines()]
# 2. Vertical Trim (Remove empty lines from Start)
while lines and not lines[0]:
lines.pop(0)
# 3. Vertical Trim (Remove empty lines from End)
while lines and not lines[-1]:
lines.pop()
# 4. Rejoin with clean newlines (strip vertical padding of result)
final_text = "\n".join(lines)
return html.escape(final_text)
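# Example for process_retro_text (hypothetical description bytes):
#   process_retro_text(b'\x1b[1mHi & bye \r\n\r\n') -> 'Hi &amp; bye'
# (ANSI prefix stripped, trailing blank lines trimmed, '&' escaped for HTML).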
# =============================================================================
# HTML TEMPLATES
# =============================================================================
HTML_HEADER = """
{header_title} - {raw_path}
| FILENAME |
|
SIZE |
DATE |
DESCRIPTION |
"""
HTML_FOOTER = """
TOTAL: {file_count}
"""
# =============================================================================
# HELPER FUNCTIONS
# =============================================================================
def safe_str(s):
"""
Encodes string to UTF-8 properly for HTML injection.
Handles 'surrogateescape' artifacts from os.listdir (e.g., lone surrogates like \udcaf)
by reverting them to bytes and decoding with replacement characters.
"""
if s is None: return ""
try:
# Step 1: Revert 'surrogateescape' decoding (common in os.listdir on Linux)
# to get original raw bytes.
raw_bytes = s.encode('utf-8', 'surrogateescape')
except UnicodeError:
# If surrogates are somehow complex or mixed, force replacement during encode
raw_bytes = s.encode('utf-8', 'replace')
# Step 2: Decode as UTF-8 with replacement for errors (invalid UTF-8 bytes)
# This guarantees the result is a valid UTF-8 string without surrogates.
return raw_bytes.decode('utf-8', 'replace')
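# Example for safe_str (hypothetical listing entry): a name decoded via surrogateescape,
# e.g. 'b\udce4d' (raw byte 0xE4), becomes 'b\ufffdd', i.e. valid UTF-8 with U+FFFD.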
def force_clean(s):
"""
Last-resort sanitizer used before file writing.
Ensures absolute UTF-8 validity by stripping any lingering surrogates.
"""
if not s: return ""
return s.encode('utf-8', 'replace').decode('utf-8')
def safe_href(s):
"""URL-encodes a path component."""
if s is None: return ""
return urllib.parse.quote(os.fsencode(s))
def is_excluded(filename):
"""Checks if a filename matches any of the exclusion patterns."""
fname = filename.upper()
for pattern in EXCLUDED_PATTERNS:
if fnmatch.fnmatch(fname, pattern.upper()):
return True
return False
def is_valid_file_object(path):
"""Checks if path is a regular file or directory (and exists)."""
try:
st = os.lstat(path)
if stat.S_ISLNK(st.st_mode): return False
if stat.S_ISREG(st.st_mode) or stat.S_ISDIR(st.st_mode): return True
return False
except OSError:
return False
def is_effectively_empty(path):
"""
Recursively checks if a directory is empty or contains only excluded files.
"""
for root, dirs, files in os.walk(path):
dirs[:] = [d for d in dirs if is_valid_file_object(os.path.join(root, d))]
for f in files:
fp = os.path.join(root, f)
if not is_valid_file_object(fp): continue
if not is_excluded(f): return False
return True
def read_file_bytes_safe(path):
"""Attempts to read a file as raw bytes. Returns None on failure."""
try:
with open(path, 'rb') as f:
return f.read()
except Exception:
return None
def format_size(size_bytes):
"""Formats bytes into human readable string strictly with 2 decimals."""
if size_bytes == 0: return "0.00 B"
size_name = ("B", "KB", "MB", "GB", "TB")
i = int(math.floor(math.log(size_bytes, 1024)))
p = math.pow(1024, i)
s = size_bytes / p
return "%.2f %s" % (s, size_name[i])
def get_folder_size(start_path):
"""Calculates total size of a directory (excluding excluded files)."""
total_size = 0
for dirpath, dirnames, filenames in os.walk(start_path):
for f in filenames:
if not is_excluded(f):
fp = os.path.join(dirpath, f)
if is_valid_file_object(fp):
try:
total_size += os.path.getsize(fp)
except OSError:
pass
return total_size
def backup_existing_indices(full_path):
"""
Backs up existing index.html files if they weren't generated by this tool.
"""
targets = ["index.html", "index.htm", "INDEX.HTML", "INDEX.HTM"]
our_markers = [b'content="Metropoli-PCB-Indexer"']
for target in targets:
target_path = os.path.join(full_path, target)
if is_valid_file_object(target_path):
is_ours = False
try:
with open(target_path, 'rb') as f:
head = f.read(4096)
for marker in our_markers:
if marker in head:
is_ours = True
break
            except OSError:
                pass
if not is_ours:
new_name = "_" + target
new_path = os.path.join(full_path, new_name)
try:
os.replace(target_path, new_path)
except OSError:
pass
def get_better_desc(rel_dir_path, filename):
"""
Looks for a FILE_ID.DIZ inside the corresponding unpacked directory.
Uses process_retro_text to clean it up.
CRITICAL: Reads metadata from UNPACKED_SRC_ROOT (raw text).
"""
lower_rel = rel_dir_path.lower()
lower_filename = filename.lower()
# Check src path for reading
src_path = os.path.join(UNPACKED_SRC_ROOT, lower_rel, lower_filename)
# We check if the folder exists in source to try and read the DIZ
if not os.path.isdir(src_path) or os.path.islink(src_path):
return None
candidates = ["file_id.old", "FILE_ID.OLD", "file_id.diz", "FILE_ID.DIZ"]
for candidate in candidates:
candidate_path = os.path.join(src_path, candidate)
if is_valid_file_object(candidate_path):
raw_content = read_file_bytes_safe(candidate_path)
if raw_content:
# DIZ files are usually CP437 or SF7, treated generally
desc_html = process_retro_text(raw_content, source_filename=candidate)
# DO NOT STRIP() HERE! strict cleanup is already done in process_retro_text
return desc_html
return None
def generate_breadcrumb(rel_path):
"""
    Generates the navigation breadcrumb HTML in 'C:\\DIR' style.
Handles 'unpacked' directories specially to redirect intermediate
paths back to the compressed file index and HIDE the 'UNPACKED' level.
"""
if not rel_path or rel_path == ".":
return 'C:'
parts = rel_path.split(os.sep)
html_parts = []
# Root Link
up_levels = len(parts)
root_link = "../" * up_levels
    html_parts.append(f'<a href="{root_link}">C:</a>')
# Detect if we are inside the 'unpacked' tree
is_unpacked = (parts[0].lower() == 'unpacked')
# Flag to track if we have crossed the boundary from "Folder Structure" to "Inside Zip"
inside_archive_boundary = False
for i, part in enumerate(parts):
# Handle "UNPACKED" word - Skip it entirely from display
if is_unpacked and i == 0:
continue
levels_up = len(parts) - 1 - i
# Link construction logic for unpacked tree
if is_unpacked:
# Reconstruct the path from ROOT to the current part (skipping 'unpacked')
source_segments = parts[1:i+1]
            # Resolve this path in ROOT_DIR case-insensitively to determine whether
            # it points to a real source directory or a source file (archive).
            # This handles case mismatches (e.g. 'games' vs 'GAMES') that would make
            # a naive os.path.isdir() check fail on Linux's case-sensitive filesystem.
check_path = ROOT_DIR
resolved_segments = []
valid_source = True
for seg in source_segments:
found = False
if os.path.isdir(check_path):
try:
for entry in os.listdir(check_path):
if entry.lower() == seg.lower():
check_path = os.path.join(check_path, entry)
resolved_segments.append(entry)
found = True
break
except OSError: pass
if not found:
valid_source = False
break
is_source_file = False
is_source_dir = False
if valid_source:
if os.path.isfile(check_path): is_source_file = True
elif os.path.isdir(check_path): is_source_dir = True
# Determine target URL based on whether we are looking at a Directory or a File (Archive)
if not inside_archive_boundary:
if is_source_file:
# It IS the archive file (e.g. doom.zip).
# This is the boundary. Link to the Unpacked Index (self).
inside_archive_boundary = True
# Use source_segments (current unpacked path context) for unpacked links
target_url = ("../" * len(parts)) + "unpacked/" + "/".join(safe_href(p) for p in source_segments) + "/index.html"
elif is_source_dir:
# It is a directory in the source tree (e.g. GAMES).
# Link to the Compressed Index (Source).
# Use resolved_segments to ensure we link to the correct Case on disk (e.g. GAMES not games)
target_url = ("../" * len(parts)) + "/".join(safe_href(p) for p in resolved_segments) + "/index.html"
else:
# Fallback (Inside archive or path mismatch)
target_url = ("../" * len(parts)) + "unpacked/" + "/".join(safe_href(p) for p in source_segments) + "/index.html"
else:
# We are ALREADY inside the archive (e.g. DATA).
# Link to the Unpacked Index (subdir).
target_url = ("../" * len(parts)) + "unpacked/" + "/".join(safe_href(p) for p in source_segments) + "/index.html"
            html_parts.append(f'<a href="{target_url}">{safe_str(part.upper())}</a>')
else:
# Standard logic for normal tree
link_target = "../" * levels_up + "index.html"
if levels_up == 0: link_target = "index.html"
            html_parts.append(f'<a href="{link_target}">{safe_str(part.upper())}</a>')
# Join with backslash, no spaces
    return '\\'.join(html_parts)
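# Example for generate_breadcrumb (hypothetical path): rel_path 'GAMES/DOOM' in the
# normal tree yields 'C:' (href '../../'), 'GAMES' (href '../index.html') and
# 'DOOM' (href 'index.html'), joined with backslashes.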
def parse_date(date_str):
"""Parses MM-DD-YY date string into standardized YYYY-MM-DD."""
try:
m, d, y = date_str.split('-')
y_int = int(y)
full_year = 1900 + y_int if y_int > 50 else 2000 + y_int
dt = datetime.datetime(full_year, int(m), int(d))
return dt.strftime("%Y-%m-%d"), int(dt.timestamp())
except:
return date_str, 0
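# Examples for parse_date: parse_date('12-24-94') -> ('1994-12-24', <epoch>),
# parse_date('01-05-03') -> ('2003-01-05', <epoch>); two-digit years of 50 or
# less map to 20xx.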
def parse_htaccess(full_path):
"""
Parses .htaccess for AddDescription fields.
"""
htaccess_path = os.path.join(full_path, ".htaccess")
desc_map = {}
if is_valid_file_object(htaccess_path):
content_bytes = read_file_bytes_safe(htaccess_path)
if content_bytes:
# .htaccess is usually simple text
try: content = content_bytes.decode('utf-8')
except UnicodeDecodeError: content = content_bytes.decode('latin-1', errors='replace')
lines = content.splitlines()
regex = re.compile(r'^\s*AddDescription\s+"([^"]+)"\s+"(?:\*/)?([^"]+)"', re.IGNORECASE)
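            # A matching line looks like this (hypothetical entry):
            #   AddDescription "Classic platform game" "JAZZ12.ZIP"
            # which maps 'JAZZ12.ZIP' -> 'Classic platform game'.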
for line in lines:
m = regex.search(line)
if m: desc_map[m.group(2).upper()] = m.group(1)
return desc_map
def parse_pcboard_index(index_path):
"""
Parses a legacy PCBoard _INDEX_ file.
"""
entries = []
current_entry = None
if not is_valid_file_object(index_path): return []
content_bytes = read_file_bytes_safe(index_path)
if not content_bytes: return []
# Get filename for detection logic (00_INDEX check)
index_filename = os.path.basename(index_path)
lines = content_bytes.splitlines()
line_regex = re.compile(rb'^([A-Z0-9_\-\.]{1,12})\s+(\d+)\s+(\d{2}-\d{2}-\d{2})\s+(.*)')
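    # A header line matched by line_regex looks like this (hypothetical entry):
    #   DOOM19S.ZIP   2394176  12-23-93  DOOM shareware, episode one
    # Continuation lines normally start with a '|' delimiter before the description text.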
for idx, line in enumerate(lines):
line = line.rstrip()
if not line: continue
match = line_regex.match(line)
if match:
if current_entry: entries.append(current_entry)
f_name = match.group(1).decode('ascii', errors='ignore')
f_size = match.group(2).decode('ascii', errors='ignore')
f_date = match.group(3).decode('ascii', errors='ignore')
raw_desc = match.group(4)
current_entry = {
'filename': f_name,
'size': f_size,
'raw_date': f_date,
'raw_desc_lines': [raw_desc],
'original_order': idx
}
elif current_entry:
# Handle Continuation Lines
# We must detect if it's a pipe-separated continuation (standard)
# or just a wrapped line.
# 1. Check if the line *visually* starts with a pipe
if line.lstrip().startswith(b'|'):
# Locate the pipe char
pidx = line.find(b'|')
# Content is everything after the pipe.
# We RSTRIP to remove trailing space, but we do NOT LSTRIP the content
# to preserve any intended indentation (ASCII art).
content = line[pidx+1:].rstrip()
current_entry['raw_desc_lines'].append(content)
else:
# Fallback: No pipe found. We have to assume it's just text.
# In this case we strip to be safe, as indentation without a delimiter is unreliable.
current_entry['raw_desc_lines'].append(line.strip())
if current_entry: entries.append(current_entry)
# Process descriptions
for e in entries:
full_desc_bytes = b'\n'.join(e['raw_desc_lines'])
# Pass the index filename so process_retro_text knows if it's 00_INDEX.TXT
e['desc'] = process_retro_text(full_desc_bytes, source_filename=index_filename)
return entries
def get_unpack_url(rel_dir_path, filename):
"""
Checks if an archive has a corresponding unpacked folder.
Links to the WEB root (HTML version).
"""
lower_rel = rel_dir_path.lower()
lower_filename = filename.lower()
# Check Web path for linking
web_disk_path = os.path.join(UNPACKED_WEB_ROOT, lower_rel, lower_filename)
# We check if the folder exists (even if we ignore it for indexing)
if os.path.isdir(web_disk_path) and not os.path.islink(web_disk_path):
_, ext = os.path.splitext(filename.upper())
label = ARCHIVER_MAP.get(ext, '[ UNPACK ]')
url_path = f"/unpacked/{safe_href(lower_rel)}/{safe_href(lower_filename)}/"
return url_path, label
return None, None
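# Example for get_unpack_url (hypothetical archive): if /home/ftp/unpacked/games/doom19s.zip/
# exists as a directory, get_unpack_url('GAMES', 'DOOM19S.ZIP') returns
# ('/unpacked/games/doom19s.zip/', '[ PKUNZIP ]').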
def get_context_link_html(full_path):
"""
Links from unpacked view back to original file.
Adjusted to handle checking within WEB root.
"""
abs_unpacked = os.path.abspath(UNPACKED_WEB_ROOT)
abs_current = os.path.abspath(full_path)
if abs_current.startswith(abs_unpacked) and abs_current != abs_unpacked:
rel_from_unpacked = os.path.relpath(abs_current, abs_unpacked)
path_parts = rel_from_unpacked.split(os.sep)
current_phys_path = ROOT_DIR
matched_url_parts = []
for part in path_parts:
found_name = None
try:
if os.path.isdir(current_phys_path):
for item in os.listdir(current_phys_path):
if item.lower() == part.lower():
found_name = item
break
except OSError: break
if found_name:
next_phys_path = os.path.join(current_phys_path, found_name)
if os.path.isfile(next_phys_path):
if not matched_url_parts:
href = "/"
else:
href = "/" + "/".join(safe_href(p) for p in matched_url_parts) + "/"
                        # Link back to the source directory listing (generic label).
                        return f'<a href="{href}">[ ORIGINAL DIR ]</a>'
current_phys_path = next_phys_path
matched_url_parts.append(found_name)
else:
break
return ""
def update_folder_timestamps(full_path, index_file_path):
"""
Updates the folder timestamp to match the newest file inside it.
Clamps dates > MAX_VALID_YEAR.
CRITICAL: This ignores 'index.html', 'index.htm', and '_INDEX_'
files to ensure the directory timestamp reflects content, not the
generated index itself.
"""
try:
# 1. Reset timestamps of excluded items to fixed date
# EXCEPTION: We do NOT reset index.html here, allowing it to stay "Current"
try:
immediate_items = os.listdir(full_path)
for item in immediate_items:
# Skip touching the index files we just generated so they stay fresh
if item.lower() in ["index.html", "index.htm", "_index_"]:
continue
if is_excluded(item):
item_path = os.path.join(full_path, item)
if is_valid_file_object(item_path):
try:
os.utime(item_path, (FIXED_TIMESTAMP, FIXED_TIMESTAMP))
except OSError: pass
except OSError: pass
# 2. Find newest content file
newest_ts = 0.0
for root, dirs, files in os.walk(full_path):
dirs[:] = [d for d in dirs if not is_excluded(d) and is_valid_file_object(os.path.join(root, d))]
for f in files:
# Explicitly skip index files from calculation
if f.lower() in ["index.html", "index.htm", "_index_"]:
continue
if is_excluded(f): continue
file_path = os.path.join(root, f)
if file_path == index_file_path: continue
if is_valid_file_object(file_path):
try:
stat_res = os.stat(file_path)
# Sanity Check
file_year = datetime.datetime.fromtimestamp(stat_res.st_mtime).year
if file_year > MAX_VALID_YEAR:
# If file is too new (e.g. logs), ignore it for sorting
# or clamp it. Here we clamp it to keep folder retro.
current_ts = FIXED_TIMESTAMP
else:
current_ts = stat_res.st_mtime
if current_ts > newest_ts:
newest_ts = current_ts
except OSError: pass
for d in dirs:
dir_path = os.path.join(root, d)
if is_valid_file_object(dir_path):
try:
stat_res = os.stat(dir_path)
if stat_res.st_mtime > newest_ts:
newest_ts = stat_res.st_mtime
except OSError: pass
if newest_ts > 0:
os.utime(full_path, (newest_ts, newest_ts))
else:
os.utime(full_path, (FIXED_TIMESTAMP, FIXED_TIMESTAMP))
except OSError as e:
print(f"Timestamp update failed for {full_path}: {e}")
# =============================================================================
# MAIN LOGIC
# =============================================================================
def generate_folder_index(full_path):
print(f"Processing: {safe_str(full_path)}")
backup_existing_indices(full_path)
# Locate index source
index_file = os.path.join(full_path, "_INDEX_")
if not is_valid_file_object(index_file):
alt_index = os.path.join(full_path, "00_index.txt")
if is_valid_file_object(alt_index): index_file = alt_index
# Parse sources
parsed_entries = parse_pcboard_index(index_file)
indexed_filenames = {e['filename'].upper() for e in parsed_entries}
htaccess_desc = parse_htaccess(full_path)
try: actual_items = os.listdir(full_path)
except OSError: return
disk_map_files = {}
dir_list = []
file_list = []
context_link_html = get_context_link_html(full_path)
# 1. Process Directories & Files on Disk
for item in actual_items:
full_item_path = os.path.join(full_path, item)
# HIDE UNPACKED FOLDERS FROM TOP LEVEL
if full_path == ROOT_DIR and item.lower() in ['unpacked', 'unpacked.src']:
continue
upper_item = item.upper()
if not is_valid_file_object(full_item_path): continue
if os.path.isdir(full_item_path):
if not is_excluded(item):
if is_effectively_empty(full_item_path): continue
stat_res = os.stat(full_item_path)
dt = datetime.datetime.fromtimestamp(stat_res.st_mtime)
if dt.year > MAX_VALID_YEAR or int(dt.timestamp()) == int(FIXED_TIMESTAMP):
date_str = ""
else:
date_str = dt.strftime("%Y-%m-%d")
desc = htaccess_desc.get(upper_item, "")
dir_size = get_folder_size(full_item_path)
size_str = format_size(dir_size)
dir_list.append({
'filename': item, 'real_name': item, 'size': size_str,
'raw_date': None, 'formatted_date': date_str,
'timestamp': int(dt.timestamp()), 'desc': desc,
'original_order': -1, 'is_dir': True
})
else:
disk_map_files[upper_item] = item
# Only add if not already in PCBoard index (merged later)
if upper_item not in indexed_filenames and not is_excluded(item):
# Determine correct path for STATS (Size/Date).
# If we are in the 'unpacked' web root, we should check if a source file exists
# in 'unpacked.src' and use that instead.
target_stat_path = full_item_path
if full_path.startswith(UNPACKED_WEB_ROOT):
# Calculate relative path from the web root, e.g. "games/doom"
rel_from_web = os.path.relpath(full_path, UNPACKED_WEB_ROOT)
# Construct potential source path: /home/ftp/unpacked.src/games/doom/file.txt
possible_src = os.path.join(UNPACKED_SRC_ROOT, rel_from_web, item)
if is_valid_file_object(possible_src):
target_stat_path = possible_src
stat_res = os.stat(target_stat_path)
dt = datetime.datetime.fromtimestamp(stat_res.st_mtime)
if dt.year > MAX_VALID_YEAR or int(dt.timestamp()) == int(FIXED_TIMESTAMP):
date_str = ""
else:
date_str = dt.strftime("%Y-%m-%d")
raw_desc = htaccess_desc.get(upper_item, "")
desc_html = html.escape(raw_desc)
file_list.append({
'filename': item, 'real_name': item, 'size': str(stat_res.st_size),
'raw_date': None, 'formatted_date': date_str,
'timestamp': int(dt.timestamp()), 'desc': desc_html,
'original_order': 999999, 'is_dir': False
})
# 2. Merge Lists
final_list = []
dir_list.sort(key=lambda x: x['filename'].upper())
final_list.extend(dir_list)
for entry in parsed_entries:
fname_upper = entry['filename'].upper()
if fname_upper in disk_map_files:
entry['real_name'] = disk_map_files[fname_upper]
entry['is_dir'] = False
final_list.append(entry)
file_list.sort(key=lambda x: x['filename'].upper())
final_list.extend(file_list)
rel_path = os.path.relpath(full_path, ROOT_DIR)
if rel_path == ".": rel_path = ""
html_rows = []
# 3. Width Logic (110ch vs 132ch)
# Default is 110ch (Fits legacy forum files)
# CHECK FOR FORCED 110ch MODE based on curated directories
forced_mode_keywords = {'software', 'hardware', 'skene', 'tlr'}
force_110 = False
# rel_path is already calculated and sanitized (empty string for root)
if rel_path:
first_part = rel_path.split(os.sep)[0].lower()
if first_part in forced_mode_keywords:
force_110 = True
needs_wide = False
# Only run dynamic check if NOT forced
if not force_110:
for item in final_list:
real_name = item.get('real_name', item['filename'])
# Check 1: Long Filename trigger (increased to 35)
if len(real_name) > 35:
needs_wide = True
break
# Check 2: Long Description trigger (increased to 50)
if item['desc']:
                # Unescape so entities like &quot; count as 1 char
# Also strip whitespace from check since we will strip it in display
clean_d = html.unescape(re.sub(r'<[^>]+>', '', item['desc']))
longest_line = max(len(line.rstrip()) for line in clean_d.splitlines()) if clean_d else 0
if longest_line > 50:
needs_wide = True
break
frame_style = "width: 132ch;" if needs_wide else "width: 110ch;"
# 4. Generate HTML Rows
for item in final_list:
real_name = item.get('real_name', item['filename'])
if 'formatted_date' in item:
display_date = item['formatted_date']
timestamp = item['timestamp']
else:
display_date, timestamp = parse_date(item['raw_date'])
if timestamp == int(FIXED_TIMESTAMP):
display_date = ""
if item.get('is_dir', False):
desc_html = html.escape(item['desc'])
else:
better_desc = get_better_desc(rel_path, real_name)
if better_desc:
desc_html = better_desc
else:
desc_html = item['desc']
display_filename = safe_str(item['filename'])
href_link = safe_href(real_name) + ("/" if item.get('is_dir', False) else "")
if item.get('is_dir', False):
unpack_html = '[ DIR ]'
else:
unpack_url, unpack_label = get_unpack_url(rel_path, real_name)
if unpack_url:
                unpack_html = f'<a href="{unpack_url}">{unpack_label}</a>'
else:
unpack_html = ""
data_bytes = item['size']
if item.get('is_dir', False): sort_bytes = 0
else: sort_bytes = data_bytes
row = f"""
| {display_filename} |
{unpack_html} |
{item['size']} |
{display_date} |
{desc_html} |
"""
html_rows.append(row)
# 5. Write Output
out_path = os.path.join(full_path, "index.html")
# Generate C:\ style path for window title
raw_path_display = "C:\\" if (not rel_path or rel_path == ".") else "C:\\" + safe_str(rel_path.replace("/", "\\").upper())
# BRANDING LOGIC
# Check if we are in the 'skene' tree (either standard or unpacked)
# rel_path is relative to ROOT_DIR (/home/ftp)
# e.g. 'skene', 'skene/demo', 'unpacked/skene', 'unpacked/skene/demo'
check_path = rel_path.lower().replace('\\', '/')
is_starport = False
if check_path == 'skene' or check_path.startswith('skene/'):
is_starport = True
elif check_path == 'unpacked/skene' or check_path.startswith('unpacked/skene/'):
is_starport = True
if is_starport:
header_title = "Starport BBS"
header_logo = "/starportlogo-500x242.png"
header_alt = "Starport BBS"
else:
header_title = "Metropoli BBS"
header_logo = "/mpolilogo2-transparent-500x500.png"
header_alt = "Metropoli BBS"
with open(out_path, 'w', encoding='utf-8') as f:
bc_html = generate_breadcrumb(rel_path)
# Prepare the header content
header_content = HTML_HEADER.format(
raw_path=raw_path_display,
breadcrumb_html=bc_html,
context_link_html=context_link_html,
frame_style=frame_style,
header_title=header_title,
header_logo=header_logo,
header_alt=header_alt
)
# FINAL SANITIZATION: force_clean ensures no surrogates exist before writing
f.write(force_clean(header_content))
f.write("".join(html_rows))
f.write(HTML_FOOTER.format(file_count=len(final_list)))
update_folder_timestamps(full_path, out_path)
if __name__ == "__main__":
for subdir, dirs, files in os.walk(ROOT_DIR, topdown=False):
# Exclude unpacked.src completely from traversal
if "unpacked.src" in subdir:
continue
# Modify dirs in-place to prevent os.walk from entering excluded dirs next
dirs[:] = [d for d in dirs if not is_excluded(d)]
generate_folder_index(subdir)