"""This is a high-level width() supporting terminal output."""

from typing import Literal

# local
from ._wcwidth import wcwidth
from .bisearch import bisearch
from ._wcswidth import wcswidth
from ._constants import (_EMOJI_ZWJ_SET,
                         _ISC_VIRAMA_SET,
                         _CATEGORY_MC_TABLE,
                         _FITZPATRICK_RANGE,
                         _REGIONAL_INDICATOR_SET)
from .table_vs16 import VS16_NARROW_TO_WIDE
from .text_sizing import TextSizing, TextSizingParams
from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL
from .table_grapheme import ISC_CONSONANT
from .escape_sequences import (_SEQUENCE_CLASSIFY,
                               TEXT_SIZING_PATTERN,
                               CURSOR_MOVEMENT_SEQUENCE,
                               INDETERMINATE_EFFECT_SEQUENCE,
                               strip_sequences)

# Length threshold for the 'parse'-mode shortcut: only strings longer than this
# are scanned for cursor-moving controls (BS, TAB, CR, cursor sequences), and
# when none are found the mode is lowered to 'ignore' so that per-character
# parsing is skipped. Short strings such as labels or headings bypass the scan,
# where its cost would be wasted.
_WIDTH_FAST_PATH_MIN_LEN = 20

# Deletion table for str.translate(), used by the fast 'ignore' mode: every code
# point listed here maps to None and is therefore removed from the text.
_CONTROL_CHAR_TABLE = dict.fromkeys((
    *range(0x00, 0x20),   # C0: NUL through US (including tab)
    0x7f,                 # DEL
    *range(0x80, 0xa0),   # C1: U+0080-U+009F
))


def _width_ignored_codes(text: str, ambiguous_width: int = 1) -> int:
    """
    Measure ``text`` for width() when ``control_codes='ignore'``.

    Escape sequences are removed first, then C0/C1 control characters and DEL are deleted, and
    whatever remains is measured by :func:`wcswidth`.

    :param text: String to measure.
    :param ambiguous_width: Passed through to :func:`wcswidth` unchanged.
    :returns: The value returned by :func:`wcswidth` for the filtered text.
    """
    without_sequences = strip_sequences(text)
    printable_only = without_sequences.translate(_CONTROL_CHAR_TABLE)
    return wcswidth(printable_only, ambiguous_width=ambiguous_width)


def width(
    text: str,
    *,
    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
    tabsize: int = 8,
    ambiguous_width: int = 1,
) -> int:
    r"""
    Return printable width of text containing many kinds of control codes and sequences.

    Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal
    output sequences.  Never returns -1.

    :param text: String to measure.
    :param control_codes: How to handle control characters and sequences:

        - ``'parse'`` (default): Track horizontal cursor movement like BS ``\b``, CR ``\r``, TAB
          ``\t``, cursor left and right movement sequences.  Vertical movement (LF, VT, FF) and
          indeterminate terminal sequences are zero-width. OSC 66 Kitty Text Sizing protocol, OSC 8
          Hyperlink, and many other kinds of output sequences are parsed for displayed measurements.
        - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with
          indeterminate results of the screen or cursor, like clear or vertical movement. Generally,
          these should be handled with a virtual terminal emulator (like 'pyte').
        - ``'ignore'``: All C0 and C1 control characters and escape sequences are measured as
          width 0. This is the fastest measurement for text already filtered or known not to contain
          any kinds of control codes or sequences. TAB ``\t`` is zero-width; to ensure
          tab expansion, pre-process text using :func:`str.expandtabs`.

    :param tabsize: Tab stop width for ``'parse'`` and ``'strict'`` modes. Default is 8.
        Must be positive; when it is not, TAB ``\t`` is measured as zero-width. Has no effect when
        ``control_codes='ignore'``.
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: Maximum cursor position reached, "extent", accounting for cursor movement sequences
        present in ``text`` according to given parameters.  This represents the rightmost column the
        cursor reaches.  Always a non-negative integer.

    :raises ValueError: If ``control_codes='strict'`` and control characters with indeterminate
        effects, such as vertical movement or clear sequences are encountered, or on unexpected
        C0 or C1 control code. Also raised when ``control_codes`` is not one of the valid values,
        regardless of the content of ``text``.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.7.0
       Expanded strict-mode to raise :exc:`ValueError` when cursor-left movement
       (CSI D) would move beyond the beginning of the string. Previously, cursor-left
       was silently clamped to column 0 in all modes.

       Support horizontal cursor sequences (``cub``, ``cuf``, ``hpa``). Cursor-left (``cub``) or
       backspace (``\b``) now overwrites text.  ``column_address`` (``hpa``) and carriage return
       (``\r``) are now parsed, and raise ``ValueError`` when ``control_codes='strict'``.

    Examples::

        >>> width('hello')
        5
        >>> width('コンニチハ')
        10
        >>> width('\x1b[31mred\x1b[0m')
        3
        >>> width('\x1b[31mred\x1b[0m', control_codes='ignore')  # same result (ignored)
        3
        >>> width('123\b4')     # backspace overwrites previous cell (outputs '124')
        3
        >>> width('abc\t')      # tab caused cursor to move to column 8
        8
        >>> width('1\x1b[10C')  # '1' + cursor right 10, cursor ends on column 11
        11
        >>> width('1\x1b[10C', control_codes='ignore')   # faster but wrong in this case
        1
    """
    # pylint: disable=too-complex,too-many-branches,too-many-statements,too-many-locals
    # This could be broken into sub-functions (#1, #3, and #5 especially), but for reduced overhead
    # in consideration of this function a likely "hot path", they are inline, breaking many pylint
    # complexity rules.

    # Reject unknown modes up front, before any fast path can return: otherwise a misspelled
    # 'strict' would silently be measured with lenient parsing and never raise.
    if control_codes not in ('parse', 'strict', 'ignore'):
        raise ValueError(
            f"control_codes must be 'parse', 'strict', or 'ignore', got {control_codes!r}"
        )

    # Fast path for ASCII printable (no tabs, escapes, or control chars)
    if text.isascii() and text.isprintable():
        return len(text)

    # Fast parse: if no horizontal cursor movements are possible, switch to 'ignore' mode.
    # Only check longer strings - the detection overhead hurts short string performance.
    if control_codes == 'parse' and len(text) > _WIDTH_FAST_PATH_MIN_LEN:
        # Check for cursor-affecting control characters
        if '\b' not in text and '\t' not in text and '\r' not in text:
            # Check for escape sequences, if none contain cursor movement or
            # text sizing, downgrade to 'ignore'
            if '\x1b' not in text or (
                not CURSOR_MOVEMENT_SEQUENCE.search(text)
                and not TEXT_SIZING_PATTERN.search(text)
            ):
                control_codes = 'ignore'

    # Fast path for ignore mode, useful if you know the text is already free of control codes
    if control_codes == 'ignore':
        return _width_ignored_codes(text, ambiguous_width)

    strict = control_codes == 'strict'
    # Track absolute positions: tab stops need modulo on absolute column, CR resets to 0.
    # Initialize max_extent to 0 so backward movement (CR, BS) won't yield negative width.
    current_col = 0
    max_extent = 0
    idx = 0
    text_len = len(text)

    # Select wcwidth call pattern for best lru_cache performance:
    # - ambiguous_width=1 (default): single-arg calls share cache with direct wcwidth() calls
    # - ambiguous_width=2: full positional args needed (results differ, separate cache is correct)
    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)

    # grapheme-clustering state
    last_measured_idx = -2
    last_measured_ucs = -1
    last_was_virama = False
    conjunct_pending = False

    while idx < text_len:
        char = text[idx]

        # 1. ESC sequences
        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # 1a. Errant ESC or unknown sequence: only the first character is zero-width
                idx += 1
            else:
                seq = m.group()
                if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                    raise ValueError(f"Indeterminate cursor sequence at position {idx}, {seq!r}")

                # 1b. horizontal position absolute (before forward/backward to
                #     avoid other_seq match in _SEQUENCE_CLASSIFY)
                if (hpa_n := m.group('hpa_n')) is not None:
                    target_col = int(hpa_n) if hpa_n else 1
                    if strict:
                        raise ValueError(
                            f"Indeterminate horizontal position at position {idx}, "
                            f"{seq!r} (absolute column unknown)"
                        )
                    # HPA is 1-indexed, convert to 0-indexed. Clamp at column 0 so that an
                    # explicit parameter of 0 cannot drive the column negative, which would
                    # under-count any text that follows and skew tab-stop arithmetic.
                    current_col = max(0, target_col - 1)
                # 1c. cursor forward, backward
                elif (cforward_n := m.group('cforward_n')) is not None:
                    current_col += int(cforward_n) if cforward_n else 1
                elif (cbackward_n := m.group('cbackward_n')) is not None:
                    n_backward = int(cbackward_n) if cbackward_n else 1
                    if strict and n_backward > current_col:
                        raise ValueError(
                            f"Cursor left movement at position {idx} would move "
                            f"{n_backward} cells left from column {current_col}, "
                            f"exceeding string start"
                        )
                    current_col = max(0, current_col - n_backward)
                # 1d. OSC 66 Text Sizing — has positive display width
                elif (ts_meta := m.group('ts_meta')) is not None:
                    ts_text = m.group('ts_text')
                    ts_term = m.group('ts_term')
                    assert ts_text is not None and ts_term is not None
                    text_size = TextSizing(
                        TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                        ts_text, ts_term)
                    current_col += text_size.display_width(ambiguous_width)
                # 1e. SGR and other zero-width sequences -- no column advance
                idx = m.end()
            # Escape sequences break VS16 adjacency: reset last-measured state
            last_measured_idx = -2
            last_measured_ucs = -1
            max_extent = max(max_extent, current_col)
            continue

        # 2. Vertical or Illegal control characters zero width or error when 'strict'
        if char in ILLEGAL_CTRL:
            if strict:
                raise ValueError(f"Illegal control character {ord(char):#x} at position {idx}")
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        if char in VERTICAL_CTRL:
            if strict:
                raise ValueError(f"Vertical movement character {ord(char):#x} at position {idx}")
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        # 3. Horizontal movement characters
        if char in HORIZONTAL_CTRL:
            if char == '\t' and tabsize > 0:
                # Advance to the next tab stop, computed from the absolute column
                current_col += tabsize - (current_col % tabsize)
            elif char == '\b':
                # Backspace never moves left of column 0, in any mode
                if current_col > 0:
                    current_col -= 1
            elif char == '\r':
                if strict:
                    raise ValueError(
                        f"Horizontal movement character \\r at position {idx}: "
                        "indeterminate starting column"
                    )
                current_col = 0
            max_extent = max(max_extent, current_col)
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        # 4. Zero-width control characters
        if char in ZERO_WIDTH_CTRL:
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        # 5. Inline grapheme-clustering: ZWJ, VS16, Regional Indicators,
        #    Fitzpatrick, Virama conjuncts, Mc, wcwidth
        ucs = ord(char)

        # ZWJ (U+200D)
        if ucs == 0x200D:
            if last_was_virama:
                # After a virama only the ZWJ itself is consumed; the virama state is kept
                # so the following character is still considered for conjunct formation.
                idx += 1
            elif idx + 1 < text_len:
                # Otherwise the ZWJ and the character joined to it are both consumed
                # without adding any width.
                last_was_virama = False
                idx += 2
            else:
                last_was_virama = False
                idx += 1
            continue

        # VS16 (U+FE0F): converts preceding narrow character to wide.
        if ucs == 0xFE0F and last_measured_idx >= 0:
            if bisearch(ord(text[last_measured_idx]), VS16_NARROW_TO_WIDE['9.0.0']):
                current_col += 1
                max_extent = max(max_extent, current_col)
            last_measured_idx = -2  # prevent double application
            idx += 1
            continue

        # Regional Indicator & Fitzpatrick (both above BMP)
        if ucs > 0xFFFF:
            if ucs in _REGIONAL_INDICATOR_SET:
                # Count the Regional Indicators immediately before this one: an odd count
                # means this one completes a pair whose first half was already measured.
                ri_before = 0
                j = idx - 1
                while j >= 0 and ord(text[j]) in _REGIONAL_INDICATOR_SET:
                    ri_before += 1
                    j -= 1
                if ri_before % 2 == 1:
                    last_measured_ucs = ucs
                    idx += 1
                    continue
            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
                  and last_measured_ucs in _EMOJI_ZWJ_SET):
                # A modifier in the Fitzpatrick range following such a base adds no width
                idx += 1
                continue

        # Virama conjunct formation
        if last_was_virama and bisearch(ucs, ISC_CONSONANT):
            # The consonant is not measured here; its cell is added once, either by the
            # next measured character or at the end of the string (conjunct_pending).
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
            conjunct_pending = True
            idx += 1
            continue

        # Normal character: measure with wcwidth
        w = _wcwidth(char)
        if w > 0:
            if conjunct_pending:
                current_col += 1
                conjunct_pending = False
            current_col += w
            max_extent = max(max_extent, current_col)
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
        elif last_measured_idx >= 0 and bisearch(ucs, _CATEGORY_MC_TABLE):
            # Spacing Combining Mark (Mc) following a base character adds 1
            current_col += 1
            max_extent = max(max_extent, current_col)
            last_measured_idx = -2
            last_was_virama = False
            conjunct_pending = False
        else:
            last_was_virama = ucs in _ISC_VIRAMA_SET
        idx += 1

    # A conjunct still pending at end of string occupies one cell
    if conjunct_pending:
        current_col += 1
        max_extent = max(max_extent, current_col)
    return max_extent
