import { ErrorResult, SourcedErrorResult } from './shared';
import { TokenSpecification } from './LexerRules';

/**
 * How the `pattern` text of a token specification is interpreted.
 * Members are numeric and numbered from zero in declaration order.
 */
export enum PatternType {
    /** Literal text; `toPattern` regex-escapes each whitespace-separated literal. */
    Exact = 0,
    /** `pattern` is regular-expression source and is used verbatim by `toPattern`. */
    Pattern = 1,
    /** NOTE(review): not handled separately in this file; `toPattern` treats it like `Exact`. */
    StartEndPattern = 2,
}

/**
 * A single lexed token.
 *
 * - `value`: the matched source text.
 * - `type`: the grammar symbol name assigned to the token.
 * - `spec`: the token specification that produced the match.
 * - `pos`: `[start, end]` character indices of the match in the source.
 */
export class Token {
    constructor(readonly value: string, readonly type: string, readonly spec: TokenSpec, readonly pos: [number, number]) {}

    /**
     * Returns `value` with every whitespace character replaced by a readable
     * name, so the text stays visible when embedded in a message.
     * All other characters are passed through unchanged.
     */
    safeDisplayValue() {
        return this.value
            .split('')
            .map(c => {
                // Compare the individual character, not the whole value, so
                // whitespace inside a multi-character token is named as well.
                switch (c) {
                    case ' ':
                        return 'Space';
                    case '\t':
                        return 'Tab';
                    case '\r':
                        return 'Line break';
                    case '\n':
                        return 'Newline';
                    default:
                        return c;
                }
            })
            .join('');
    }

    /** Index of the token's first character in the source (`pos[0]`). */
    get startCharacterIndex() {
        return this.pos[0];
    }
}

/**
 * What the lexer does with text matched by a token specification.
 * Members are numeric and numbered from zero in declaration order.
 */
export enum TokenAction {
    /** The matched text is skipped; `tokenize` emits no token for it. */
    Ignore = 0,
    /** `tokenize` emits the token and then stops, returning an error result. */
    Error = 1,
    /** The token's grammar symbol is the matched text itself. */
    Literal = 2,
    /** The token's grammar symbol is the specification's `label`. */
    Name = 3,
}

/** One lexer rule: how to recognise a piece of text and what to do with it. */
export interface TokenSpec {
    /** How `pattern` is interpreted (see `toPattern`). */
    type: PatternType;
    /**
     * For `PatternType.Pattern`: regular-expression source, used as-is.
     * Otherwise: one or more literals separated by whitespace.
     */
    pattern: string;
    /** What `tokenize` does with text matched by this rule. */
    action: TokenAction;
    /** Grammar symbol name; read when `action` is `TokenAction.Name`. */
    label?: string;
}

/*
 * Escapes every regular-expression metacharacter in `txt` so that the result
 * matches the text literally when used as regex source.
 * Recipe from https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
 */
function escapeRegExp(txt: string) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    // In a replacement string `$&` stands for the whole matched text, so each
    // metacharacter is re-emitted with a backslash in front of it.
    const backslashPrefixed = '\\$&';
    return txt.replace(metaCharacters, backslashPrefixed);
}

/**
 * Splits a whitespace-separated list of literals into its items.
 *
 * Leading and trailing whitespace is ignored. An empty or whitespace-only
 * input yields an empty array (a plain `split` would yield `['']`, i.e. a
 * bogus empty literal).
 */
function splitLiterals(str: string): string[] {
    const trimmed = str.trim();
    return trimmed === '' ? [] : trimmed.split(/\s+/);
}

/**
 * Builds the regular-expression source for one token specification.
 *
 * - `PatternType.Pattern`: the pattern text is returned unchanged.
 * - Any other type: the pattern is read as whitespace-separated literals,
 *   each one is regex-escaped, and they are joined into an alternation.
 *
 * @returns regex source (not wrapped in a group).
 */
function toPattern(token: TokenSpec) {
    if (token.type === PatternType.Pattern) {
        return token.pattern;
    }
    // TODO: boundary after text
    return (
        splitLiterals(token.pattern)
            // Regex alternation takes the first alternative that matches, so a
            // literal that is a prefix of another (`=` vs `==`) must come after
            // the longer one. The sort is stable, so literals of equal length
            // keep the order in which they were written.
            .sort((a, b) => b.length - a.length)
            .map(p => escapeRegExp(p))
            .join('|')
    );
}

/**
 * Result of a tokenization run: the tokens produced, plus an optional error
 * describing why the run stopped.
 */
export class TokenSet {
    public tokens: Token[];
    public error?: SourcedErrorResult;

    constructor(tokens: Token[], error?: SourcedErrorResult) {
        this.tokens = tokens;
        this.error = error;
    }

    /** True when an error is attached (`error` is neither `null` nor `undefined`). */
    get isError() {
        return !(this.error === undefined || this.error === null);
    }
}

/**
 * Splits `src` into tokens according to the valid entries of `allTokenSpecs`.
 *
 * All specifications are combined into one sticky regular expression with one
 * alternative per specification, in the given order; at each position the
 * first alternative that matches wins. A catch-all `.` specification with
 * `TokenAction.Error` is appended so that every character matches something.
 *
 * @param src text to tokenize.
 * @param allTokenSpecs token specifications; entries whose `isValid` is falsy are skipped.
 * @returns a `TokenSet` holding the tokens. If an `Error`-action specification
 *          matched, the set ends with that token and carries a `SourcedErrorResult`.
 * @throws ErrorResult if a specification matches the empty string.
 * @throws Error if no alternative matched at all (should be unreachable).
 */
export function tokenize(src: string, allTokenSpecs: TokenSpecification[]) {
    const tokenspecs = allTokenSpecs.filter(t => t.isValid);
    /* Add a catch-all error pattern at the end */
    const errorSpec: TokenSpec = new TokenSpecification(-1, PatternType.Pattern, '.', TokenAction.Error);
    const specs = [...tokenspecs, errorSpec];
    // Each specification is wrapped in its own *named* group. Looking the
    // match up by name keeps the spec lookup correct even when a user pattern
    // contains capturing groups of its own, which would shift positional
    // group indices.
    const groupName = (specIx: number) => `__spec${specIx}`;
    const patterns = specs.map((s, specIx) => `(?<${groupName(specIx)}>${toPattern(s)})`).join('|');
    const r = RegExp(patterns, 'msuy');
    const tokens: Token[] = [];
    let ix = 0;
    while (ix < src.length) {
        // Sticky matching starts exactly at lastIndex; keep it tied to ix.
        r.lastIndex = ix;
        const m = r.exec(src);
        if (!m) {
            throw new Error('No match - internal error.');
        }
        const groups = m.groups ?? {};
        const spec = specs.find((_, specIx) => groups[groupName(specIx)] !== undefined);
        if (!spec) {
            throw new Error('No match - internal error.');
        }
        // Use the match result itself rather than the deprecated RegExp.lastMatch static.
        const value = m[0];
        if (value.length === 0) {
            // An empty match would never advance ix and would loop forever.
            throw new ErrorResult('Empty match - regex is invalid.: ' + patterns);
        }
        const start = ix;
        ix += value.length;
        const end = ix;
        if (spec.action !== TokenAction.Ignore) {
            // Named tokens use the specification's label as grammar symbol;
            // all others use the matched text itself.
            const grammarName = spec.action === TokenAction.Name ? spec.label! : value;
            const token = new Token(value, grammarName, spec, [start, end]);
            tokens.push(token);
            if (spec.action === TokenAction.Error) {
                const err = new SourcedErrorResult(`Unrecognized character: ${token.safeDisplayValue()}`, token);
                return new TokenSet(tokens, err);
            }
        }
    }
    return new TokenSet(tokens);
}

/*
 * Lists every grammar symbol the given token specifications can produce, in
 * specification order: the label of each `Name` specification and each
 * individual literal of each `Literal` specification. Specifications with
 * any other action contribute nothing.
 */
export function getTerminalSymbols(tokenspecs: TokenSpec[]) {
    const symbols: string[] = [];
    for (const spec of tokenspecs) {
        if (spec.action === TokenAction.Name) {
            symbols.push(spec.label!);
        } else if (spec.action === TokenAction.Literal) {
            symbols.push(...splitLiterals(spec.pattern));
        }
    }
    return symbols;
}
