/**
 * Error thrown by the scanner when a required token is not found.
 *
 * The unconventional spelling distinguishes it from the built-in SyntaxError.
 */
export class SyntaxErr extends Error {
    /**
     * @param message Human-readable description of what was expected.
     */
    constructor(message?: string) {
        super(message);
        // Restore the prototype chain so `instanceof SyntaxErr` keeps working even
        // when the code is compiled to ES5, where subclassing Error loses it.
        Object.setPrototypeOf(this, new.target.prototype);
        // Report as "SyntaxErr: ..." rather than the inherited "Error: ...".
        this.name = 'SyntaxErr';
    }
}

/*
 * Whitespace is added as a property of the preceding token,
 * although there is a separate token for initial whitespace.
 * (We don't store preceding whitespace on a token since this would require backtracking in optional tokens.)
 */

/**
 * The kinds of token a scanned piece of source text can be classified as.
 * Each member's string is its runtime value.
 */
export enum TokenType {
    // numeric literal (see Scanner.optNumber / optZeroOne; operand() also uses it for '1')
    Number = 'number',
    // one of the registers A, D or *A
    Register = 'register',
    // binary or prefix operator (see Scanner.optOperator / optPrefixOperator)
    Operator = 'operator',
    // fixed punctuation string (see Scanner.optPunctuation)
    Punctuation = 'punctuation',
    // jump mnemonic starting with 'J' (see Scanner.jump)
    Jump = 'jump',
    // '$'-prefixed name (see Scanner.optPlaceholder)
    Placeholder = 'placeholder',
    // identifier (see Scanner.optIdentifier).
    // NOTE: the member name is misspelt, but it is part of the exported interface.
    Indentifier = 'identifier',
    // whitespace at the start of a line (see InitialWhitespace)
    LeadingWhitespace = 'whitespace',
    // text that could not be scanned as part of an instruction (see ErrorToken)
    Error = 'error',
    // NOTE(review): no code visible in this file creates a token of this type;
    // presumably it is assigned by a caller — confirm.
    Comment = 'comment',
}

/**
 * Position and extent of a piece of text within the source.
 *
 * The stored offsets are 0-based, whereas the line and column numbers used in
 * the Monaco editor are 1-based; the getters perform that conversion.
 */
export class Location {
    readonly lineOffset: number;
    readonly charOffset: number;
    readonly length: number;

    /**
     * @param lineOffset 0-based line index.
     * @param charOffset 0-based index of the first character within the line.
     * @param length Number of characters covered.
     */
    constructor(lineOffset: number, charOffset: number, length: number) {
        this.lineOffset = lineOffset;
        this.charOffset = charOffset;
        this.length = length;
    }

    /** 1-based line number. */
    get lineNumber(): number {
        return 1 + this.lineOffset;
    }

    /** 1-based column of the first character. */
    get startColumn(): number {
        return 1 + this.charOffset;
    }

    /** 1-based column just past the last character. */
    get endColumn(): number {
        const firstColumn = this.startColumn;
        return firstColumn + this.length;
    }
}

/** Common shape of every token produced by the scanner. */
export interface Token {
    // classification of the token
    type: TokenType;
    // the token text (empty for the initial-whitespace token)
    value: string;
    // whitespace that directly followed the token, or null when there was none
    wsAfter: string | null;
    // false for error tokens, true for the other token classes in this file
    isValid: boolean;
    // where the token text sits in the source
    location: Location;
}

/**
 * Token for whitespace at the start of a line.
 *
 * It has an empty `value`; the whitespace text itself is carried in `wsAfter`.
 */
class InitialWhitespace implements Token {
    location: Location;
    wsAfter: string;
    value: string;
    isValid: boolean;
    type: TokenType;

    /**
     * @param location Extent of the whitespace.
     * @param wsAfter The whitespace text.
     */
    constructor(location: Location, wsAfter: string) {
        this.location = location;
        this.wsAfter = wsAfter;
        this.value = '';
        this.isValid = true;
        this.type = TokenType.LeadingWhitespace;
    }
}

/** A valid token whose type is chosen by the caller. */
export class GenericToken implements Token {
    location: Location;
    type: TokenType;
    value: string;
    wsAfter: string | null;
    isValid: boolean;

    /**
     * @param location Extent of the token text.
     * @param type Classification of the token.
     * @param value The token text.
     * @param wsAfter Whitespace following the token, or null.
     */
    constructor(location: Location, type: TokenType, value: string, wsAfter: string | null) {
        this.location = location;
        this.type = type;
        this.value = value;
        this.wsAfter = wsAfter;
        this.isValid = true;
    }
}

/** A valid number token that carries the parsed value next to its source text. */
export class NumericToken implements Token {
    location: Location;
    value: string;
    numericValue: number;
    wsAfter: string | null;
    type: TokenType;
    isValid: boolean;

    /**
     * @param location Extent of the literal.
     * @param value The literal as written in the source.
     * @param numericValue The number the literal denotes.
     * @param wsAfter Whitespace following the token, or null.
     */
    constructor(location: Location, value: string, numericValue: number, wsAfter: string | null) {
        this.location = location;
        this.value = value;
        this.numericValue = numericValue;
        this.wsAfter = wsAfter;
        this.type = TokenType.Number;
        this.isValid = true;
    }
}

/** An invalid token: its type is `TokenType.Error` and `isValid` is false. */
export class ErrorToken implements Token {
    location: Location;
    value: string;
    wsAfter: string | null;
    type: TokenType;
    isValid: boolean;

    /**
     * @param location Extent of the offending text.
     * @param value The offending text.
     * @param wsAfter Whitespace following the text, or null.
     */
    constructor(location: Location, value: string, wsAfter: string | null) {
        this.location = location;
        this.value = value;
        this.wsAfter = wsAfter;
        this.type = TokenType.Error;
        this.isValid = false;
    }
}

/** A consumed piece of text together with its location, not yet classified. */
class RawToken {
    token: string;
    location: Location;

    /**
     * @param token The consumed text.
     * @param location Where the text was found.
     */
    constructor(token: string, location: Location) {
        this.token = token;
        this.location = location;
    }
}

/**
 * Scans a single line of source text into tokens.
 *
 * The `opt...` methods try to read something at the current position and return
 * `null`, consuming nothing, when it is not there. `jump`, `operand`,
 * `register`, `expect` and `match` throw a `SyntaxErr` instead. The token-level
 * methods append the token they create to `tokens`; whitespace that follows a
 * token is consumed with it and stored on the token as `wsAfter`.
 */
export class Scanner {
    /** The part of the line that has not been consumed yet. */
    rest: string;
    /** Tokens created so far, in the order they were scanned. */
    tokens: Token[] = [];
    /** 0-based offset of `rest` within `code`. */
    charOffset = 0;

    /**
     * @param code The text of the line to scan.
     * @param lineOffset Line offset copied into the location of every token.
     */
    constructor(readonly code: string, public lineOffset: number) {
        this.rest = code;
    }

    /**
     * Reads whitespace at the current position (normally the start of the line)
     * as a separate token.
     * @returns The whitespace token, or null when there is no whitespace.
     */
    optInitialWhitespace(): InitialWhitespace | null {
        // Remember where the whitespace starts; this is 0 at the start of a line.
        const start = this.charOffset;
        const ws = this.optWhitespace();
        if (!ws) {
            return null;
        }
        const token = new InitialWhitespace(new Location(this.lineOffset, start, ws.length), ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads an identifier: a letter or '.' followed by word characters.
     * @returns The identifier token, or null when there is none.
     */
    optIdentifier(): GenericToken | null {
        const match = /^[.a-zA-Z]\w*/.exec(this.rest);
        if (!match) {
            return null;
        }
        const name = match[0];
        // special case - strings A, D, AD, DA are excluded from matching identifiers
        if (name === 'A' || name === 'D' || name === 'AD' || name === 'DA') {
            return null;
        }
        const raw = this.consume(name);
        const ws = this.optWhitespace();
        const token = new GenericToken(raw.location, TokenType.Indentifier, name, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads only the literal numbers '0' or '1' (optNumber also matches these).
     * @returns The number token, or null when there is none.
     */
    optZeroOne(): NumericToken | null {
        const tok = this.optMatch(/^(0|1)\b/i);
        if (!tok) {
            return null;
        }
        const [raw, ws] = tok;
        const token = new NumericToken(raw.location, raw.token, parseInt(raw.token, 10), ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads a number: hexadecimal (`0x...`), binary (`0b...`) or decimal.
     * Underscores are accepted as separators in the hexadecimal and binary forms.
     * @returns The number token, or null when there is none.
     */
    optNumber(): NumericToken | null {
        const tok = this.optMatch(/^(0x[0-9a-fA-F_]+|0b[01_]+|\d+\b)/i);
        if (!tok) {
            return null;
        }
        const [raw, ws] = tok;
        const text = raw.token;
        // The pattern is case-insensitive, so normalise the prefix before comparing.
        const prefix = text.substring(0, 2).toLowerCase();
        const radix = prefix === '0x' ? 16 : prefix === '0b' ? 2 : 10;
        const digits = radix === 10 ? text : text.substring(2).replace(/_/g, '');
        // NOTE: a prefixed literal made up of underscores only (e.g. `0x_`) is
        // accepted by the pattern and produces NaN here.
        const token = new NumericToken(raw.location, text, parseInt(digits, radix), ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads a placeholder: '$' followed by word characters.
     * @returns The placeholder token, or null when there is none.
     */
    optPlaceholder(): GenericToken | null {
        const tok = this.optMatch(/^\$\w+/);
        if (!tok) {
            return null;
        }
        const [raw, ws] = tok;
        const token = new GenericToken(raw.location, TokenType.Placeholder, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads the given string as a punctuation token.
     * @param str The exact text to look for.
     * @returns The punctuation token, or null when `rest` does not start with `str`.
     */
    optPunctuation(str: string): GenericToken | null {
        return this.optString(str, TokenType.Punctuation);
    }

    /**
     * Reads the given string as a token of the given type.
     * @param str The exact text to look for.
     * @param type The type to give the token.
     * @returns The token, or null when `rest` does not start with `str`.
     */
    optString(str: string, type: TokenType): GenericToken | null {
        if (this.rest.substring(0, str.length) !== str) {
            return null;
        }
        const raw = this.consume(str);
        const ws = this.optWhitespace();
        const token = new GenericToken(raw.location, type, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads a jump mnemonic: 'J' followed by two word characters.
     * @returns The jump token.
     * @throws SyntaxErr when there is no jump mnemonic at the current position.
     */
    jump(): GenericToken {
        const [raw, ws] = this.match(/^J\w\w/, 'Jump instruction expected after a semicolon.');
        const token = new GenericToken(raw.location, TokenType.Jump, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads a binary operator: '-', '+', '&' or '|'.
     * @returns The operator token, or null when there is none.
     */
    optOperator(): GenericToken | null {
        // minus should not be followed by '>' since then it is an arrow!
        const tok = this.optMatch(/^(-(?!>)|\+|&|\|)/);
        if (!tok) {
            return null;
        }
        const [raw, ws] = tok;
        const token = new GenericToken(raw.location, TokenType.Operator, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads a prefix operator: '-' or '~'.
     * @returns The operator token, or null when there is none.
     */
    optPrefixOperator(): GenericToken | null {
        // A character class keeps both alternatives anchored to the start of
        // `rest`. The former pattern /^-|~/ parsed as (^-)|(~), so a '~' further
        // along the line matched and the wrong leading text was consumed.
        const tok = this.optMatch(/^[-~]/);
        if (!tok) {
            return null;
        }
        const [raw, ws] = tok;
        const token = new GenericToken(raw.location, TokenType.Operator, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads an operand: a register (A, *A or D) or the number 1.
     * @returns A Register token, or a Number token for '1'.
     * @throws SyntaxErr when there is no operand at the current position.
     */
    operand(): GenericToken {
        const [raw, ws] = this.match(
            /^(\*?A|D|1)\b/,
            'Expected either a register (A or D) or a number. Found:' + this.rest
        );
        const type = raw.token === '1' ? TokenType.Number : TokenType.Register;
        const token = new GenericToken(raw.location, type, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads a register: A, D or *A.
     * @returns The register token.
     * @throws SyntaxErr when there is no register at the current position.
     */
    register(): GenericToken {
        const [raw, ws] = this.match(/^(A|D|\*A)\b/, 'Expected a register: A, D or *A. Found:' + this.rest);
        const token = new GenericToken(raw.location, TokenType.Register, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads a register (A, *A or D) if there is one.
     * @returns The register token, or null when there is none.
     */
    optRegister(): GenericToken | null {
        const tok = this.optMatch(/^(\*?A|D)\b/);
        if (!tok) {
            return null;
        }
        const [raw, ws] = tok;
        const token = new GenericToken(raw.location, TokenType.Register, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Reads any characters following the end of an instruction as an error token
     * (this would not be trailing whitespace since this is already trimmed).
     * @returns The error token, or null when nothing is left.
     */
    optTrailer(): ErrorToken | null {
        const tok = this.optMatch(/^.+/);
        if (!tok) {
            return null;
        }
        const [raw, ws] = tok;
        const token = new ErrorToken(raw.location, raw.token, ws);
        this.tokens.push(token);
        return token;
    }

    /**
     * Consumes whitespace at the current position without creating a token.
     * @returns The whitespace, or null when there is none.
     */
    optWhitespace(): string | null {
        const match = /^\s+/.exec(this.rest);
        if (!match) {
            return null;
        }
        this.consume(match[0]);
        return match[0];
    }

    /**
     * Consumes the text matched by `regex` plus any whitespace after it.
     * @param regex Pattern to match; it must be anchored with `^`, because the
     *     matched text is assumed to sit at the start of `rest`.
     * @param errMsg Message of the error thrown when the pattern does not match.
     * @returns The consumed text with its location, and the whitespace after it (or null).
     * @throws SyntaxErr when the pattern does not match.
     */
    match(regex: RegExp, errMsg: string): [RawToken, string | null] {
        const result = this.optMatch(regex);
        if (!result) {
            throw new SyntaxErr(errMsg);
        }
        return result;
    }

    /**
     * Like `match`, but returns null instead of throwing when the pattern does not match.
     * @param regex Pattern to match; it must be anchored with `^`, because the
     *     matched text is assumed to sit at the start of `rest`.
     * @returns The consumed text with its location and the whitespace after it, or null.
     */
    optMatch(regex: RegExp): [RawToken, string | null] | null {
        const match = this.rest.match(regex);
        if (!match) {
            return null;
        }
        const raw = this.consume(match[0]);
        const ws = this.optWhitespace();
        return [raw, ws];
    }

    /**
     * Removes `str.length` characters from the front of `rest` and advances `charOffset`.
     * @param str The text being consumed; the caller ensures `rest` starts with it.
     * @returns `str` paired with the location it was consumed from.
     */
    consume(str: string): RawToken {
        const start = this.charOffset;
        this.charOffset += str.length;
        this.rest = this.rest.substring(str.length);
        return new RawToken(str, new Location(this.lineOffset, start, str.length));
    }

    /**
     * Reads the given string as a token of the given type, failing when it is absent.
     * @param str The exact text to look for.
     * @param errMsg Message of the error thrown when `str` is not found.
     * @param type The type to give the token.
     * @returns The token.
     * @throws SyntaxErr when `rest` does not start with `str`.
     */
    expect(str: string, errMsg: string, type: TokenType): GenericToken {
        const token = this.optString(str, type);
        if (!token) {
            throw new SyntaxErr(errMsg);
        }
        return token;
    }

    /**
     * Consumes everything that is left on the line without creating a token.
     * @returns The consumed text.
     */
    toEnd(): string {
        const remainder = this.rest;
        this.consume(remainder);
        return remainder;
    }
}
