import type { Token, TokenType } from "./lexer"; import { TOKEN_TYPES } from "./lexer"; import type { Statement } from "./ast"; import { Program, If, For, SetStatement, MemberExpression, CallExpression, Identifier, NumericLiteral, StringLiteral, BooleanLiteral, ArrayLiteral, ObjectLiteral, BinaryExpression, FilterExpression, TestExpression, UnaryExpression, SliceExpression, KeywordArgumentExpression, TupleLiteral, } from "./ast"; /** * Generate the Abstract Syntax Tree (AST) from a list of tokens. * Operator precedence can be found here: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_precedence#table */ export function parse(tokens: Token[]): Program { const program = new Program([]); let current = 0; /** * Consume the next token if it matches the expected type, otherwise throw an error. * @param type The expected token type * @param error The error message to throw if the token does not match the expected type * @returns The consumed token */ function expect(type: string, error: string): Token { const prev = tokens[current++]; if (!prev || prev.type !== type) { throw new Error(`Parser Error: ${error}. ${prev.type} !== ${type}.`); } return prev; } function parseAny(): Statement { switch (tokens[current].type) { case TOKEN_TYPES.Text: return parseText(); case TOKEN_TYPES.OpenStatement: return parseJinjaStatement(); case TOKEN_TYPES.OpenExpression: return parseJinjaExpression(); default: throw new SyntaxError(`Unexpected token type: ${tokens[current].type}`); } } function not(...types: TokenType[]): boolean { return current + types.length <= tokens.length && types.some((type, i) => type !== tokens[current + i].type); } function is(...types: TokenType[]): boolean { return current + types.length <= tokens.length && types.every((type, i) => type === tokens[current + i].type); } function parseText(): StringLiteral { return new StringLiteral(expect(TOKEN_TYPES.Text, "Expected text token").value); } function parseJinjaStatement(): Statement { // Consume {% %} tokens expect(TOKEN_TYPES.OpenStatement, "Expected opening statement token"); let result; switch (tokens[current].type) { case TOKEN_TYPES.Set: ++current; result = parseSetStatement(); expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); break; case TOKEN_TYPES.If: ++current; result = parseIfStatement(); expect(TOKEN_TYPES.OpenStatement, "Expected {% token"); expect(TOKEN_TYPES.EndIf, "Expected endif token"); expect(TOKEN_TYPES.CloseStatement, "Expected %} token"); break; case TOKEN_TYPES.For: ++current; result = parseForStatement(); expect(TOKEN_TYPES.OpenStatement, "Expected {% token"); expect(TOKEN_TYPES.EndFor, "Expected endfor token"); expect(TOKEN_TYPES.CloseStatement, "Expected %} token"); break; default: throw new SyntaxError(`Unknown statement type: ${tokens[current].type}`); } return result; } function parseJinjaExpression(): Statement { // Consume {{ }} tokens expect(TOKEN_TYPES.OpenExpression, "Expected opening expression token"); const result = parseExpression(); expect(TOKEN_TYPES.CloseExpression, "Expected closing expression token"); return result; } // NOTE: `set` acts as both declaration statement and assignment expression function parseSetStatement(): Statement { const left = parseExpression(); if (is(TOKEN_TYPES.Equals)) { ++current; const value = parseSetStatement(); return new SetStatement(left, value); } return left; } function parseIfStatement(): If { const test = parseExpression(); expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); const body: Statement[] = []; const alternate: Statement[] = []; // Keep parsing if body until we reach the first {% elif %} or {% else %} or {% endif %} while ( !( tokens[current]?.type === TOKEN_TYPES.OpenStatement && (tokens[current + 1]?.type === TOKEN_TYPES.ElseIf || tokens[current + 1]?.type === TOKEN_TYPES.Else || tokens[current + 1]?.type === TOKEN_TYPES.EndIf) ) ) { body.push(parseAny()); } // Alternate branch: Check for {% elif %} or {% else %} if ( tokens[current]?.type === TOKEN_TYPES.OpenStatement && tokens[current + 1]?.type !== TOKEN_TYPES.EndIf // There is some body ) { ++current; // eat {% token if (is(TOKEN_TYPES.ElseIf)) { expect(TOKEN_TYPES.ElseIf, "Expected elseif token"); alternate.push(parseIfStatement()); } else { // tokens[current]?.type === TokenType.Else expect(TOKEN_TYPES.Else, "Expected else token"); expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); // keep going until we hit {% endif %} while ( !(tokens[current]?.type === TOKEN_TYPES.OpenStatement && tokens[current + 1]?.type === TOKEN_TYPES.EndIf) ) { alternate.push(parseAny()); } } } return new If(test, body, alternate); } function parseExpressionSequence(primary = false): Statement { const fn = primary ? parsePrimaryExpression : parseExpression; const expressions = [fn()]; const isTuple = is(TOKEN_TYPES.Comma); while (isTuple) { ++current; // consume comma expressions.push(fn()); if (!is(TOKEN_TYPES.Comma)) { break; } } return isTuple ? new TupleLiteral(expressions) : expressions[0]; } function parseForStatement(): For { // e.g., `message` in `for message in messages` const loopVariable = parseExpressionSequence(true); // should be an identifier if (!(loopVariable instanceof Identifier || loopVariable instanceof TupleLiteral)) { throw new SyntaxError(`Expected identifier/tuple for the loop variable, got ${loopVariable.type} instead`); } expect(TOKEN_TYPES.In, "Expected `in` keyword following loop variable"); // `messages` in `for message in messages` const iterable = parseExpression(); expect(TOKEN_TYPES.CloseStatement, "Expected closing statement token"); // Body of for loop const body: Statement[] = []; // Keep going until we hit {% endfor while (not(TOKEN_TYPES.OpenStatement, TOKEN_TYPES.EndFor)) { body.push(parseAny()); } return new For(loopVariable, iterable, body); } function parseExpression(): Statement { // Choose parse function with lowest precedence return parseTernaryExpression(); } function parseTernaryExpression(): Statement { const a = parseLogicalOrExpression(); if (is(TOKEN_TYPES.If)) { // Ternary expression ++current; // consume if const predicate = parseLogicalOrExpression(); expect(TOKEN_TYPES.Else, "Expected else token"); const b = parseLogicalOrExpression(); return new If(predicate, [a], [b]); } return a; } function parseLogicalOrExpression(): Statement { let left = parseLogicalAndExpression(); while (is(TOKEN_TYPES.Or)) { const operator = tokens[current]; ++current; const right = parseLogicalAndExpression(); left = new BinaryExpression(operator, left, right); } return left; } function parseLogicalAndExpression(): Statement { let left = parseLogicalNegationExpression(); while (is(TOKEN_TYPES.And)) { const operator = tokens[current]; ++current; const right = parseLogicalNegationExpression(); left = new BinaryExpression(operator, left, right); } return left; } function parseLogicalNegationExpression(): Statement { let right: UnaryExpression | undefined; // Try parse unary operators while (is(TOKEN_TYPES.Not)) { // not not ... const operator = tokens[current]; ++current; const arg = parseLogicalNegationExpression(); // not test.x === not (test.x) right = new UnaryExpression(operator, arg); } return right ?? parseComparisonExpression(); } function parseComparisonExpression(): Statement { // NOTE: membership has same precedence as comparison // e.g., ('a' in 'apple' == 'b' in 'banana') evaluates as ('a' in ('apple' == ('b' in 'banana'))) let left = parseAdditiveExpression(); while (is(TOKEN_TYPES.ComparisonBinaryOperator) || is(TOKEN_TYPES.In) || is(TOKEN_TYPES.NotIn)) { const operator = tokens[current]; ++current; const right = parseAdditiveExpression(); left = new BinaryExpression(operator, left, right); } return left; } function parseAdditiveExpression(): Statement { let left = parseMultiplicativeExpression(); while (is(TOKEN_TYPES.AdditiveBinaryOperator)) { const operator = tokens[current]; ++current; const right = parseMultiplicativeExpression(); left = new BinaryExpression(operator, left, right); } return left; } function parseCallMemberExpression(): Statement { // Handle member expressions recursively const member = parseMemberExpression(); // foo.x if (is(TOKEN_TYPES.OpenParen)) { // foo.x() return parseCallExpression(member); } return member; } function parseCallExpression(callee: Statement): CallExpression { let callExpression = new CallExpression(callee, parseArgs()); if (is(TOKEN_TYPES.OpenParen)) { // foo.x()() callExpression = parseCallExpression(callExpression); } return callExpression; } function parseArgs(): Statement[] { // add (x + 5, foo()) expect(TOKEN_TYPES.OpenParen, "Expected opening parenthesis for arguments list"); const args = parseArgumentsList(); expect(TOKEN_TYPES.CloseParen, "Expected closing parenthesis for arguments list"); return args; } function parseArgumentsList(): Statement[] { // comma-separated arguments list const args = []; while (!is(TOKEN_TYPES.CloseParen)) { let argument = parseExpression(); if (is(TOKEN_TYPES.Equals)) { // keyword argument // e.g., func(x = 5, y = a or b) ++current; // consume equals if (!(argument instanceof Identifier)) { throw new SyntaxError(`Expected identifier for keyword argument`); } const value = parseExpression(); argument = new KeywordArgumentExpression(argument, value); } args.push(argument); if (is(TOKEN_TYPES.Comma)) { ++current; // consume comma } } return args; } function parseMemberExpressionArgumentsList(): Statement { // NOTE: This also handles slice expressions colon-separated arguments list // e.g., ['test'], [0], [:2], [1:], [1:2], [1:2:3] const slices: (Statement | undefined)[] = []; let isSlice = false; while (!is(TOKEN_TYPES.CloseSquareBracket)) { if (is(TOKEN_TYPES.Colon)) { // A case where a default is used // e.g., [:2] will be parsed as [undefined, 2] slices.push(undefined); ++current; // consume colon isSlice = true; } else { slices.push(parseExpression()); if (is(TOKEN_TYPES.Colon)) { ++current; // consume colon after expression, if it exists isSlice = true; } } } if (slices.length === 0) { // [] throw new SyntaxError(`Expected at least one argument for member/slice expression`); } if (isSlice) { if (slices.length > 3) { throw new SyntaxError(`Expected 0-3 arguments for slice expression`); } return new SliceExpression(...slices); } return slices[0] as Statement; // normal member expression } function parseMemberExpression(): Statement { let object = parsePrimaryExpression(); while (is(TOKEN_TYPES.Dot) || is(TOKEN_TYPES.OpenSquareBracket)) { const operator = tokens[current]; // . or [ ++current; let property: Statement; const computed = operator.type !== TOKEN_TYPES.Dot; if (computed) { // computed (i.e., bracket notation: obj[expr]) property = parseMemberExpressionArgumentsList(); expect(TOKEN_TYPES.CloseSquareBracket, "Expected closing square bracket"); } else { // non-computed (i.e., dot notation: obj.expr) property = parsePrimaryExpression(); // should be an identifier if (property.type !== "Identifier") { throw new SyntaxError(`Expected identifier following dot operator`); } } object = new MemberExpression(object, property, computed); } return object; } function parseMultiplicativeExpression(): Statement { let left = parseTestExpression(); // Multiplicative operators have higher precedence than test expressions // e.g., (4 * 4 is divisibleby(2)) evaluates as (4 * (4 is divisibleby(2))) while (is(TOKEN_TYPES.MultiplicativeBinaryOperator)) { const operator = tokens[current]; ++current; const right = parseTestExpression(); left = new BinaryExpression(operator, left, right); } return left; } function parseTestExpression(): Statement { let operand = parseFilterExpression(); while (is(TOKEN_TYPES.Is)) { // Support chaining tests ++current; // consume is const negate = is(TOKEN_TYPES.Not); if (negate) { ++current; // consume not } let filter = parsePrimaryExpression(); if (filter instanceof BooleanLiteral) { // Special case: treat boolean literals as identifiers filter = new Identifier(filter.value.toString()); } if (!(filter instanceof Identifier)) { throw new SyntaxError(`Expected identifier for the test`); } // TODO: Add support for non-identifier tests operand = new TestExpression(operand, negate, filter); } return operand; } function parseFilterExpression(): Statement { let operand = parseCallMemberExpression(); while (is(TOKEN_TYPES.Pipe)) { // Support chaining filters ++current; // consume pipe let filter = parsePrimaryExpression(); // should be an identifier if (!(filter instanceof Identifier)) { throw new SyntaxError(`Expected identifier for the filter`); } if (is(TOKEN_TYPES.OpenParen)) { filter = parseCallExpression(filter); } operand = new FilterExpression(operand, filter as Identifier | CallExpression); } return operand; } function parsePrimaryExpression(): Statement { // Primary expression: number, string, identifier, function call, parenthesized expression const token = tokens[current]; switch (token.type) { case TOKEN_TYPES.NumericLiteral: ++current; return new NumericLiteral(Number(token.value)); case TOKEN_TYPES.StringLiteral: ++current; return new StringLiteral(token.value); case TOKEN_TYPES.BooleanLiteral: ++current; return new BooleanLiteral(token.value === "true"); case TOKEN_TYPES.Identifier: ++current; return new Identifier(token.value); case TOKEN_TYPES.OpenParen: { ++current; // consume opening parenthesis const expression = parseExpressionSequence(); if (tokens[current].type !== TOKEN_TYPES.CloseParen) { throw new SyntaxError(`Expected closing parenthesis, got ${tokens[current].type} instead`); } ++current; // consume closing parenthesis return expression; } case TOKEN_TYPES.OpenSquareBracket: { ++current; // consume opening square bracket const values = []; while (!is(TOKEN_TYPES.CloseSquareBracket)) { values.push(parseExpression()); if (is(TOKEN_TYPES.Comma)) { ++current; // consume comma } } ++current; // consume closing square bracket return new ArrayLiteral(values); } case TOKEN_TYPES.OpenCurlyBracket: { ++current; // consume opening curly bracket const values = new Map(); while (!is(TOKEN_TYPES.CloseCurlyBracket)) { const key = parseExpression(); expect(TOKEN_TYPES.Colon, "Expected colon between key and value in object literal"); const value = parseExpression(); values.set(key, value); if (is(TOKEN_TYPES.Comma)) { ++current; // consume comma } } ++current; // consume closing curly bracket return new ObjectLiteral(values); } default: throw new SyntaxError(`Unexpected token: ${token.type}`); } } while (current < tokens.length) { program.body.push(parseAny()); } return program; }