From bb53b8271edaec56214b304ee93961cadbd9d2f3 Mon Sep 17 00:00:00 2001 From: Jay Pratt Date: Fri, 4 Oct 2024 19:29:34 +1000 Subject: [PATCH] Huge progress --- tree-sitter-tako/grammar.js | 160 +++++++++++++----- tree-sitter-tako/test/corpus/definitions.tk | 26 ++- tree-sitter-tako/test/corpus/hello.tk | 5 - tree-sitter-tako/test/corpus/number.tk | 10 +- tree-sitter-tako/test/corpus/simple_sum.tk | 24 ++- .../test/corpus/string_literals.tk | 20 +++ 6 files changed, 173 insertions(+), 72 deletions(-) delete mode 100644 tree-sitter-tako/test/corpus/hello.tk create mode 100644 tree-sitter-tako/test/corpus/string_literals.tk diff --git a/tree-sitter-tako/grammar.js b/tree-sitter-tako/grammar.js index 08981c21..9945808a 100644 --- a/tree-sitter-tako/grammar.js +++ b/tree-sitter-tako/grammar.js @@ -1,40 +1,85 @@ // For now we match rust. // https://doc.rust-lang.org/reference/expressions.html#expression-precedence +const {left, right} = prec; const PREC = { - call: 15, - field: 14, - try: 13, - unary: 12, - cast: 11, - multiplicative: 10, - additive: 9, - shift: 8, - bitand: 7, - bitxor: 6, - bitor: 5, - comparative: 4, - and: 3, - or: 2, - range: 1, + call: 17, + field: 16, + try: 15, + neg: 14, + not: 14, + bitnot: 14, + cast: 13, + mul: 12, + div: 12, + mod: 12, + add: 11, + sub: 11, + left_shift: 10, + right_shift: 10, + bitand: 9, + bitxor: 8, + bitor: 7, + // comparative: 6, + equals: 6, + not_equals: 6, + less_than: 6, + less_than_equals: 6, + greater_than: 6, + greater_than_equals: 6, + and: 5, + or: 4, + range: 3, + has_type: 2, assign: 0, - comma: -1, - has_type: -2, - closure: -3, + closure: -2, + sequence: -3, + postfix_sequence: -3, }; -const OPERATORS = [ - ['comma', ','], +const RIGHT_OPERATORS = [ ['assign', '='], +]; + +const OPERATORS = [ + ['field', '.'], ['has_type', ':'], + ['sequence', ';'], ['and', '&&'], ['or', '||'], ['bitand', '&'], ['bitor', '|'], ['bitxor', '^'], - ['comparative', choice('==', '!=', '<', '<=', '>', '>=')], - ['shift', choice('<<', '>>')], - ['additive', choice('+', '-')], - ['multiplicative', choice('*', '/', '%')], + ['equals', '=='], + ['not_equals', '!='], + ['less_than', '<'], + ['less_than_equals', '<='], + ['greater_than', '>'], + ['greater_than_equals', '>='], + ['left_shift', '<<'], + ['right_shift', '>>'], + ['add', '+'], + ['sub', '-'], + ['mul', '*'], + ['div', '/'], + ['mod', '%'], +]; + +const POSTFIX_OPERATORS = [ + ['try', '?'], + ['sequence', ';'], +]; + +const UNARY_OPERATORS = [ + ['neg', '-'], + ['not', '!'], + ['bitnot', '~'], +]; + +const ALL_OPERATORS = [ + ...OPERATORS, + ...RIGHT_OPERATORS, + ...POSTFIX_OPERATORS, + ...UNARY_OPERATORS, ]; const separated_one = (entry, delimiter) => { @@ -49,9 +94,30 @@ function operators_gen() { const operators = {}; for (const [name, operator] of OPERATORS) { const precedence = PREC[name]; - operators[name] = ($) => prec.left(precedence, seq( + operators[name] = ($) => left(precedence, seq( + field('left', $._expression), + field('operator', operator), + field('right', $._expression), + )); + } + for (const [name, operator] of RIGHT_OPERATORS) { + const precedence = PREC[name]; + operators[name] = ($) => right(precedence, seq( field('left', $._expression), - // @ts-ignore + field('operator', operator), + field('right', $._expression), + )); + } + for (const [name, operator] of POSTFIX_OPERATORS) { + const precedence = PREC[name]; + operators[name] = ($) => left(precedence, seq( + field('left', $._expression), + field('operator', operator), + )); + } + for (const [name, operator] of UNARY_OPERATORS) { + const precedence = PREC[name]; + operators[name] = ($) => right(precedence, seq( field('operator', operator), field('right', $._expression), )); @@ -64,26 +130,22 @@ module.exports = grammar({ extras: ($) => [$.nesting_comment, $.single_line_comment, "\r", "\n", "\t", " "], rules: { // TODO: add the actual grammar rules - source_file: ($) => seq(optional($.shebang), separated_one(optional($._non_empty_body), $.heading)), - _non_empty_body: ($) => separated_one($._statement, ';'), - _statement: ($) => choice( - $.block, - $._expression - ), - block: ($) => seq('{', optional($._non_empty_body), '}'), + source_file: ($) => seq(optional($.shebang), separated_one(optional($._expression), $.heading)), + block: ($) => seq('{', optional($._expression), '}'), _expression: ($) => choice( - $._binary_expression, // Consider keeping this name to support editing? + $._operator_expression, // Consider keeping this name to support editing? $.call, seq('(', $._expression ,')'), + $.block, $.string_literal, $._number, $.hex_literal, $.color, $.ident, ), - call: ($) => seq($._expression, '(', separated($._expression, ','), optional(','), ')'), - _binary_expression: ($) => { - return choice(...OPERATORS.map(([name, _operator_parser]) => { + call: ($) => left(PREC.call, seq($._expression, '(', separated($._expression, ','), optional(','), ')')), + _operator_expression: ($) => { + return choice(...ALL_OPERATORS.map(([name, _operator_parser]) => { try { return ($)[name]; // Get the parser 'named'. } catch (e) { @@ -124,13 +186,23 @@ module.exports = grammar({ _hex_char_6: (_) => /[a-fA-F0-9_]{6}/, _hex_char_8: (_) => /[a-fA-F0-9_]{8}/, ident: (_) => /[a-zA-Z][a-zA-Z0-9_]*/, - string_literal: $ => seq( - '"', - repeat(choice( - $.escape_sequence, - /[^\"]/, - )), - token.immediate('"'), + string_literal: $ => choice( + seq( + '\'', + repeat(choice( + $.escape_sequence, + /[^\']/, + )), + token.immediate('\''), + ), + seq( + '"', + repeat(choice( + $.escape_sequence, + /[^\"]/, + )), + token.immediate('"'), + ), ), escape_sequence: _ => token.immediate( seq('\\', diff --git a/tree-sitter-tako/test/corpus/definitions.tk b/tree-sitter-tako/test/corpus/definitions.tk index 967789c7..8988fae7 100644 --- a/tree-sitter-tako/test/corpus/definitions.tk +++ b/tree-sitter-tako/test/corpus/definitions.tk @@ -79,19 +79,19 @@ test(a=3,b=2) = 3 ) ) ================== -Function Definition using empty block +Constant Definition using empty block ================== test = { } --- (source_file (assign - (call (ident)) + (ident) (block) ) ) ================== -Function Definition using block +Constant Definition using block ================== test = { 3 @@ -104,7 +104,7 @@ test = { ) ) ================== -Function Definition With Arguments Using Block +Function Definition using Block ================== test() = { 3 @@ -112,7 +112,23 @@ test() = { --- (source_file (assign - (call (ident) (definition_arguments)) + (call (ident)) + (block (int_literal)) + ) +) +================== +Function Definition with arguments using Block +================== +test(a, b) = { + 3 +} +--- +(source_file + (assign + (call (ident) + (ident) + (ident) + ) (block (int_literal)) ) ) diff --git a/tree-sitter-tako/test/corpus/hello.tk b/tree-sitter-tako/test/corpus/hello.tk deleted file mode 100644 index c8f25bce..00000000 --- a/tree-sitter-tako/test/corpus/hello.tk +++ /dev/null @@ -1,5 +0,0 @@ -================== -Hello String literal -================== -"Hello" ---- diff --git a/tree-sitter-tako/test/corpus/number.tk b/tree-sitter-tako/test/corpus/number.tk index 6d63566c..44ae0260 100644 --- a/tree-sitter-tako/test/corpus/number.tk +++ b/tree-sitter-tako/test/corpus/number.tk @@ -18,9 +18,11 @@ Float literal with too many decimal points 1.0.0 --- (source_file - (float_literal) - (ERROR - (UNEXPECTED '.'))) + (field + (float_literal) + (int_literal) + ) +) ================== Long int @@ -62,4 +64,4 @@ Color wrong number of digits ================== #ff00aaa --- -(source_file (color) (ERROR (ident))) +(source_file (color) (ERROR)) diff --git a/tree-sitter-tako/test/corpus/simple_sum.tk b/tree-sitter-tako/test/corpus/simple_sum.tk index d50c3c7f..fba391ff 100755 --- a/tree-sitter-tako/test/corpus/simple_sum.tk +++ b/tree-sitter-tako/test/corpus/simple_sum.tk @@ -5,16 +5,12 @@ Simple Sum --- (source_file - (binary_expression - left: (binary_expression - left: (number - (int_literal)) - right: (number - (int_literal)) - ) - right: (number - (int_literal) + (add + left: (add + left: (int_literal) + right: (int_literal) ) + right: (int_literal) ) ) @@ -29,14 +25,14 @@ Simple Sum with comments */ --- (source_file - (binary_expression - left: (binary_expression - left: (number (int_literal)) + (add + left: (add + left: (int_literal) (nesting_comment) - right: (number (int_literal)) + right: (int_literal) ) (nesting_comment) - right: (number (int_literal))) + right: (int_literal)) (nesting_comment) (nesting_comment (nesting_comment) diff --git a/tree-sitter-tako/test/corpus/string_literals.tk b/tree-sitter-tako/test/corpus/string_literals.tk new file mode 100644 index 00000000..2b23a18f --- /dev/null +++ b/tree-sitter-tako/test/corpus/string_literals.tk @@ -0,0 +1,20 @@ +================== +Hello String literal +================== +"Hello" +--- +(source_file (string_literal)) + +================== +Hello World String literal +================== +"Hello World!" +--- +(source_file (string_literal)) + +================== +Single quoted Hello World String literal +================== +'Hello World!' +--- +(source_file (string_literal))