diff --git a/takolib/src/ast/mod.rs b/takolib/src/ast/mod.rs index db681b67..4f106b5c 100644 --- a/takolib/src/ast/mod.rs +++ b/takolib/src/ast/mod.rs @@ -5,6 +5,7 @@ pub mod location; mod pretty_printer; pub mod string_interner; +use tree_sitter::Language; use crate::parser::semantics::Literal; use crate::parser::tokens::Symbol; use entity_component_slab::{make_component, make_world}; @@ -15,6 +16,8 @@ use smallvec::{smallvec, SmallVec}; use std::path::PathBuf; use string_interner::{Identifier, StringInterner}; +type TsNodeId = u16; + #[derive(Clone, Default, Debug, Hash, PartialEq, Eq)] pub struct Ast { // TODO(usability): Add a range tree for mapping from locations to nodes. @@ -35,13 +38,72 @@ pub struct Ast { pub atoms: ChildSlab, pub string_interner: StringInterner, + + // TS Node Ids + // TODO: generated from + // ../../../tree_sitter_tako/src/node-types.json + pub add_node_id: TsNodeId, + pub and_node_id: TsNodeId, + pub assign_node_id: TsNodeId, + pub binding_node_id: TsNodeId, + pub bit_and_node_id: TsNodeId, + pub bit_not_node_id: TsNodeId, + pub bit_or_node_id: TsNodeId, + pub bit_xor_node_id: TsNodeId, + pub block_node_id: TsNodeId, + pub call_node_id: TsNodeId, + pub color_node_id: TsNodeId, + pub container_node_id: TsNodeId, + pub div_node_id: TsNodeId, + pub equals_node_id: TsNodeId, + pub exp_node_id: TsNodeId, + pub field_node_id: TsNodeId, + pub format_expression_node_id: TsNodeId, + pub greater_than_node_id: TsNodeId, + pub greater_than_equals_node_id: TsNodeId, + pub has_type_node_id: TsNodeId, + pub hex_literal_node_id: TsNodeId, + pub index_node_id: TsNodeId, + pub left_shift_node_id: TsNodeId, + pub less_than_node_id: TsNodeId, + pub less_than_equals_node_id: TsNodeId, + pub mod_node_id: TsNodeId, + pub mul_node_id: TsNodeId, + pub neg_node_id: TsNodeId, + pub nesting_comment_node_id: TsNodeId, + pub not_node_id: TsNodeId, + pub not_equals_node_id: TsNodeId, + pub or_node_id: TsNodeId, + pub parens_node_id: TsNodeId, + pub range_node_id: TsNodeId, + pub right_shift_node_id: TsNodeId, + pub sequence_node_id: TsNodeId, + pub set_node_id: TsNodeId, + pub shebang_node_id: TsNodeId, + pub single_line_comment_node_id: TsNodeId, + pub source_file_node_id: TsNodeId, + pub spread_node_id: TsNodeId, + pub string_literal_node_id: TsNodeId, + pub sub_node_id: TsNodeId, + pub try_node_id: TsNodeId, + pub escape_sequence_node_id: TsNodeId, + pub exists_node_id: TsNodeId, + pub float_literal_node_id: TsNodeId, + pub forall_node_id: TsNodeId, + pub given_node_id: TsNodeId, + pub heading_node_id: TsNodeId, + pub ident_node_id: TsNodeId, + pub int_literal_node_id: TsNodeId, } impl Ast { #[must_use] pub fn new(filepath: PathBuf) -> Self { + let tako_lang: &Language = &tree_sitter_tako::LANGUAGE.into(); + let int_literal_node_id = tako_lang.id_for_node_kind("int_literal", /*named*/true); Self { filepath, + int_literal_node_id, ..Self::default() } } diff --git a/takolib/src/parser/mod.rs b/takolib/src/parser/mod.rs index 3ae99707..235276c9 100644 --- a/takolib/src/parser/mod.rs +++ b/takolib/src/parser/mod.rs @@ -1,6 +1,8 @@ use std::path::Path; use log::error; +use smallvec::{SmallVec, smallvec}; +use tree_sitter::{Language, TreeCursor, Node as TreeNode}; use tree_sitter::{Tree, Parser as TSParser}; use tokens::{Symbol, Token}; @@ -264,7 +266,7 @@ pub mod tokens { Self::OpenParen => "(", Self::CloseParen => ")", Self::OpenCurly => "{", - Self::CloseCurly => "}", + Self::CloseCurly => "}", Self::OpenBracket => "[", Self::CloseBracket => "]", } @@ -286,14 +288,52 @@ pub mod tokens { } } +fn handle_subtree<'a>(curr: &mut TreeCursor<'a>, ts_node: TreeNode<'a>, file: &Path, input: &str, ast: &mut Ast) -> Result, TError> { + // TODO: Check that this is large enough but not too large + let mut children: SmallVec:: = smallvec![]; + let mut children_walker = ts_node.walk(); + for ts_child in ts_node.children(&mut children_walker) { + if !ts_child.is_named() { + // BIG assumption being made here... + continue; + } + let child = handle_subtree(curr, ts_child, file, input, ast)?; + + // TODO: Check that this subtree is allowed. + + if let Some(child) = child { + children.push(child); + } + } + // TODO: Handle merging + // TODO: Handle constructing this kind of node from it's children + + if ts_node.kind_id() == ast.int_literal_node_id { + println!("INT_LITERAL"); + } + let info = ( + ts_node.id(), + ts_node.kind_id(), + ts_node.kind(), + ts_node.is_missing(), + ts_node.is_extra(), + ts_node.is_error(), + ts_node.is_named(), + ); + println!("{:?} {:?} FROM {}", info, ts_node, ts_node.utf8_text(input.as_bytes()).unwrap()); + // TODO: return the ID + Ok(None) +} + pub fn parse(file: &Path, input: &str, _tokens: &[Token]) -> Result { let mut ast = Ast::new(file.to_path_buf()); // TODO: Put parser in a state to get caching // TODO: Set logger. + let tako_lang: &Language = &tree_sitter_tako::LANGUAGE.into(); let mut parser = TSParser::new(); parser - .set_language(&tree_sitter_tako::LANGUAGE.into()) + .set_language(tako_lang) .expect("Error loading Tako parser"); let old_tree: Option<&Tree> = None; @@ -303,7 +343,16 @@ pub fn parse(file: &Path, input: &str, _tokens: &[Token]) -> Result }; // TODO: Handle errors println!("Result: {:?}", res); - // ast.roots.push(res); + + let mut ts_curr = res.walk(); + + let ts_root = ts_curr.node(); + let Some(root) = handle_subtree(&mut ts_curr, ts_root, file, input, &mut ast)? else { + todo!("Handle file with no root!?") + }; + + // Done converting to ast + ast.roots.push(root); Ok(ast) }