I'm currently working on variable reassignment and I keep getting this error:
Code:
Parsing expression, current token: Token[TokenType.INT : 4 : Line 2 : Position 33]
Left expression:
Parsing expression, current token: Token[TokenType.EQ : = : Line 4 : Position 42]
Parsing expression, current token: Token[TokenType.ASTERISK : * : Line 4 : Position 46]
Parsing expression, current token: Token[TokenType.INT : 2 : Line 4 : Position 49]
Left expression:
Parsing expression, current token: Token[TokenType.IDENT : a : Line 6 : Position 64]
Left expression:
No prefix parse function found for token 'TokenType.EQ'
No prefix parse function found for token 'TokenType.ASTERISK'
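For reference, the Line/Position values in this trace can be matched against the source by dumping the raw token stream the same way the LEXER_DEBUG branch of the main script below does. This is only a small sketch that reuses the Lexer API shown further down:
Code:
from Lexer import Lexer

with open("tests/test.py", "r") as f:
    source = f.read()

# Print every token (type, literal, line, position), mirroring the
# LEXER_DEBUG branch of the main script below.
lex = Lexer(source=source)
while lex.current_char is not None:
    print(lex.next_token())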
AST.py:
Code:
from abc import ABC, abstractmethod
from enum import Enum

class NodeType(Enum):
    Program = "Program"

    # Statements
    ExpressionStatement = "ExpressionStatement"
    VariableStatement = "VariableStatement"
    FunctionStatement = "FunctionStatement"
    BlockStatement = "BlockStatement"
    ReturnStatement = "ReturnStatement"
    AssignStatement = "AssignStatement"

    # Expression
    InfixExpression = "InfixExpression"

    # Literals
    IntegerLiteral = "IntegerLiteral"
    FloatLiteral = "FloatLiteral"
    IdentifierLiteral = "IdentifierLiteral"

class Node(ABC):
    @abstractmethod
    def type(self) -> NodeType:
        pass

    @abstractmethod
    def json(self) -> dict:
        pass

class Statement(Node):
    pass

class Expression(Node):
    pass

class Program(Node):
    def __init__(self) -> None:
        self.statements: list[Statement] = []

    def type(self) -> NodeType:
        return NodeType.Program

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "statements": [{stmt.type().value: stmt.json()} for stmt in self.statements]
        }

# Region statements
class ExpressionStatement(Statement):
    def __init__(self, expr: Expression = None) -> None:
        self.expr: Expression = expr

    def type(self) -> NodeType:
        return NodeType.ExpressionStatement

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "expr": self.expr.json() if self.expr is not None else None
        }

class VariableStatement(Statement):
    def __init__(self, name: Expression = None, value: Expression = None, value_type: str = None) -> None:
        self.name = name
        self.value = value
        self.value_type = value_type

    def type(self) -> NodeType:
        return NodeType.VariableStatement

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "name": self.name.json() if self.name is not None else None,
            "value": self.value.json() if self.value is not None else None,
            "value_type": self.value_type
        }

class BlockStatement(Statement):
    def __init__(self, statements: list[Statement] = None) -> None:
        self.statements = statements if statements is not None else []

    def type(self) -> NodeType:
        return NodeType.BlockStatement

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "statements": [stmt.json() for stmt in self.statements]
        }

class ReturnStatement(Statement):
    def __init__(self, return_value: Expression = None) -> None:
        self.return_value = return_value

    def type(self) -> NodeType:
        return NodeType.ReturnStatement

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "return_value": self.return_value.json()
        }

class FunctionStatement(Statement):
    def __init__(self, parameters: list = [], body: BlockStatement = None, name=None, return_type: str = None) -> None:
        self.parameters = parameters
        self.body = body
        self.name = name
        self.return_type = return_type

    def type(self) -> NodeType:
        return NodeType.FunctionStatement

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "name": self.name.json() if self.name is not None else None,
            "return_type": self.return_type,
            "parameters": [p.json() for p in self.parameters],
            "body": self.body.json() if self.body is not None else None
        }

class AssignStatement(Statement):
    def __init__(self, ident: Expression = None, right_value: Expression = None) -> None:
        self.ident = ident
        self.right_value = right_value

    def type(self) -> NodeType:
        return NodeType.AssignStatement

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "ident": self.ident.json(),
            "right_value": self.right_value.json()
        }

class InfixExpression(Expression):
    def __init__(self, left_node: Expression, operator: str, right_node: Expression = None) -> None:
        self.left_node: Expression = left_node
        self.operator: str = operator
        self.right_node: Expression = right_node

    def type(self) -> NodeType:
        return NodeType.InfixExpression

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "left_node": self.left_node.json(),
            "operator": self.operator,
            "right_node": self.right_node.json()
        }

# Region Literals
class IntegerLiteral(Expression):
    def __init__(self, value: int = None) -> None:
        self.value: int = value

    def type(self) -> NodeType:
        return NodeType.IntegerLiteral

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "value": self.value
        }

class FloatLiteral(Expression):
    def __init__(self, value: float = None) -> None:
        self.value: float = value

    def type(self) -> NodeType:
        return NodeType.FloatLiteral

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "value": self.value
        }

class IdentifierLiteral(Expression):
    def __init__(self, value: str = None) -> None:
        self.value: str = value

    def type(self) -> NodeType:
        return NodeType.IdentifierLiteral

    def json(self) -> dict:
        return {
            "type": self.type().value,
            "value": self.value,
        }
Parser.py:
Code:
from Lexer import Lexer
from Token import Token, TokenType
from typing import Callable
from enum import Enum, auto

from AST import Statement, Expression, Program
from AST import ExpressionStatement, VariableStatement, FunctionStatement, ReturnStatement, BlockStatement, AssignStatement
from AST import InfixExpression
from AST import IntegerLiteral, FloatLiteral, IdentifierLiteral

# Precedence Types
class PrecedenceType(Enum):
    P_LOWEST = 0
    P_EQUALS = auto()
    P_LESSGREATER = auto()
    P_SUM = auto()
    P_PRODUCT = auto()
    P_EXPONENT = auto()
    P_PREFIX = auto()
    P_CALL = auto()
    P_INDEX = auto()

# Precedence Mapping
PRECEDENCES: dict[TokenType, PrecedenceType] = {
    TokenType.PLUS: PrecedenceType.P_SUM,
    TokenType.MINUS: PrecedenceType.P_SUM,
    TokenType.SLASH: PrecedenceType.P_PRODUCT,
    TokenType.ASTERISK: PrecedenceType.P_PRODUCT,
    TokenType.MODULUS: PrecedenceType.P_PRODUCT,
    TokenType.POW: PrecedenceType.P_EXPONENT,
}

class Parser:
    def __init__(self, lexer: Lexer) -> None:
        self.lexer = lexer
        self.errors: list[str] = []

        self.current_token: Token = None
        self.peek_token: Token = None

        self.prefix_parse_fns: dict[TokenType, Callable] = {
            TokenType.IDENT: self.__parse_identifier,
            TokenType.INT: self.__parse_int_literal,
            TokenType.FLOAT: self.__parse_float_literal,
            TokenType.LPAREN: self.__parse_grouped_expression,
        }
        self.infix_parse_fns: dict[TokenType, Callable] = {
            TokenType.PLUS: self.__parse_infix_expression,
            TokenType.MINUS: self.__parse_infix_expression,
            TokenType.SLASH: self.__parse_infix_expression,
            TokenType.ASTERISK: self.__parse_infix_expression,
            TokenType.POW: self.__parse_infix_expression,
            TokenType.MODULUS: self.__parse_infix_expression,
        }

        self.__next_token()
        self.__next_token()

    # Region parser helpers
    def __next_token(self) -> None:
        self.current_token = self.peek_token
        self.peek_token = self.lexer.next_token()

    def __current_token_is(self, tt: TokenType) -> bool:
        return self.current_token.type == tt

    def __peek_token_is(self, tt: TokenType) -> bool:
        return self.peek_token == tt

    def __peek_token(self, tt: TokenType) -> bool:
        return self.peek_token.type == tt

    def __expect_peek(self, tt: TokenType) -> bool:
        if self.__peek_token(tt):
            self.__next_token()
            return True
        else:
            self.__peek_token(tt)
            return False

    def __current_precedence(self) -> PrecedenceType:
        prec: int | None = PRECEDENCES.get(self.current_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_precedence(self) -> PrecedenceType:
        prec: int | None = PRECEDENCES.get(self.peek_token.type)
        if prec is None:
            return PrecedenceType.P_LOWEST
        return prec

    def __peek_error(self, tt: TokenType) -> None:
        self.errors.append(f"Unexpected token '{tt}', got {self.peek_token} instead.")

    def __no_prefix_parse_fn_error(self, tt: TokenType):
        self.errors.append(f"No prefix parse function found for token '{tt}'")

    def parse_program(self) -> Program:
        program = Program()

        while self.current_token.type != TokenType.EOF:
            stmt = self.__parse_statement()
            if stmt is not None:
                program.statements.append(stmt)
            self.__next_token()
            if self.current_token.type == TokenType.SEMICOLON:
                self.__next_token()

        return program

    def __parse_statement(self) -> Statement:
        if self.current_token.type == TokenType.IDENT and self.__peek_token_is(TokenType.EQ):
            return self.__parse_assign_statement()

        match self.current_token.type:
            case TokenType.IDENT:
                return self.__parse_variable_statement()
            case TokenType.DEF:
                return self.__parse_function_statement()
            case TokenType.RETURN:
                return self.__parse_return_statement()
            case _:
                return self.__parse_expression_statement()

    def __parse_expression_statement(self) -> ExpressionStatement:
        expr = self.__parse_expression(PrecedenceType.P_LOWEST)

        if self.__peek_token_is(TokenType.SEMICOLON):
            self.__next_token()

        stmt: ExpressionStatement = ExpressionStatement(expr=expr)
        return stmt

    def __parse_variable_statement(self) -> VariableStatement:
        # a: int = 10;
        stmt: VariableStatement = VariableStatement()

        if not self.__current_token_is(TokenType.IDENT):
            return None
        stmt.name = IdentifierLiteral(value=self.current_token.literal)

        if not self.__expect_peek(TokenType.COLON):
            return None
        if not self.__expect_peek(TokenType.TYPE):
            return None
        stmt.value_type = self.current_token.literal

        if not self.__expect_peek(TokenType.EQ):
            return None
        self.__next_token()
        stmt.value = self.__parse_expression(PrecedenceType.P_LOWEST)

        while not self.__current_token_is(TokenType.SEMICOLON) and self.__current_token_is(TokenType.EOF):
            self.__next_token()

        return stmt

    def __parse_function_statement(self) -> FunctionStatement:
        stmt: FunctionStatement = FunctionStatement()

        # def main() -> int { return 10; }
        if not self.__expect_peek(TokenType.IDENT):
            return None
        stmt.name = IdentifierLiteral(value=self.current_token.literal)

        if not self.__expect_peek(TokenType.LPAREN):
            return None
        stmt.parameters = []  # TODO
        if not self.__expect_peek(TokenType.RPAREN):
            return None

        if not self.__expect_peek(TokenType.ARROW):
            return None
        if not self.__expect_peek(TokenType.TYPE):
            return None
        stmt.return_type = self.current_token.literal

        if not self.__expect_peek(TokenType.COLON):
            return None

        stmt.body = self.__parse_block_statement()
        return stmt

    def __parse_return_statement(self) -> ReturnStatement:
        stmt: Statement = ReturnStatement()

        self.__next_token()
        stmt.return_value = self.__parse_expression(PrecedenceType.P_LOWEST)

        if not self.__expect_peek(TokenType.SEMICOLON):
            return None
        return stmt

    def __parse_block_statement(self) -> BlockStatement:
        block_stmt: BlockStatement = BlockStatement()

        self.__next_token()
        while not self.__current_token_is(TokenType.EOF):
            if self.current_token.type == TokenType.SEMICOLON:
                self.__next_token()
            stmt: Statement = self.__parse_statement()
            if stmt is not None:
                block_stmt.statements.append(stmt)
            self.__next_token()

        return block_stmt

    def __parse_assign_statement(self) -> AssignStatement:
        stmt: AssignStatement = AssignStatement()
        stmt.ident = IdentifierLiteral(value=self.current_token.literal)

        if not self.__expect_peek(TokenType.EQ):
            return None
        self.__next_token()
        stmt.right_value = self.__parse_expression(PrecedenceType.P_LOWEST)

        if self.__peek_token_is(TokenType.SEMICOLON):
            self.__next_token()

        return stmt

    def __parse_expression(self, precedence: PrecedenceType) -> Expression:
        print(f"Parsing expression, current token: {self.current_token}")

        # Use prefix function only for valid prefix tokens
        prefix_fn = self.prefix_parse_fns.get(self.current_token.type)
        if prefix_fn is None:
            self.__no_prefix_parse_fn_error(self.current_token.type)
            return None

        left_expr = prefix_fn()
        print(f"Left expression: {left_expr}")

        # Ensure we proceed only if the next token has higher precedence
        while not self.__peek_token_is(TokenType.SEMICOLON) and precedence.value < self.__peek_precedence().value:
            infix_fn = self.infix_parse_fns.get(self.peek_token.type)
            if infix_fn is None:
                return left_expr

            # Move to the infix operator and parse accordingly
            self.__next_token()
            left_expr = infix_fn(left_expr)
            print(f"Updated left expression: {left_expr}")

        return left_expr

    def __parse_infix_expression(self, left_node: Expression) -> Expression:
        infix_expr: InfixExpression = InfixExpression(left_node=left_node, operator=self.current_token.literal)

        precedence = self.__current_precedence()
        self.__next_token()
        infix_expr.right_node = self.__parse_expression(precedence)

        return infix_expr

    def __parse_grouped_expression(self) -> Expression:
        self.__next_token()
        expr: Expression = self.__parse_expression(PrecedenceType.P_LOWEST)
        if not self.__expect_peek(TokenType.RPAREN):
            return None
        return expr

    # Region Prefix Methods
    def __parse_identifier(self) -> IdentifierLiteral:
        return IdentifierLiteral(value=self.current_token.literal)

    def __parse_int_literal(self) -> Expression:
        """Parses an IntegerLiteral node from the current token"""
        int_lit: IntegerLiteral = IntegerLiteral()
        try:
            int_lit.value = int(self.current_token.literal)
        except:
            self.errors.append(f"Unable to parse '{self.current_token.literal}' as an integer.")
            return None
        return int_lit

    def __parse_float_literal(self) -> Expression:
        """Parses a FloatLiteral node from the current token"""
        float_lit: FloatLiteral = FloatLiteral()
        try:
            float_lit.value = float(self.current_token.literal)
        except:
            self.errors.append(f"Unable to parse '{self.current_token.literal}' as a float.")
            return None
        return float_lit
The main script:
Code:
from Lexer import Lexer
from Parser import Parser
from Compiler import Compiler
from AST import Program

import json
import time

from llvmlite import ir
import llvmlite.binding as llvm
from ctypes import CFUNCTYPE, c_int, c_float

LEXER_DEBUG: bool = False
PARSER_DEBUG: bool = True
COMPILER_DEBUG: bool = False
RUN_CODE: bool = False

if __name__ == '__main__':
    with open("tests/test.py", "r") as f:
        code: str = f.read()

    if LEXER_DEBUG:
        print("===== LEXER DEBUG =====")
        debug_lex: Lexer = Lexer(source=code)
        while debug_lex.current_char is not None:
            print(debug_lex.next_token())

    l: Lexer = Lexer(source=code)
    p: Parser = Parser(l)

    program: Program = p.parse_program()
    if len(p.errors) > 0:
        for err in p.errors:
            print(err)
        exit(1)

    if PARSER_DEBUG:
        print("===== PARSER_DEBUG =====")
        with open("debug/ast.json", "w") as f:
            json.dump(program.json(), f, indent=4)
        print("Wrote AST to debug/ast.json successfully")

    c: Compiler = Compiler()
    c.compile(node=program)

    # Output steps
    module: ir.Module = c.module
    module.triple = llvm.get_default_triple()

    if COMPILER_DEBUG:
        with open("debug/ir.ll", "w") as f:
            f.write(str(module))

    if RUN_CODE:
        llvm.initialize()
        llvm.initialize_native_target()
        llvm.initialize_native_asmparser()
        llvm.initialize_native_asmprinter()

        try:
            llvm_ir_parsed = llvm.parse_assembly(str(module))
            llvm_ir_parsed.verify()
        except Exception as e:
            print(e)
            raise

        target_machine = llvm.Target.from_default_triple().create_target_machine()

        engine = llvm.create_mcjit_compiler(llvm_ir_parsed, target_machine)
        engine.finalize_object()

        entry = engine.get_function_address('main')
        cfunc = CFUNCTYPE(c_int)(entry)

        # Compile time
        st = time.time()  # start time
        result = cfunc()
        et = time.time()  # end time

        print(f'\n\nProgram returned: {result}\n==== Executed in {round((et - st) * 1000, 3)}ms ====')
tests/test.py:
Code:
def main() -> int:
    a: int = 4;
    a = a * 2;
    return a;
It looks like the current token starts at line 2 of test.py rather than at line 4 (a = a * 2), which is where it should start.
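To see which branch of __parse_statement each statement actually takes, a temporary trace like the one below might help. This is only a sketch: it is the existing method from the parser above with one extra print, using nothing but names already defined there:
Code:
    def __parse_statement(self) -> Statement:
        # Temporary debug trace: show what the assignment check actually compares
        # for each statement start, before dispatching.
        is_assign = (self.current_token.type == TokenType.IDENT
                     and self.__peek_token_is(TokenType.EQ))
        print(f"Dispatch: current={self.current_token}, peek={self.peek_token}, assign branch: {is_assign}")

        if is_assign:
            return self.__parse_assign_statement()

        match self.current_token.type:
            case TokenType.IDENT:
                return self.__parse_variable_statement()
            case TokenType.DEF:
                return self.__parse_function_statement()
            case TokenType.RETURN:
                return self.__parse_return_statement()
            case _:
                return self.__parse_expression_statement()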
More details here: https://stackoverflow.com/questions/791 ... t-properly