Skip to content

core

Classes:

Lexer

Lexer()

Methods:

Source code in src/arx/lexer/core.py
370
371
372
373
374
375
376
377
def __init__(self) -> None:
    """
    title: Initialize Lexer.
    """
    # Per-instance copy so mutations of the keyword map never leak into
    # the class-level `_keyword_token_map`.
    self._keyword_map = copy.deepcopy(self._keyword_token_map)
    self.new_line = True
    self.last_char = ""
    self.lex_loc = SourceLocation(0, 0)

advance

advance() -> str
Source code in src/arx/lexer/core.py
629
630
631
632
633
634
635
636
637
638
639
640
641
642
def advance(self) -> str:
    """
    title: Advance the token from the buffer.
    returns:
      type: str
      description: The next character read from the input buffer.
    """
    char = ArxIO.get_char()
    # Keep the source location in sync with what was just consumed.
    if char not in ("\n", "\r"):
        self.lex_loc.col += 1
    else:
        self.lex_loc.line += 1
        self.lex_loc.col = 0
    return char

clean

clean() -> None
Source code in src/arx/lexer/core.py
379
380
381
382
383
384
385
def clean(self) -> None:
    """
    title: Reset the Lexer attributes.
    """
    # Restore the same state a freshly-constructed lexer starts with.
    self.new_line = True
    self.last_char = ""
    self.lex_loc = SourceLocation(0, 0)

get_token

get_token() -> Token
Source code in src/arx/lexer/core.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
def get_token(self) -> Token:
    """
    title: Get the next token.
    returns:
      type: Token
      description: The next token from standard input.
    """
    # Prime the one-character lookahead on the first call (or after a
    # reset via clean()).
    if self.last_char == "":
        self.new_line = True
        self.last_char = self.advance()

    # Consume whitespace.  Spaces right after a newline are counted as
    # indentation; each newline resets the running count, so only the
    # leading whitespace of the last line seen contributes.
    indent = 0
    while self.last_char.isspace():
        if self.last_char == "\n":
            self.new_line = True
            indent = 0
        elif self.new_line:
            indent += 1

        self.last_char = self.advance()

    # Emit the indent token before the first real token on the line.
    # last_char already holds that token's first character for the
    # next call.
    if indent:
        token = Token(
            kind=TokenKind.indent,
            value=indent,
            location=self.lex_loc,
        )
        self.new_line = False
        return token

    self.new_line = False

    # Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*
    if self.last_char.isalpha() or self.last_char == "_":
        identifier = self.last_char
        self.last_char = self.advance()

        while self.last_char.isalnum() or self.last_char == "_":
            identifier += self.last_char
            self.last_char = self.advance()

        # Word operators are tokenized as operators, not identifiers.
        if identifier in ("and", "or"):
            return Token(
                kind=TokenKind.operator,
                value=identifier,
                location=self.lex_loc,
            )

        # Literal keywords carry a concrete value; booleans get their
        # own kind, anything else falls through to none_literal.
        if identifier in self._literal_keywords:
            value = self._literal_keywords[identifier]
            if isinstance(value, bool):
                return Token(
                    kind=TokenKind.bool_literal,
                    value=value,
                    location=self.lex_loc,
                )
            return Token(
                kind=TokenKind.none_literal,
                value=value,
                location=self.lex_loc,
            )

        # Reserved words map to their dedicated token kinds.
        if identifier in self._keyword_map:
            return Token(
                kind=self._keyword_map[identifier],
                value=identifier,
                location=self.lex_loc,
            )

        return Token(
            kind=TokenKind.identifier,
            value=identifier,
            location=self.lex_loc,
        )

    # Numeric literal (int or float), or a lone "." operator.
    if self.last_char.isdigit() or self.last_char == ".":
        num_str = ""
        dot_count = 0

        # Leading dot is only a number when a digit follows (".5");
        # otherwise it is the "." operator and the peeked character is
        # kept as the next lookahead.
        if self.last_char == ".":
            next_char = self.advance()
            if not next_char.isdigit():
                self.last_char = next_char
                return Token(
                    kind=TokenKind.operator,
                    value=".",
                    location=self.lex_loc,
                )
            num_str = "."
            dot_count = 1
            self.last_char = next_char

        while self.last_char.isdigit() or self.last_char == ".":
            if self.last_char == ".":
                dot_count += 1
                # A second decimal point is malformed (e.g. "1.2.3").
                if dot_count > 1:
                    raise LexerError(
                        "Invalid number format: multiple decimal points",
                        self.lex_loc,
                    )
            num_str += self.last_char
            self.last_char = self.advance()

        # No decimal point seen -> integer literal.
        if dot_count == 0:
            return Token(
                kind=TokenKind.int_literal,
                value=int(num_str),
                location=self.lex_loc,
            )

        return Token(
            kind=TokenKind.float_literal,
            value=float(num_str),
            location=self.lex_loc,
        )

    # Quoted literal (string/char) — delegated to a helper.
    if self.last_char in ('"', "'"):
        return self._parse_quoted_literal()

    # Backtick-delimited docstring — delegated to a helper.
    if self.last_char == "`":
        return self._parse_docstring()

    # Line comment: discard to end-of-line, then lex the next token
    # recursively.  At EOF we fall through to the checks below.
    if self.last_char in self._line_comment_delims:
        while self.last_char not in (EOF, "\n", "\r"):
            self.last_char = self.advance()
        if self.last_char != EOF:
            return self.get_token()

    # Characters that may start a multi-character operator.
    if self.last_char in ("=", "!", "<", ">", "-", "&", "|", "+"):
        return self._parse_operator()

    # Any other non-empty character is a single-char operator token.
    if self.last_char:
        this_char = self.last_char
        self.last_char = self.advance()
        return Token(
            kind=TokenKind.operator,
            value=this_char,
            location=self.lex_loc,
        )

    return Token(kind=TokenKind.eof, value="", location=self.lex_loc)

lex

lex() -> TokenList
Source code in src/arx/lexer/core.py
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
def lex(self) -> TokenList:
    """
    title: Create a list of tokens from input source.
    returns:
      type: TokenList
    """
    self.clean()
    collected: list[Token] = []

    # Pull tokens until (and including) the eof token.
    while True:
        token = self.get_token()
        collected.append(token)
        if token.kind == TokenKind.eof:
            break

    return TokenList(collected)

LexerError

LexerError(message: str, location: SourceLocation)

Bases: Exception

Source code in src/arx/lexer/core.py
301
302
303
304
305
306
307
308
309
310
311
312
313
def __init__(self, message: str, location: SourceLocation):
    """
    title: Initialize LexerError.
    parameters:
      message:
        type: str
      location:
        type: SourceLocation
    """
    # Embed the source position in the displayed message, but also keep
    # the structured location for programmatic access.
    full_message = f"{message} at line {location.line}, col {location.col}"
    super().__init__(full_message)
    self.location = location

Token dataclass

Token(
    kind: TokenKind,
    value: Any,
    location: SourceLocation = SourceLocation(0, 0),
)

Methods:

Source code in src/arx/lexer/core.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def __init__(
    self,
    kind: TokenKind,
    value: Any,
    location: SourceLocation = SourceLocation(0, 0),
) -> None:
    """
    title: Initialize Token.
    parameters:
      kind:
        type: TokenKind
      value:
        type: Any
      location:
        type: SourceLocation
    """
    # Deep-copy the location so this token stays independent of the
    # lexer's mutable cursor (and of the shared default instance).
    self.location = copy.deepcopy(location)
    self.kind = kind
    self.value = value

get_display_value

get_display_value() -> str
Source code in src/arx/lexer/core.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
def get_display_value(self) -> str:
    """
    title: Return the string representation of a token value.
    returns:
      type: str
      description: The string representation of the token value.
    """
    # Kinds whose value is shown verbatim inside parentheses.
    shown_verbatim = (
        TokenKind.identifier,
        TokenKind.indent,
        TokenKind.float_literal,
        TokenKind.int_literal,
        TokenKind.char_literal,
        TokenKind.bool_literal,
    )
    # Kinds whose (potentially long) value is elided.
    elided = (TokenKind.string_literal, TokenKind.docstring)

    if self.kind in shown_verbatim:
        return f"({self.value})"
    if self.kind in elided:
        return "(...)"
    # none_literal and every other kind render as empty.
    return ""

get_name

get_name() -> str
Source code in src/arx/lexer/core.py
166
167
168
169
170
171
172
173
def get_name(self) -> str:
    """
    title: Get the name of the specified token.
    returns:
      type: str
      description: Name of the token.
    """
    # Fall back to the token's own value when the kind has no mapped name.
    fallback = str(self.value)
    return MAP_KW_TOKEN_TO_NAME.get(self.kind, fallback)

TokenKind

Bases: Enum

TokenList

TokenList(tokens: list[Token])

Methods:

Source code in src/arx/lexer/core.py
239
240
241
242
243
244
245
246
247
248
def __init__(self, tokens: list[Token]) -> None:
    """
    title: Instantiate a TokenList object.
    parameters:
      tokens:
        type: list[Token]
    """
    # Cursor starts before the first token; cur_tok is a sentinel until
    # the first read.
    self.position = 0
    self.tokens = tokens
    self.cur_tok: Token = Token(kind=TokenKind.not_initialized, value="")

get_next_token

get_next_token() -> Token
Source code in src/arx/lexer/core.py
280
281
282
283
284
285
286
287
288
289
290
def get_next_token(self) -> Token:
    """
    title: Provide a simple token buffer.
    returns:
      type: Token
      description: >-
        The current token the parser is looking at. Reads another token
        from the lexer and updates cur_tok with its results.
    """
    # Cache the freshly-read token so parsers can re-inspect it via
    # cur_tok without consuming another one.
    next_tok = self.get_token()
    self.cur_tok = next_tok
    return next_tok

get_token

get_token() -> Token
Source code in src/arx/lexer/core.py
269
270
271
272
273
274
275
276
277
278
def get_token(self) -> Token:
    """
    title: Get the next token.
    returns:
      type: Token
      description: The next token from standard input.
    """
    # Read at the cursor, then move the cursor forward one slot.
    current = self.tokens[self.position]
    self.position += 1
    return current