339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
def get_token(self) -> Token:
    """
    title: Get the next token.
    returns:
      type: Token
      description: The next token from standard input.
    """
    # Prime the read-ahead buffer on the very first call (or after it was
    # exhausted): mark start-of-line and fetch the first character.
    # Invariant below: self.last_char always holds the next UNconsumed char.
    if self.last_char == "":
        self.new_line = True
        self.last_char = self.advance()
    # Skip any whitespace.
    # While at the start of a line, count the whitespace characters so the
    # indentation can be emitted as its own token.
    indent = 0
    while self.last_char.isspace():
        if self.new_line:
            indent += 1
        if self.last_char == "\n":
            # note: if it is an empty line it is not necessary to keep
            # the record about the indentation
            self.new_line = True
            indent = 0
        self.last_char = self.advance()
    self.new_line = False
    if indent:
        return Token(
            kind=TokenKind.indent, value=indent, location=self.lex_loc
        )
    # self.cur_loc = self.lex_loc
    # Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*
    if self.last_char.isalpha() or self.last_char == "_":
        # Identifier
        identifier = self.last_char
        self.last_char = self.advance()
        while self.last_char.isalnum() or self.last_char == "_":
            identifier += self.last_char
            self.last_char = self.advance()
        # "and"/"or" are word-shaped but lex as operators.
        if identifier in ("and", "or"):
            return Token(
                kind=TokenKind.operator,
                value=identifier,
                location=self.lex_loc,
            )
        # Literal keywords carry their Python value directly
        # (note: lowercase spellings only — "True"/"None" would lex as
        # plain identifiers).
        if identifier == "true":
            return Token(
                kind=TokenKind.bool_literal,
                value=True,
                location=self.lex_loc,
            )
        if identifier == "false":
            return Token(
                kind=TokenKind.bool_literal,
                value=False,
                location=self.lex_loc,
            )
        if identifier == "none":
            return Token(
                kind=TokenKind.none_literal,
                value=None,
                location=self.lex_loc,
            )
        # Reserved words map to their dedicated token kinds.
        if identifier in self._keyword_map:
            return Token(
                kind=self._keyword_map[identifier],
                value=identifier,
                location=self.lex_loc,
            )
        return Token(
            kind=TokenKind.identifier,
            value=identifier,
            location=self.lex_loc,
        )
    # Number: [0-9.]+
    if self.last_char.isdigit() or self.last_char == ".":
        num_str = ""
        dot_count = 0
        while self.last_char.isdigit() or self.last_char == ".":
            if self.last_char == ".":
                dot_count += 1
                # A second decimal point is malformed, e.g. "1.2.3".
                if dot_count > 1:
                    raise LexerError(
                        "Invalid number format: multiple decimal points",
                        self.lex_loc,
                    )
            num_str += self.last_char
            self.last_char = self.advance()
        # A lone "." (no digits consumed) is the member-access operator,
        # not a number.
        if num_str == ".":
            return Token(
                kind=TokenKind.operator,
                value=".",
                location=self.lex_loc,
            )
        # No decimal point -> integer literal; exactly one -> float.
        if dot_count == 0:
            return Token(
                kind=TokenKind.int_literal,
                value=int(num_str),
                location=self.lex_loc,
            )
        return Token(
            kind=TokenKind.float_literal,
            value=float(num_str),
            location=self.lex_loc,
        )
    # String literal: "..." or '...' — delegated to a dedicated parser.
    if self.last_char in ('"', "'"):
        return self._parse_quoted_literal()
    # Docstring: ```...```
    if self.last_char == "`":
        return self._parse_docstring()
    # Comment until end of line.
    if self.last_char == "#":
        while self.last_char not in (EOF, "\n", "\r"):
            self.last_char = self.advance()
        if self.last_char != EOF:
            # Recurse to produce the first token after the comment;
            # the pending newline is consumed by the whitespace skip above.
            return self.get_token()
    # Characters that may start a multi-character operator (==, !=, <=,
    # ->, &&, ...) are delegated to a dedicated parser.
    if self.last_char in ("=", "!", "<", ">", "-", "&", "|", "+"):
        return self._parse_operator()
    # Check for end of file. Don't eat the EOF.
    # NOTE(review): this truthiness test assumes the EOF sentinel is falsy
    # (e.g. "") — confirm against the EOF definition. Any other remaining
    # character is emitted as a one-character operator token.
    if self.last_char:
        this_char = self.last_char
        self.last_char = self.advance()
        return Token(
            kind=TokenKind.operator, value=this_char, location=self.lex_loc
        )
    return Token(kind=TokenKind.eof, value="", location=self.lex_loc)
|