Skip to content

core

Classes:

Lexer

Lexer()

Methods:

Source code in src/arx/lexer/core.py
370
371
372
373
374
375
376
377
def __init__(self) -> None:
    """
    title: Initialize Lexer.
    """
    # Per-instance copy so mutations of the keyword map never leak into
    # the class-level `_keyword_token_map`.
    self._keyword_map = copy.deepcopy(self._keyword_token_map)
    self.new_line = True
    self.last_char = ""
    self.lex_loc = SourceLocation(0, 0)

advance

advance() -> str
Source code in src/arx/lexer/core.py
629
630
631
632
633
634
635
636
637
638
639
640
641
642
def advance(self) -> str:
    """
    title: Advance the token from the buffer.
    returns:
      type: str
      description: The next character read from the input buffer.
    """
    char = ArxIO.get_char()
    # Keep the source location in sync with what was just consumed.
    if char not in ("\n", "\r"):
        self.lex_loc.col += 1
    else:
        self.lex_loc.line += 1
        self.lex_loc.col = 0
    return char

clean

clean() -> None
Source code in src/arx/lexer/core.py
379
380
381
382
383
384
385
def clean(self) -> None:
    """
    title: Reset the Lexer attributes.
    """
    # Restore the same state a freshly-constructed lexer starts with.
    self.new_line = True
    self.last_char = ""
    self.lex_loc = SourceLocation(0, 0)

get_token

get_token() -> Token
Source code in src/arx/lexer/core.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
def get_token(self) -> Token:
    """
    title: Get the next token.
    returns:
      type: Token
      description: The next token from standard input.
    """
    # Prime the one-character lookahead on the first call (or after a
    # reset via clean()).
    if self.last_char == "":
        self.new_line = True
        self.last_char = self.advance()

    # Consume whitespace.  Spaces right after a newline are counted as
    # indentation; each newline resets the running count, so only the
    # leading whitespace of the last line seen contributes.
    indent = 0
    while self.last_char.isspace():
        if self.last_char == "\n":
            self.new_line = True
            indent = 0
        elif self.new_line:
            indent += 1

        self.last_char = self.advance()

    # Emit the indent token before the first real token on the line.
    # last_char already holds that token's first character for the
    # next call.
    if indent:
        token = Token(
            kind=TokenKind.indent,
            value=indent,
            location=self.lex_loc,
        )
        self.new_line = False
        return token

    self.new_line = False

    # Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*
    if self.last_char.isalpha() or self.last_char == "_":
        identifier = self.last_char
        self.last_char = self.advance()

        while self.last_char.isalnum() or self.last_char == "_":
            identifier += self.last_char
            self.last_char = self.advance()

        # Word operators are tokenized as operators, not identifiers.
        if identifier in ("and", "or"):
            return Token(
                kind=TokenKind.operator,
                value=identifier,
                location=self.lex_loc,
            )

        # Literal keywords carry a concrete value; booleans get their
        # own kind, anything else falls through to none_literal.
        if identifier in self._literal_keywords:
            value = self._literal_keywords[identifier]
            if isinstance(value, bool):
                return Token(
                    kind=TokenKind.bool_literal,
                    value=value,
                    location=self.lex_loc,
                )
            return Token(
                kind=TokenKind.none_literal,
                value=value,
                location=self.lex_loc,
            )

        # Reserved words map to their dedicated token kinds.
        if identifier in self._keyword_map:
            return Token(
                kind=self._keyword_map[identifier],
                value=identifier,
                location=self.lex_loc,
            )

        return Token(
            kind=TokenKind.identifier,
            value=identifier,
            location=self.lex_loc,
        )

    # Numeric literal (int or float), or a lone "." operator.
    if self.last_char.isdigit() or self.last_char == ".":
        num_str = ""
        dot_count = 0

        # Leading dot is only a number when a digit follows (".5");
        # otherwise it is the "." operator and the peeked character is
        # kept as the next lookahead.
        if self.last_char == ".":
            next_char = self.advance()
            if not next_char.isdigit():
                self.last_char = next_char
                return Token(
                    kind=TokenKind.operator,
                    value=".",
                    location=self.lex_loc,
                )
            num_str = "."
            dot_count = 1
            self.last_char = next_char

        while self.last_char.isdigit() or self.last_char == ".":
            if self.last_char == ".":
                dot_count += 1
                # A second decimal point is malformed (e.g. "1.2.3").
                if dot_count > 1:
                    raise LexerError(
                        "Invalid number format: multiple decimal points",
                        self.lex_loc,
                    )
            num_str += self.last_char
            self.last_char = self.advance()

        # No decimal point seen -> integer literal.
        if dot_count == 0:
            return Token(
                kind=TokenKind.int_literal,
                value=int(num_str),
                location=self.lex_loc,
            )

        return Token(
            kind=TokenKind.float_literal,
            value=float(num_str),
            location=self.lex_loc,
        )

    # Quoted literal (string/char) — delegated to a helper.
    if self.last_char in ('"', "'"):
        return self._parse_quoted_literal()

    # Backtick-delimited docstring — delegated to a helper.
    if self.last_char == "`":
        return self._parse_docstring()

    # Line comment: discard to end-of-line, then lex the next token
    # recursively.  At EOF we fall through to the checks below.
    if self.last_char in self._line_comment_delims:
        while self.last_char not in (EOF, "\n", "\r"):
            self.last_char = self.advance()
        if self.last_char != EOF:
            return self.get_token()

    # Characters that may start a multi-character operator.
    if self.last_char in ("=", "!", "<", ">", "-", "&", "|", "+"):
        return self._parse_operator()

    # Any other non-empty character is a single-char operator token.
    if self.last_char:
        this_char = self.last_char
        self.last_char = self.advance()
        return Token(
            kind=TokenKind.operator,
            value=this_char,
            location=self.lex_loc,
        )

    return Token(kind=TokenKind.eof, value="", location=self.lex_loc)

lex

lex() -> TokenList
Source code in src/arx/lexer/core.py
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
def lex(self) -> TokenList:
    """
    title: Create a list of tokens from input source.
    returns:
      type: TokenList
    """
    self.clean()
    collected: list[Token] = []

    # Pull tokens until (and including) the eof token.
    while True:
        token = self.get_token()
        collected.append(token)
        if token.kind == TokenKind.eof:
            break

    return TokenList(collected)

LexerError

LexerError(message: str, location: SourceLocation)

Bases: Exception

Source code in src/arx/lexer/core.py
301
302
303
304
305
306
307
308
309
310
311
312
313
def __init__(self, message: str, location: SourceLocation):
    """
    title: Initialize LexerError.
    parameters:
      message:
        type: str
      location:
        type: SourceLocation
    """
    # Embed the source position in the displayed message, but also keep
    # the structured location for programmatic access.
    full_message = f"{message} at line {location.line}, col {location.col}"
    super().__init__(full_message)
    self.location = location

Token dataclass

Token(
    kind: TokenKind,
    value: Any,
    location: SourceLocation = SourceLocation(0, 0),
)

Methods:

Source code in src/arx/lexer/core.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def __init__(
    self,
    kind: TokenKind,
    value: Any,
    location: SourceLocation = SourceLocation(0, 0),
) -> None:
    """
    title: Initialize Token.
    parameters:
      kind:
        type: TokenKind
      value:
        type: Any
      location:
        type: SourceLocation
    """
    # Deep-copy the location so this token stays independent of the
    # lexer's mutable cursor (and of the shared default instance).
    self.location = copy.deepcopy(location)
    self.kind = kind
    self.value = value

get_display_value

get_display_value() -> str
Source code in src/arx/lexer/core.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
def get_display_value(self) -> str:
    """
    title: Return the string representation of a token value.
    returns:
      type: str
      description: The string representation of the token value.
    """
    # Kinds whose value is shown verbatim inside parentheses.
    shown_verbatim = (
        TokenKind.identifier,
        TokenKind.indent,
        TokenKind.float_literal,
        TokenKind.int_literal,
        TokenKind.char_literal,
        TokenKind.bool_literal,
    )
    # Kinds whose (potentially long) value is elided.
    elided = (TokenKind.string_literal, TokenKind.docstring)

    if self.kind in shown_verbatim:
        return f"({self.value})"
    if self.kind in elided:
        return "(...)"
    # none_literal and every other kind render as empty.
    return ""

get_name

get_name() -> str
Source code in src/arx/lexer/core.py
166
167
168
169
170
171
172
173
def get_name(self) -> str:
    """
    title: Get the name of the specified token.
    returns:
      type: str
      description: Name of the token.
    """
    # Fall back to the token's own value when the kind has no mapped name.
    fallback = str(self.value)
    return MAP_KW_TOKEN_TO_NAME.get(self.kind, fallback)

TokenKind

Bases: Enum

TokenList

TokenList(tokens: list[Token])

Methods:

Source code in src/arx/lexer/core.py
239
240
241
242
243
244
245
246
247
248
def __init__(self, tokens: list[Token]) -> None:
    """
    title: Instantiate a TokenList object.
    parameters:
      tokens:
        type: list[Token]
    """
    # Cursor starts before the first token; cur_tok is a sentinel until
    # the first read.
    self.position = 0
    self.tokens = tokens
    self.cur_tok: Token = Token(kind=TokenKind.not_initialized, value="")

get_next_token

get_next_token() -> Token
Source code in src/arx/lexer/core.py
280
281
282
283
284
285
286
287
288
289
290
def get_next_token(self) -> Token:
    """
    title: Provide a simple token buffer.
    returns:
      type: Token
      description: >-
        The current token the parser is looking at. Reads another token
        from the lexer and updates cur_tok with its results.
    """
    # Cache the freshly-read token so parsers can re-inspect it via
    # cur_tok without consuming another one.
    next_tok = self.get_token()
    self.cur_tok = next_tok
    return next_tok

get_token

get_token() -> Token
Source code in src/arx/lexer/core.py
269
270
271
272
273
274
275
276
277
278
def get_token(self) -> Token:
    """
    title: Get the next token.
    returns:
      type: Token
      description: The next token from standard input.
    """
    # Read at the cursor, then move the cursor forward one slot.
    current = self.tokens[self.position]
    self.position += 1
    return current