Skip to content

parser

The parser module gathers all functions and classes used for parsing.

Classes:

Parser

Parser(tokens: TokenList = TokenList([]))

Parser class.

Methods:

Source code in src/arx/parser.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def __init__(self, tokens: "TokenList | None" = None) -> None:
    """
    Instantiate the Parser object.

    Parameters
    ----------
    tokens : TokenList, optional
        Initial token list; mainly useful for tests. When omitted (or
        ``None``), a fresh empty ``TokenList`` is created for this
        instance.
    """
    # Binary operator precedence table: a larger number binds tighter.
    self.bin_op_precedence: dict[str, int] = {
        "=": 2,
        "<": 10,
        ">": 10,
        "+": 20,
        "-": 20,
        "*": 40,
    }
    self.indent_level: int = 0
    # Build the default here rather than in the signature: a default
    # expression is evaluated once, so every Parser created without
    # arguments would otherwise share one stateful TokenList object.
    self.tokens: TokenList = TokenList([]) if tokens is None else tokens

clean

clean() -> None

Reset the parser's instance state (indentation level and token list).

Source code in src/arx/parser.py
37
38
39
40
def clean(self) -> None:
    """Put the parser back into its initial state.

    The token list is replaced by a new empty ``TokenList`` and the
    indentation level goes back to zero.
    """
    self.tokens: TokenList = TokenList([])
    self.indent_level = 0

get_tok_precedence

get_tok_precedence() -> int

Get the precedence of the pending binary operator token.

Returns:

  • int

    The token precedence.

Source code in src/arx/parser.py
79
80
81
82
83
84
85
86
87
88
def get_tok_precedence(self) -> int:
    """
    Look up the binary-operator precedence of the current token.

    Returns
    -------
    int
        The precedence registered in ``bin_op_precedence`` for the
        current token's value, or ``-1`` when the value is not a known
        binary operator.
    """
    pending_value = self.tokens.cur_tok.value
    precedence_table = self.bin_op_precedence
    return precedence_table.get(pending_value, -1)

parse

parse(
    tokens: TokenList, module_name: str = "main"
) -> Block

Parse the input code.

Returns:

  • Block

    The parsed abstract syntax tree (AST). Parsing errors are raised as exceptions; None is never returned.

Source code in src/arx/parser.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def parse(
    self, tokens: TokenList, module_name: str = "main"
) -> astx.Block:
    """
    Build the module-level AST from a token stream.

    Parameters
    ----------
    tokens : TokenList
        Token stream to consume; it replaces any tokens held before.
    module_name : str
        Name given to the resulting module node.

    Returns
    -------
    astx.Block
        The ``astx.Module`` holding every parsed top-level node.
    """
    self.clean()
    self.tokens = tokens

    module: astx.Module = astx.Module(module_name)
    semicolon = Token(kind=TokenKind.operator, value=";")

    self.tokens.get_next_token()
    # Step once more when the stream still sits on its placeholder token.
    if self.tokens.cur_tok.kind == TokenKind.not_initialized:
        self.tokens.get_next_token()

    while self.tokens.cur_tok.kind != TokenKind.eof:
        if self.tokens.cur_tok == semicolon:
            # Top-level semicolons carry no meaning; drop them.
            self.tokens.get_next_token()
            continue

        kind = self.tokens.cur_tok.kind
        if kind == TokenKind.kw_function:
            node = self.parse_function()
        elif kind == TokenKind.kw_extern:
            node = self.parse_extern()
        else:
            node = self.parse_expression()
        module.nodes.append(node)

    return module

parse_bin_op_rhs

parse_bin_op_rhs(expr_prec: int, lhs: Expr) -> Expr

Parse a binary expression.

Parameters:

  • expr_prec (int) –

    Expression precedence (deprecated).

  • lhs (Expr) –

    Left-hand side expression.

Returns:

  • Expr

    The parsed binary expression, or None if parsing fails.

Source code in src/arx/parser.py
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
def parse_bin_op_rhs(
    self,
    expr_prec: int,
    lhs: astx.Expr,
) -> astx.Expr:
    """
    Fold a run of ``<operator> <operand>`` pairs onto ``lhs``.

    Parameters
    ----------
    expr_prec : int
        Lowest operator precedence this call is allowed to consume.
    lhs : astx.Expr
        Expression already parsed to the left of the pending operator.

    Returns
    -------
    astx.Expr
        ``lhs`` unchanged when the pending token binds more loosely than
        ``expr_prec``; otherwise the combined ``astx.BinaryOp`` tree.
    """
    # Keep consuming while the pending token is a binary operator that
    # binds at least as tightly as the caller's threshold.
    while (op_prec := self.get_tok_precedence()) >= expr_prec:
        op_token = self.tokens.cur_tok
        operator: str = op_token.value
        op_loc: SourceLocation = op_token.location
        self.tokens.get_next_token()  # step past the operator

        # The operand right after the operator.
        right: astx.Expr = self.parse_unary()

        # When the operator that follows binds tighter, it claims
        # ``right`` as its own left operand first.
        if self.get_tok_precedence() > op_prec:
            right = self.parse_bin_op_rhs(op_prec + 1, right)

        lhs = astx.BinaryOp(
            operator,
            cast(astx.DataType, lhs),
            cast(astx.DataType, right),
            loc=op_loc,
        )

    return lhs

parse_block

parse_block() -> Block

Parse a block of nodes.

Source code in src/arx/parser.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def parse_block(self) -> astx.Block:
    """
    Parse a block of nodes.

    The current token's value is read as the indentation width of the
    block, and that token is consumed before anything else is parsed.

    Returns
    -------
    astx.Block
        The expressions parsed at the new indentation level. The block
        stays empty when the indentation is smaller than the parser's
        current level.

    Raises
    ------
    ParserException
        If the indentation equals the current level (no new block), or
        if a deeper indentation appears between two expressions of the
        block.
    """
    cur_indent: int = self.tokens.cur_tok.value

    self.tokens.get_next_token()  # eat indentation

    block: astx.Block = astx.Block()

    if cur_indent == self.indent_level:
        raise ParserException("There is no new block to be parsed.")

    if cur_indent > self.indent_level:
        self.indent_level = cur_indent

        # NOTE(review): the loop condition relies on the parsed node being
        # truthy; a falsy node would end the block silently - confirm.
        while expr := self.parse_expression():
            block.nodes.append(expr)
            # Anything other than an indentation token ends the block.
            if self.tokens.cur_tok.kind != TokenKind.indent:
                break

            new_indent = self.tokens.cur_tok.value

            # Dedent: stop here and leave the indentation token unconsumed.
            if new_indent < cur_indent:
                break

            if new_indent > cur_indent:
                raise ParserException("Indentation not allowed here.")

            self.tokens.get_next_token()  # eat indentation

    # NOTE(review): this runs even when no block was entered above, and it
    # presumably assumes each nesting step is exactly INDENT_SIZE wide,
    # since the level is restored by subtraction - confirm.
    self.indent_level -= INDENT_SIZE
    return block

parse_expression

parse_expression() -> Expr

Parse an expression.

Returns:

  • Expr

    The parsed expression, or None if parsing fails.

Source code in src/arx/parser.py
187
188
189
190
191
192
193
194
195
196
197
def parse_expression(self) -> astx.Expr:
    """
    Parse a full expression: one unary operand plus any binary tail.

    Returns
    -------
    astx.Expr
        The resulting expression node.
    """
    first_operand: astx.Expr = self.parse_unary()
    # Threshold 0 admits every operator with a non-negative precedence.
    whole_expr = self.parse_bin_op_rhs(0, first_operand)
    return whole_expr

parse_extern

parse_extern() -> FunctionPrototype

Parse the extern expression.

Returns:

  • FunctionPrototype

    The parsed extern expression as a prototype, or None if parsing fails.

Source code in src/arx/parser.py
103
104
105
106
107
108
109
110
111
112
113
114
def parse_extern(self) -> astx.FunctionPrototype:
    """
    Parse an ``extern`` declaration.

    Returns
    -------
    astx.FunctionPrototype
        The prototype produced by ``parse_extern_prototype`` for the
        tokens that follow the ``extern`` keyword.
    """
    self.tokens.get_next_token()  # skip the `extern` keyword
    prototype = self.parse_extern_prototype()
    return prototype

parse_extern_prototype

parse_extern_prototype() -> FunctionPrototype

Parse an extern prototype expression.

Returns:

  • FunctionPrototype

    The parsed extern prototype, or None if parsing fails.

Source code in src/arx/parser.py
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
def parse_extern_prototype(self) -> astx.FunctionPrototype:
    """
    Parse the ``name(arg, arg, ...)`` part of an extern declaration.

    Returns
    -------
    astx.FunctionPrototype
        Prototype whose arguments and return value are all typed
        ``astx.Float32``.

    Raises
    ------
    Exception
        If the function name, the opening ``(`` or the closing ``)`` is
        missing.
    """
    name_tok = self.tokens.cur_tok
    fn_loc = name_tok.location

    if name_tok.kind != TokenKind.identifier:
        raise Exception("Parser: Expected function name in prototype")
    fn_name: str = name_tok.value
    self.tokens.get_next_token()

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value="("):
        raise Exception("Parser: Expected '(' in the function definition.")

    args = astx.Arguments()
    # Every pass first steps onto the next token: the one after `(` on
    # the first pass, the one after `,` on later passes.
    while self.tokens.get_next_token().kind == TokenKind.identifier:
        arg_name: str = self.tokens.cur_tok.value
        arg_loc: SourceLocation = self.tokens.cur_tok.location

        # TODO: type should be defined dynamic
        arg_type: astx.DataType = astx.Float32()
        args.append(astx.Argument(arg_name, arg_type, loc=arg_loc))

        following = self.tokens.get_next_token()
        if following != Token(kind=TokenKind.operator, value=","):
            break

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=")"):
        raise Exception("Parser: Expected ')' in the function definition.")

    self.tokens.get_next_token()  # skip `)`

    ret_type: astx.DataType = astx.Float32()
    return astx.FunctionPrototype(fn_name, args, ret_type, loc=fn_loc)

parse_float_expr

parse_float_expr() -> LiteralFloat32

Parse the number expression.

Returns:

  • LiteralFloat32

    The parsed float expression.

Source code in src/arx/parser.py
250
251
252
253
254
255
256
257
258
259
260
261
def parse_float_expr(self) -> astx.LiteralFloat32:
    """
    Turn the current token into a float literal node.

    Returns
    -------
    astx.LiteralFloat32
        Literal built from the current token's value.
    """
    literal_value = self.tokens.cur_tok.value
    node = astx.LiteralFloat32(literal_value)
    self.tokens.get_next_token()  # move past the number
    return node

parse_for_stmt

parse_for_stmt() -> ForRangeLoopStmt

Parse the for expression.

Returns:

  • ForRangeLoopStmt

    The parsed for expression, or None if parsing fails.

Source code in src/arx/parser.py
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
def parse_for_stmt(self) -> astx.ForRangeLoopStmt:
    """
    Parse a ``for <name> = <start>, <end>[, <step>] in <body>`` loop.

    Returns
    -------
    astx.ForRangeLoopStmt
        Loop node; the step defaults to the literal ``1.0`` and the body
        block holds the single expression that follows ``in``.

    Raises
    ------
    Exception
        If the loop variable, ``=``, the ``,`` after the start value or
        the ``in`` keyword is missing.
    """
    self.tokens.get_next_token()  # skip `for`

    loop_var_tok = self.tokens.cur_tok
    if loop_var_tok.kind != TokenKind.identifier:
        raise Exception("Parser: Expected identifier after for")

    # TODO: type should be defined dynamic
    inline_var = astx.InlineVariableDeclaration(
        loop_var_tok.value,
        astx.Float32(),
    )
    self.tokens.get_next_token()  # skip the loop variable name

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value="="):
        raise Exception("Parser: Expected '=' after for")
    self.tokens.get_next_token()  # skip `=`

    start: astx.Expr = self.parse_expression()
    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=","):
        raise Exception("Parser: Expected ',' after for start value")
    self.tokens.get_next_token()  # skip `,`

    end: astx.Expr = self.parse_expression()

    # A second comma introduces an explicit step; otherwise step by 1.0.
    has_step = self.tokens.cur_tok == Token(
        kind=TokenKind.operator, value=","
    )
    if has_step:
        self.tokens.get_next_token()
        step = self.parse_expression()
    else:
        step = astx.LiteralFloat32(1.0)

    if self.tokens.cur_tok.kind != TokenKind.kw_in:  # type: ignore
        raise Exception("Parser: Expected 'in' after for")
    self.tokens.get_next_token()  # skip `in`

    body_block: astx.Block = astx.Block()
    body_expr = self.parse_expression()
    body_block.nodes.append(body_expr)
    return astx.ForRangeLoopStmt(inline_var, start, end, step, body_block)

parse_function

parse_function() -> FunctionDef

Parse the function definition expression.

Returns:

  • FunctionDef

    The parsed function definition, or None if parsing fails.

Source code in src/arx/parser.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def parse_function(self) -> astx.FunctionDef:
    """
    Parse a function definition: its prototype followed by its body.

    Returns
    -------
    astx.FunctionDef
        Definition node built from the parsed prototype and block.
    """
    self.tokens.get_next_token()  # skip the `function` keyword
    prototype: astx.FunctionPrototype = self.parse_prototype()
    body: astx.Block = self.parse_block()
    return astx.FunctionDef(prototype, body)

parse_identifier_expr

parse_identifier_expr() -> Expr

Parse the identifier expression.

Returns:

  • Expr

    The parsed expression, or None if parsing fails.

Source code in src/arx/parser.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
def parse_identifier_expr(self) -> astx.Expr:
    """
    Parse an identifier as a variable reference or a function call.

    Returns
    -------
    astx.Expr
        ``astx.Variable`` when the identifier is not followed by ``(``,
        otherwise ``astx.FunctionCall`` with the parsed arguments.

    Raises
    ------
    Exception
        If an argument is followed by something other than ``,`` or
        ``)``.
    """
    ident_tok = self.tokens.cur_tok
    id_name: str = ident_tok.value
    id_loc: SourceLocation = ident_tok.location
    self.tokens.get_next_token()  # skip the identifier

    # TODO: var type should be dynamic
    var_type = astx.Float32()

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value="("):
        # Plain variable reference.
        # todo: we need to get the variable type from a specific scope
        return astx.Variable(id_name, var_type, loc=id_loc)

    # Function call.
    self.tokens.get_next_token()  # skip `(`
    call_args: list[astx.DataType] = []
    close_paren = Token(kind=TokenKind.operator, value=")")

    # Only the very first token decides whether the list is empty; after
    # a comma another argument is always parsed.
    has_args = self.tokens.cur_tok != close_paren
    while has_args:
        call_args.append(cast(astx.DataType, self.parse_expression()))

        if self.tokens.cur_tok == close_paren:
            break

        if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=","):
            raise Exception("Parser: Expected ')' or ',' in argument list")
        self.tokens.get_next_token()  # skip `,`

    self.tokens.get_next_token()  # skip `)`

    return astx.FunctionCall(id_name, call_args, loc=id_loc)

parse_if_stmt

parse_if_stmt() -> IfStmt

Parse the if expression.

Returns:

  • IfStmt

    The parsed if expression, or None if parsing fails.

Source code in src/arx/parser.py
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def parse_if_stmt(self) -> astx.IfStmt:
    """
    Parse an ``if <cond>:`` statement with an optional ``else:`` branch.

    Returns
    -------
    astx.IfStmt
        Node holding the condition, the ``then`` block and the ``else``
        block (an empty block when there is no ``else``).

    Raises
    ------
    Exception
        If ``:`` is missing after the condition or after ``else``.
    """
    if_loc: SourceLocation = self.tokens.cur_tok.location
    self.tokens.get_next_token()  # skip `if`

    cond: astx.Expr = self.parse_expression()

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=":"):
        raise Exception(
            "Parser: `if` statement expected ':', received: '"
            f"{self.tokens.cur_tok!s}'."
        )
    self.tokens.get_next_token()  # skip `:`

    else_block: astx.Block = astx.Block()
    then_block: astx.Block = self.parse_block()

    # An indentation token may sit between the `then` block and `else`.
    if self.tokens.cur_tok.kind == TokenKind.indent:
        self.tokens.get_next_token()

    if self.tokens.cur_tok.kind != TokenKind.kw_else:
        return astx.IfStmt(cond, then_block, else_block, loc=if_loc)

    self.tokens.get_next_token()  # skip `else`

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=":"):
        raise Exception(
            "Parser: `else` statement expected ':', received: '"
            f"{self.tokens.cur_tok!s}'."
        )
    self.tokens.get_next_token()  # skip `:`

    else_block = self.parse_block()
    return astx.IfStmt(cond, then_block, else_block, loc=if_loc)

parse_paren_expr

parse_paren_expr() -> Expr

Parse the parenthesis expression.

Returns:

  • Expr

    The parsed expression.

Source code in src/arx/parser.py
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def parse_paren_expr(self) -> astx.Expr:
    """
    Parse an expression wrapped in parentheses.

    Returns
    -------
    astx.Expr
        The inner expression; the parentheses add no node of their own.

    Raises
    ------
    Exception
        If the closing ``)`` is missing.
    """
    self.tokens.get_next_token()  # skip `(`
    inner = self.parse_expression()

    closing = Token(kind=TokenKind.operator, value=")")
    if self.tokens.cur_tok != closing:
        raise Exception("Parser: Expected ')'")

    self.tokens.get_next_token()  # skip `)`
    return inner

parse_primary

parse_primary() -> AST

Parse the primary expression.

Returns:

  • Expr

    The parsed primary expression, or None if parsing fails.

Source code in src/arx/parser.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def parse_primary(self) -> astx.AST:
    """
    Dispatch on the current token to the matching sub-parser.

    Returns
    -------
    astx.AST
        The node produced by the sub-parser selected for the token.

    Raises
    ------
    Exception
        If no sub-parser handles the current token; the token is
        consumed before the exception is raised.
    """
    tok = self.tokens.cur_tok

    if tok.kind == TokenKind.identifier:
        return self.parse_identifier_expr()
    if tok.kind == TokenKind.float_literal:
        return self.parse_float_expr()
    if tok == Token(kind=TokenKind.operator, value="("):
        return self.parse_paren_expr()
    if tok.kind == TokenKind.kw_if:
        return self.parse_if_stmt()
    if tok.kind == TokenKind.kw_for:
        return self.parse_for_stmt()
    if tok.kind == TokenKind.kw_var:
        return self.parse_var_expr()
    if tok == Token(kind=TokenKind.operator, value=";"):
        # A semicolon is skipped; parsing resumes with the next token.
        self.tokens.get_next_token()
        return self.parse_primary()
    if tok.kind == TokenKind.kw_return:
        return self.parse_return_function()
    if tok.kind == TokenKind.indent:
        return self.parse_block()

    # Build the message first: it must name the offending token, which
    # is then consumed before raising.
    msg: str = (
        "Parser: Unknown token when expecting an expression:"
        f"'{self.tokens.cur_tok.get_name()}'."
    )
    self.tokens.get_next_token()
    raise Exception(msg)

parse_prototype

parse_prototype() -> FunctionPrototype

Parse the prototype expression.

Returns:

  • FunctionPrototype

    The parsed prototype, or None if parsing fails.

Source code in src/arx/parser.py
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
def parse_prototype(self) -> astx.FunctionPrototype:
    """
    Parse a function header of the form ``name(arg, arg, ...):``.

    Returns
    -------
    astx.FunctionPrototype
        Prototype whose arguments and return value are all typed
        ``astx.Float32``.

    Raises
    ------
    Exception
        If the function name, ``(``, ``)`` or the trailing ``:`` is
        missing.
    """
    name_tok = self.tokens.cur_tok
    fn_loc: SourceLocation = name_tok.location

    if name_tok.kind != TokenKind.identifier:
        raise Exception("Parser: Expected function name in prototype")
    fn_name: str = name_tok.value
    self.tokens.get_next_token()

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value="("):
        raise Exception("Parser: Expected '(' in the function definition.")

    args = astx.Arguments()
    # Every pass first steps onto the next token: the one after `(` on
    # the first pass, the one after `,` on later passes.
    while self.tokens.get_next_token().kind == TokenKind.identifier:
        arg_name: str = self.tokens.cur_tok.value
        arg_loc: SourceLocation = self.tokens.cur_tok.location

        # TODO: type should be dynamic
        arg_type: astx.DataType = astx.Float32()
        args.append(astx.Argument(arg_name, arg_type, loc=arg_loc))

        following = self.tokens.get_next_token()
        if following != Token(kind=TokenKind.operator, value=","):
            break

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=")"):
        raise Exception("Parser: Expected ')' in the function definition.")
    self.tokens.get_next_token()  # skip `)`

    # TODO: type should be dynamic
    ret_type: astx.DataType = astx.Float32()

    if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=":"):
        raise Exception("Parser: Expected ':' in the function definition")
    self.tokens.get_next_token()  # skip `:`

    return astx.FunctionPrototype(fn_name, args, ret_type, loc=fn_loc)

parse_return_function

parse_return_function() -> FunctionReturn

Parse the return expression.

Returns:

  • FunctionReturn

    The parsed return expression, or None if parsing fails.

Source code in src/arx/parser.py
615
616
617
618
619
620
621
622
623
624
625
626
627
def parse_return_function(self) -> astx.FunctionReturn:
    """
    Parse a ``return <expression>`` statement.

    Returns
    -------
    astx.FunctionReturn
        Node wrapping the expression that follows ``return``.
    """
    self.tokens.get_next_token()  # skip `return`
    returned_value = cast(astx.DataType, self.parse_expression())
    return astx.FunctionReturn(returned_value)

parse_unary

parse_unary() -> UnaryOp

Parse a unary expression.

Returns:

  • Expr

    The parsed unary expression, or None if parsing fails.

Source code in src/arx/parser.py
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
def parse_unary(self) -> astx.UnaryOp:
    """
    Parse a prefix-operator expression, or fall through to a primary.

    Any operator token other than ``(`` and ``,`` is read as a prefix
    operator applied to the unary expression that follows it.

    Returns
    -------
    astx.UnaryOp
        ``astx.UnaryOp`` for a prefix operator; otherwise whatever
        ``parse_primary`` returns, cast to the declared type.
    """
    tok = self.tokens.cur_tok
    is_prefix_operator = (
        tok.kind == TokenKind.operator and tok.value not in ("(", ",")
    )
    if not is_prefix_operator:
        return cast(astx.UnaryOp, self.parse_primary())

    op_code: str = tok.value
    self.tokens.get_next_token()  # skip the operator
    operand = self.parse_unary()
    return astx.UnaryOp(op_code, operand)

parse_var_expr

parse_var_expr() -> VariableDeclaration

Parse the var declaration expression.

Returns:

  • VariableDeclaration

    The parsed var expression, or None if parsing fails.

Source code in src/arx/parser.py
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
def parse_var_expr(self) -> astx.VariableDeclaration:
    """
    Parse ``var <name>[= <init>][, <name>[= <init>]...] in <body>``.

    Returns
    -------
    astx.VariableDeclaration
        Declaration built from the ``(name, initializer)`` pairs, the
        type name ``"float"`` and the body expression. A variable without
        an initializer gets the literal ``0.0``.

    Raises
    ------
    Exception
        If an identifier is missing after ``var`` or after a ``,``, or
        if the ``in`` keyword is missing.
    """
    self.tokens.get_next_token()  # skip `var`

    # The list must start with at least one name.
    if self.tokens.cur_tok.kind != TokenKind.identifier:
        raise Exception("Parser: Expected identifier after var")

    declarations: list[tuple[str, astx.Expr]] = []

    while True:
        var_name: str = self.tokens.cur_tok.value
        self.tokens.get_next_token()  # skip the name

        initializer: astx.Expr
        has_initializer = self.tokens.cur_tok == Token(
            kind=TokenKind.operator, value="="
        )
        if has_initializer:
            self.tokens.get_next_token()  # skip `=`
            initializer = self.parse_expression()
        else:
            initializer = astx.LiteralFloat32(0.0)

        declarations.append((var_name, initializer))

        # No comma means the list of names is over.
        if self.tokens.cur_tok != Token(kind=TokenKind.operator, value=","):
            break
        self.tokens.get_next_token()  # skip `,`

        if self.tokens.cur_tok.kind != TokenKind.identifier:
            raise Exception("Parser: Expected identifier list after var")

    # The body is introduced by `in`.
    if self.tokens.cur_tok.kind != TokenKind.kw_in:  # type: ignore
        raise Exception("Parser: Expected 'in' keyword after 'var'")
    self.tokens.get_next_token()  # skip `in`

    body: astx.Expr = self.parse_expression()
    return astx.VariableDeclaration(declarations, "float", body)