From 913c100d8c6524bf923bce90ea08ab07751f8d14 Mon Sep 17 00:00:00 2001 From: Felix Frank Date: Sun, 24 Mar 2024 19:00:56 +0000 Subject: [PATCH] lang: parser: Better semantics for setting positions --- lang/ast/structs.go | 25 ++++---- lang/gapi/gapi.go | 6 ++ lang/parser/parser.y | 149 +++++++++++++++++++++---------------------- 3 files changed, 93 insertions(+), 87 deletions(-) diff --git a/lang/ast/structs.go b/lang/ast/structs.go index 7f0700eaed..f24792cd42 100644 --- a/lang/ast/structs.go +++ b/lang/ast/structs.go @@ -189,7 +189,7 @@ type TextArea struct { } // Locate is used by the parser to store the token positions in AST nodes -func (a TextArea) Locate(line int, col int, endline int, endcol int) { +func (a *TextArea) Locate(line int, col int, endline int, endcol int) { a.startLine = line a.startColumn = col a.endLine = endline @@ -200,6 +200,8 @@ func (a TextArea) Locate(line int, col int, endline int, endcol int) { // position. It is implemented by node types that embed TextArea. type LocalNode interface { Locate(int, int, int, int) + GetPosition() (int, int) + GetEndPosition() (int, int) } // GetPosition returns the starting line/column of an AST node @@ -222,7 +224,7 @@ type StmtBind struct { // String returns a short representation of this statement. func (obj *StmtBind) String() string { - return fmt.Sprintf("bind(%s)", obj.Ident) + return fmt.Sprintf("bind(%s) @ (%d %d)", obj.Ident, obj.startLine+1, obj.startColumn+1) } // Apply is a general purpose iterator method that operates on any AST node. It @@ -255,10 +257,9 @@ func (obj *StmtBind) Interpolate() (interfaces.Stmt, error) { if err != nil { return nil, err } - return &StmtBind{ - Ident: obj.Ident, - Value: interpolated, - }, nil + result := *obj + result.Value = interpolated + return &result, nil } // Copy returns a light copy of this struct. Anything static will not be copied. 
@@ -2610,6 +2611,8 @@ func (obj *StmtIf) String() string { s += fmt.Sprintf(" else { %s }", obj.ElseBranch.String()) } + s += fmt.Sprintf(" @ (%d %d)", obj.startLine+1, obj.startColumn+1) + return s } @@ -2676,11 +2679,11 @@ func (obj *StmtIf) Interpolate() (interfaces.Stmt, error) { return nil, errwrap.Wrapf(err, "could not interpolate ElseBranch") } } - return &StmtIf{ - Condition: condition, - ThenBranch: thenBranch, - ElseBranch: elseBranch, - }, nil + result := *obj + result.Condition = condition + result.ThenBranch = thenBranch + result.ElseBranch = elseBranch + return &result, nil } // Copy returns a light copy of this struct. Anything static will not be copied. diff --git a/lang/gapi/gapi.go b/lang/gapi/gapi.go index b75fead9fe..6c1005c1f3 100644 --- a/lang/gapi/gapi.go +++ b/lang/gapi/gapi.go @@ -261,6 +261,12 @@ func (obj *GAPI) Cli(info *gapi.Info) (*gapi.Deploy, error) { return nil, nil // success! } + fmt.Println("The Interpolated Tree: %+v") + iast.Apply(func (n interfaces.Node) error { + fmt.Println(n) + return nil + }) + if !args.SkipUnify { // apply type unification unificationLogf := func(format string, v ...interface{}) { diff --git a/lang/parser/parser.y b/lang/parser/parser.y index b1371ebed7..2cf218a7b9 100644 --- a/lang/parser/parser.y +++ b/lang/parser/parser.y @@ -151,7 +151,7 @@ prog: $$.stmt = &ast.StmtProg{ Body: []interfaces.Stmt{}, } - locate(yylex, yyDollar, $$.stmt) + //locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } | prog stmt { @@ -164,7 +164,7 @@ prog: $$.stmt = &ast.StmtProg{ Body: stmts, } - locate(yylex, yyDollar, $$.stmt) + //locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } } ; @@ -174,27 +174,27 @@ stmt: $$.stmt = &ast.StmtComment{ Value: $1.str, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } | bind { $$.stmt = $1.stmt - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } | panic { $$.stmt = $1.stmt - locate(yylex, yyDollar, 
$$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } | resource { $$.stmt = $1.stmt - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } | edge { $$.stmt = $1.stmt - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } | IF expr OPEN_CURLY prog CLOSE_CURLY { @@ -203,11 +203,11 @@ stmt: ThenBranch: $4.stmt, //ElseBranch: nil, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } | IF expr OPEN_CURLY prog CLOSE_CURLY ELSE OPEN_CURLY prog CLOSE_CURLY { - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) $$.stmt = &ast.StmtIf{ Condition: $2.expr, ThenBranch: $4.stmt, @@ -220,7 +220,7 @@ stmt: // `func name(, ) { }` | FUNC_IDENTIFIER IDENTIFIER OPEN_PAREN args CLOSE_PAREN OPEN_CURLY expr CLOSE_CURLY { - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) $$.stmt = &ast.StmtFunc{ Name: $2.str, Func: &ast.ExprFunc{ @@ -266,7 +266,7 @@ stmt: Name: $2.str, Func: fn, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `class name { }` | CLASS_IDENTIFIER colon_identifier OPEN_CURLY prog CLOSE_CURLY @@ -276,7 +276,7 @@ stmt: Args: nil, Body: $4.stmt, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `class name() { }` // `class name(, ) { }` @@ -287,7 +287,7 @@ stmt: Args: $4.args, Body: $7.stmt, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `include name` | INCLUDE_IDENTIFIER dotted_identifier @@ -295,7 +295,7 @@ stmt: $$.stmt = &ast.StmtInclude{ Name: $2.str, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `include name(...)` | INCLUDE_IDENTIFIER dotted_identifier OPEN_PAREN call_args CLOSE_PAREN @@ -304,7 +304,7 @@ stmt: Name: $2.str, Args: $4.exprs, } - locate(yylex, 
yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `include name as foo` // TODO: should we support: `include name as *` @@ -314,7 +314,7 @@ stmt: Name: $2.str, Alias: $4.str, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `include name(...) as foo` // TODO: should we support: `include name(...) as *` @@ -325,7 +325,7 @@ stmt: Args: $4.exprs, Alias: $7.str, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `import "name"` | IMPORT_IDENTIFIER STRING @@ -334,7 +334,7 @@ stmt: Name: $2.str, //Alias: "", } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `import "name" as alias` | IMPORT_IDENTIFIER STRING AS_IDENTIFIER IDENTIFIER @@ -343,7 +343,7 @@ stmt: Name: $2.str, Alias: $4.str, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `import "name" as *` | IMPORT_IDENTIFIER STRING AS_IDENTIFIER MULTIPLY @@ -352,14 +352,14 @@ stmt: Name: $2.str, Alias: $4.str, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } /* // resource bind | rbind { $$.stmt = $1.stmt - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } */ ; @@ -369,64 +369,64 @@ expr: $$.expr = &ast.ExprBool{ V: $1.bool, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | STRING { $$.expr = &ast.ExprStr{ V: $1.str, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | INTEGER { $$.expr = &ast.ExprInt{ V: $1.int, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | FLOAT { $$.expr = &ast.ExprFloat{ V: $1.float, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | list { // TODO: list could be squashed in here directly... 
$$.expr = $1.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | map { // TODO: map could be squashed in here directly... $$.expr = $1.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | struct { // TODO: struct could be squashed in here directly... $$.expr = $1.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | call { // TODO: call could be squashed in here directly... $$.expr = $1.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | var { // TODO: var could be squashed in here directly... $$.expr = $1.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | func { // TODO: var could be squashed in here directly... $$.expr = $1.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | IF expr OPEN_CURLY expr CLOSE_CURLY ELSE OPEN_CURLY expr CLOSE_CURLY { @@ -435,13 +435,13 @@ expr: ThenBranch: $4.expr, ElseBranch: $8.expr, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } // parenthesis wrap an expression for precedence | OPEN_PAREN expr CLOSE_PAREN { $$.expr = $2.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; list: @@ -451,7 +451,7 @@ list: $$.expr = &ast.ExprList{ Elements: $2.exprs, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; list_elements: @@ -470,7 +470,7 @@ list_element: expr COMMA { $$.expr = $1.expr - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; map: @@ -480,7 +480,7 @@ map: $$.expr = &ast.ExprMap{ KVs: $2.mapKVs, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; map_kvs: @@ -512,7 +512,7 @@ struct: $$.expr = &ast.ExprStruct{ Fields: 
$3.structFields, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; struct_fields: @@ -547,7 +547,7 @@ call: Args: $3.exprs, //Var: false, // default } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } // calling a function that's stored in a variable (a lambda) // `$foo(4, "hey")` # call function value @@ -560,7 +560,7 @@ call: // prefix to the Name, but I felt this was more elegant. Var: true, // lambda } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr PLUS expr { @@ -574,7 +574,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr MINUS expr { @@ -588,7 +588,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr MULTIPLY expr { @@ -602,7 +602,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr DIVIDE expr { @@ -616,7 +616,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr EQ expr { @@ -630,7 +630,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr NEQ expr { @@ -644,7 +644,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr LT expr { @@ -658,7 +658,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr GT expr { @@ -672,7 +672,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr LTE expr { @@ -686,7 +686,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr GTE expr { @@ -700,7 +700,7 @@ 
call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr AND expr { @@ -714,7 +714,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr OR expr { @@ -728,7 +728,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | NOT expr { @@ -741,7 +741,7 @@ call: $2.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } // lookup an index in a list or a key in a map // lookup($foo, $key) @@ -756,7 +756,7 @@ call: //$6.expr, // the default }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } // lookup an index in a list or a key in a map with a default // lookup_default($foo, $key, $default) @@ -771,7 +771,7 @@ call: $6.expr, // the default }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } // lookup a field in a struct // _struct_lookup($foo, "field") @@ -788,7 +788,7 @@ call: //$5.expr, // the default }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } // lookup a field in a struct with a default // _struct_lookup_optional($foo, "field", "default") @@ -805,7 +805,7 @@ call: $5.expr, // the default }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } | expr IN expr { @@ -816,7 +816,7 @@ call: $3.expr, }, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; // list order gets us the position of the arg, but named params would work too! 
@@ -845,7 +845,7 @@ var: $$.expr = &ast.ExprVar{ Name: $1.str, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; func: @@ -860,7 +860,7 @@ func: //Return: nil, Body: $6.expr, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } // `func(...) { }` | FUNC_IDENTIFIER OPEN_PAREN args CLOSE_PAREN type OPEN_CURLY expr CLOSE_CURLY @@ -870,7 +870,7 @@ func: Return: $5.typ, // return type is known Body: $7.expr, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) isFullyTyped := $5.typ != nil // true if set m := make(map[string]*types.Type) ord := []string{} @@ -939,7 +939,7 @@ bind: Ident: $1.str, Value: $3.expr, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // `$x bool = true` // `$x int = if true { 42 } else { 13 }` @@ -954,7 +954,7 @@ bind: Ident: $1.str, Value: expr, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } ; panic: @@ -983,7 +983,7 @@ panic: ThenBranch: res, //ElseBranch: nil, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } ; /* TODO: do we want to include this? 
@@ -998,7 +998,7 @@ rbind: Ident: $1.str, Value: $3.stmt, } - locate(yylex, yyDollar, $$.expr) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.expr) } ; */ @@ -1011,7 +1011,7 @@ resource: Name: $2.expr, Contents: $4.resContents, } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } ; resource_body: @@ -1178,7 +1178,7 @@ edge: EdgeHalfList: $1.edgeHalfList, //Notify: false, // unused here } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } // Test["t1"].foo_send -> Test["t2"].blah_recv # send/recv | edge_half_sendrecv ARROW edge_half_sendrecv @@ -1190,7 +1190,7 @@ edge: }, //Notify: false, // unused here, it is implied (i think) } - locate(yylex, yyDollar, $$.stmt) + locate(yylex, $1, yyDollar[len(yyDollar)-1], $$.stmt) } ; edge_half_list: @@ -1482,22 +1482,19 @@ func cast(y yyLexer) *lexParseAST { // The posLast variant that specifies a node will store the coordinates in the // node. -func locate(y yyLexer, dollars []yySymType, node interfaces.Node) { - posLast(y, dollars) +func locate(y yyLexer, first yySymType, last yySymType, node interfaces.Node) { + posLast(y, []yySymType{last}) // TODO: is it really useful to store this in the Lexer? the values are erratic and likely unhelpful if ln, ok := node.(ast.LocalNode) ; !ok { return - } else { - first := dollars[0] - last := dollars[len(dollars)-1] + // only run Locate on nodes that look like they have not received locations yet + // otherwise the parser will come back and overwrite with faux end positions + } else if row, col := ln.GetPosition() ; row == 0 && col == 0 { ln.Locate(first.row, first.col, last.row, last.col) - fmt.Printf("Located a node: %v at (%v:%v-%v:%v) it has %i symbols\n", ln, - first.row, first.col, last.row, last.col, len(dollars)) } } -// postLast runs pos on the first and last token of the current stmt/expr. +// posLast runs pos on the last token of the current stmt/expr.
func posLast(y yyLexer, dollars []yySymType) { - pos(y, dollars[0]) pos(y, dollars[len(dollars)-1]) }