package main import ( "fmt" "reflect" //"strconv" "strings" "github.com/z7zmey/php-parser/node" "github.com/z7zmey/php-parser/node/expr" "github.com/z7zmey/php-parser/node/expr/assign" "github.com/z7zmey/php-parser/node/expr/binary" "github.com/z7zmey/php-parser/node/name" "github.com/z7zmey/php-parser/node/scalar" "github.com/z7zmey/php-parser/node/stmt" ) func nodeTypeString(n node.Node) string { return reflect.TypeOf(n).String() } type parseErr struct { n node.Node childErr error } func (pe parseErr) Error() string { return fmt.Sprintf("Parsing %s on line %d: %s", nodeTypeString(pe.n), pe.n.GetPosition().StartLine, pe.childErr) } func (pe parseErr) Unwrap() error { return pe.childErr } // func convert(n_ node.Node) (string, error) { switch n := n_.(type) { // // node // case *node.Root: ret := "package main\n\n" // Hoist all declarations first, and put any top-level code into a generated main() function statements := []string{} for _, s := range n.Stmts { sm, err := convert(s) if err != nil { return "", parseErr{s, err} } switch s.(type) { case *stmt.Class, *stmt.Function: // Declaration - emit immediately (hoist) ret += sm + "\n" default: // Top-level function code - deter emission statements = append(statements, sm) } } // Emit deferred statements if len(statements) > 0 { ret += "func init() {\n" ret += "\t" + strings.Join(statements, "\n\t") + "\n" ret += "}\n" } return ret, nil // // stmt // case *stmt.StmtList: // TODO keep track of variable types within this scope ret := "{\n" // new variable scope for _, s := range n.Stmts { line, err := convert(s) if err != nil { return "", parseErr{s, err} } ret += line + "\n" } return ret + "}\n", nil case *stmt.Class: ret := "" className := n.ClassName.(*node.Identifier).Value memberVars := []string{} memberFuncs := []string{} // Walk all child nodes of the class for _, s_ := range n.Stmts { switch s := s_.(type) { case *stmt.PropertyList: // Class member variable // Doc comment // TODO scan for `@var {type}` strings // Name prop, ok := s.Properties[0].(*stmt.Property) if !ok { return "", parseErr{s, fmt.Errorf("unexpected propertylist structure")} } name := prop.Variable.(*expr.Variable).VarName.(*node.Identifier).Value // Type (unknown) memberType := unknownVarType // 'Modifiers' - protected public readonly ... // prop.Modifiers memberVars = append(memberVars, name+" "+memberType) case *stmt.ClassMethod: // Function name // If function is public/private/protected, set the first character to upper/lowercase funcName, err := applyVisibilityModifier(s.MethodName.(*node.Identifier).Value, s.Modifiers) if err != nil { return "", parseErr{s, err} } // Doc comment // TODO scan for `@param {type}` strings isConstructor := (strings.ToLower(funcName) == `__construct` || strings.ToLower(funcName) == strings.ToLower(className)) if isConstructor { // Constructor functions get transformed to NewFoo() (*Foo, error) // We need to force the return type returnType := name.NewName([]node.Node{name.NewNamePart(className)}) // We also need prefix + suffix statements allStmts := make([]node.Node, 0, 2+len(s.Stmt.(*stmt.StmtList).Stmts)) allStmts = append(allStmts, Literal{`this := &` + className + `{}`}) // TODO also insert variable type into the scope allStmts = append(allStmts, s.Stmt.(*stmt.StmtList).Stmts...) allStmts = append(allStmts, Literal{`return this, nil`}) // Method body funcStmt, err := convertFunctionCommon(s.Params, returnType, true /* always use ptr return */, allStmts) if err != nil { return "", parseErr{s, err} } memberFuncStmt := "func New" + className + funcStmt + "\n" memberFuncs = append(memberFuncs, memberFuncStmt) } else { // Method body funcStmt, err := convertFunctionCommon(s.Params, s.ReturnType, s.ReturnsRef, s.Stmt.(*stmt.StmtList).Stmts) if err != nil { return "", parseErr{s, err} } memberFuncStmt := "func (this *" + className + ") " + funcName + funcStmt + "\n" memberFuncs = append(memberFuncs, memberFuncStmt) } default: return "", parseErr{s, fmt.Errorf("Class '%s' contained unexpected AST node; expected PropertyList / ClassMethod", className)} } } // Create struct typedef containing all explicit properties ret += "type " + className + " struct {\n" ret += "\t" + strings.Join(memberVars, "\n\t") + "\n" ret += "}\n" // Create all member functions ret += strings.Join(memberFuncs, "\n\n") // Done return ret, nil case *stmt.Function: // Top-level function // TODO parse doc comment // FIXME is this the same as a closure? funcName := n.FunctionName.(*node.Identifier).Value // All top-level functions like this are public; ensure function name starts // with an uppercase letter funcName = toPublic(funcName) // Convert body funcStmt, err := convertFunctionCommon(n.Params, n.ReturnType, n.ReturnsRef, n.Stmts) if err != nil { return "", parseErr{n, err} } ret := "func " + funcName + funcStmt + "\n" return ret, nil case *stmt.Return: child, err := convert(n.Expr) if err != nil { return "", parseErr{n, err} } ret := "return " + child + ", nil\n" return ret, nil case *stmt.Throw: // throw (expr); // Treat as an err return // FIXME we don't know the default return type for the function we're in // If the expr is a string literal, we can convert it to errors.New() // Although we probably can't do this in general for stringly-typed expressions if str, ok := n.Expr.(*scalar.String); ok { return "return nil, errors.New(" + str.Value + ")\n", nil } child, err := convert(n.Expr) if err != nil { return "", parseErr{n, err} } return "return nil, " + child + "\n", nil case *stmt.For: if len(n.Init) != 1 { // We can handle the case of multiple init statements by hoisting them // above the loop. There is no negative impact on PHP scoping rules, but // it may cause an extra local variable after the loop that may result // in type mismatch (can be fixed by using an extra scope). return "", parseErr{n, fmt.Errorf("for loop can only have 1 init clause, found %d", len(n.Init))} } finit, err := convert(n.Init[0]) if err != nil { return "", parseErr{n, err} } if len(n.Init) != 1 { return "", parseErr{n, fmt.Errorf("for loop can only have 1 cond clause, found %d", len(n.Cond))} } fcond, err := convert(n.Cond[0]) if err != nil { return "", parseErr{n, err} } if len(n.Init) != 1 { return "", parseErr{n, fmt.Errorf("for loop can only have 1 loop clause, found %d", len(n.Loop))} } loopStmt := n.Loop[0] if preinc, ok := loopStmt.(*expr.PreInc); ok { // It's idiomatic to do for (,, ++i) but preincrement doesn't exist in Go // Luckily for the case of a for loop, we can just swap it to postincrement loopStmt = expr.NewPostInc(preinc.Variable) } else if predec, ok := loopStmt.(*expr.PreDec); ok { // Likewise loopStmt = expr.NewPostDec(predec.Variable) } floop, err := convert(loopStmt) if err != nil { return "", parseErr{n, err} } body, err := convert(convertToStmtList(n.Stmt)) if err != nil { return "", parseErr{n, err} } return "for " + finit + "; " + fcond + "; " + floop + " " + body + "\n", nil case *stmt.Foreach: iterand, err := convert(n.Expr) if err != nil { return "", parseErr{n, err} } valueReceiver, err := convert(n.Variable) if err != nil { return "", parseErr{n, err} } keyReceiver := `_` if n.Key != nil { keyReceiver, err = convert(n.Key) if err != nil { return "", parseErr{n, err} } } body, err := convert(convertToStmtList(n.Stmt)) if err != nil { return "", parseErr{n, err} } return "for " + keyReceiver + ", " + valueReceiver + " := range " + iterand + " " + body + "\n", nil case *stmt.While: cond, err := convert(n.Cond) if err != nil { return "", parseErr{n, err} } body, err := convert(convertToStmtList(n.Stmt)) if err != nil { return "", parseErr{n, err} } return "for " + cond + " " + body + "\n", nil case *stmt.Do: cond, err := convert(n.Cond) if err != nil { return "", parseErr{n, err} } bodyStmts := convertToStmtList(n.Stmt) bodyStmts.Stmts = append(bodyStmts.Stmts, Literal{"if " + cond + "{\nbreak\n}"}) body, err := convert(bodyStmts) if err != nil { return "", parseErr{n, err} } return "for " + cond + " " + body + "\n", nil case *stmt.Expression: child, err := convert(n.Expr) if err != nil { return "", parseErr{n, err} } ret := child + "\n" // standalone expression statement return ret, nil case *stmt.Echo: // Convert into fmt.Print args := make([]string, 0, len(n.Exprs)) for _, expr := range n.Exprs { exprGo, err := convert(expr) if err != nil { return "", parseErr{n, err} } args = append(args, exprGo) } return "fmt.Print(" + strings.Join(args, ", ") + ")\n", nil // newline - standalone statement // // assign // case *assign.Assign: lvalue, err := convert(n.Variable) // might be a more complicated lvalue if err != nil { return "", parseErr{n, err} } rvalue, err := convert(n.Expression) if err != nil { return "", parseErr{n, err} } // TODO this may need to use `:=` return lvalue + " = " + rvalue, nil // // special literals // case Literal: return n.Value, nil case *node.Identifier: return n.Value, nil // // expr // case *expr.FunctionCall: // All our generated functions return err, but this AST node may be in a single-rvalue context // TODO do something more intelligent here // We can't necessarily hoist the whole call, in case we are on the right-hand side of a && operator funcName, err := resolveName(n.Function) if err != nil { return "", parseErr{n, err} } callParams, err := convertFuncCallArgsCommon(n.ArgumentList) if err != nil { return "", parseErr{n, err} } return funcName + callParams, nil // expr only, no semicolon/newline case *expr.New: // new foo(xx) // Transparently convert to calling constructor function. nn, err := resolveName(n.Class) if err != nil { return "", parseErr{n, err} } // FIXME if there is a package specifier embedded in the result name, // the `New` will appear in the wrong place nn = `New` + nn // Convert resolved back to node.Name transparentNameNode := name.NewName([]node.Node{name.NewNamePart(nn)}) return convert(expr.NewFunctionCall(transparentNameNode, n.ArgumentList)) case *expr.PreInc: // """In Go, i++ is a statement, not an expression. So you can't use its value in another expression such as a function call.""" v, err := convert(n.Variable) if err != nil { return "", parseErr{n, err} } return "++" + v, nil case *expr.PostInc: // """In Go, i++ is a statement, not an expression. So you can't use its value in another expression such as a function call.""" v, err := convert(n.Variable) if err != nil { return "", parseErr{n, err} } return v + "++", nil case *expr.MethodCall: // Foo->Bar(Baz) parent, err := convert(n.Variable) if err != nil { return "", parseErr{n, err} } child, err := convert(n.Method) if err != nil { return "", parseErr{n, err} } args, err := convertFuncCallArgsCommon(n.ArgumentList) if err != nil { return "", parseErr{n, err} } return parent + "." + child + args, nil case *expr.PropertyFetch: // Foo->Bar parent, err := convert(n.Variable) if err != nil { return "", parseErr{n, err} } child, err := convert(n.Property) if err != nil { return "", parseErr{n, err} } return parent + "." + child, nil case *expr.Variable: return n.VarName.(*node.Identifier).Value, nil case *expr.ConstFetch: return resolveName(n.Constant) // // binary // case *binary.Plus: // PHP uses + for numbers, `.` for strings; Go uses `+` in both cases return convertBinaryCommon(n.Left, n.Right, `+`) case *binary.Smaller: return convertBinaryCommon(n.Left, n.Right, `<`) case *binary.SmallerOrEqual: return convertBinaryCommon(n.Left, n.Right, `<=`) case *binary.Greater: return convertBinaryCommon(n.Left, n.Right, `>`) case *binary.GreaterOrEqual: return convertBinaryCommon(n.Left, n.Right, `>=`) case *binary.Equal: return convertBinaryCommon(n.Left, n.Right, `==`) case *binary.Identical: // PHP triple-equals return convertBinaryCommon(n.Left, n.Right, `===`) case *binary.Concat: // PHP uses + for numbers, `.` for strings; Go uses `+` in both cases return convertBinaryCommon(n.Left, n.Right, `+`) // // scalar // case *scalar.Lnumber: return n.Value, nil // number formats are compatible case *scalar.String: return n.Value, nil // It's already quoted in PHP format // return strconv.Quote(n.Value), nil // Go source code quoting format // // // default: return "", fmt.Errorf("unsupported node type %s", nodeTypeString(n)) } } // applyVisibilityModifier renames a function to use an upper/lowercase first // letter based on PHP visibility modifiers. func applyVisibilityModifier(funcName string, modifiers []node.Node) (string, error) { isPublic := true for _, mod := range modifiers { ident, ok := mod.(*node.Identifier) if !ok { return "", parseErr{mod, fmt.Errorf("expected node.Identifier")} } switch ident.Value { case "public": isPublic = true case "private", "protected": isPublic = false } } if isPublic { return toPublic(funcName), nil } else { return toPrivate(funcName), nil } } func toPublic(name string) string { nFirst := name[0:1] uFirst := strings.ToUpper(nFirst) if nFirst == uFirst { return name // avoid making more heap garbage } return uFirst + name[1:] } func toPrivate(name string) string { nFirst := name[0:1] lFirst := strings.ToLower(nFirst) if nFirst == lFirst { return name // avoid making more heap garbage } return lFirst + name[1:] } // resolveName turns a `*name.Name` node into a Go string. func resolveName(n node.Node) (string, error) { // TODO support namespace lookups paramType := unknownVarType if vt, ok := n.(*name.Name); ok { if len(vt.Parts) != 1 { return "", parseErr{n, fmt.Errorf("name has %d parts, expected 1", len(vt.Parts))} } paramType = vt.Parts[0].(*name.NamePart).Value } return paramType, nil } // convertToStmtList asserts that the node is either a StmtList or wraps it in a // single-stmt StmtList if not. // Loop bodies may be a StmtList if it is wrapped in {}, or a single statement // if it is not; we want to enforce the use of {} for all loop bodies func convertToStmtList(n node.Node) *stmt.StmtList { if sl, ok := n.(*stmt.StmtList); ok { return sl // It's already a StmtList } return stmt.NewStmtList([]node.Node{n}) } func convertBinaryCommon(left, right node.Node, goBinaryOperator string) (string, error) { // PHP uses + for numbers, `.` for strings; Go uses `+` in both cases // Assume PHP/Go have the same associativity here lhs, err := convert(left) if err != nil { return "", parseErr{left, err} } rhs, err := convert(right) if err != nil { return "", parseErr{right, err} } return "(" + lhs + " " + goBinaryOperator + " " + rhs + ")", nil } func convertFuncCallArgsCommon(args *node.ArgumentList) (string, error) { callParams := make([]string, 0, len(args.Arguments)) for _, arg_ := range args.Arguments { arg, ok := arg_.(*node.Argument) if !ok { return "", parseErr{arg_, fmt.Errorf("expected node.Argument")} } rvalue, err := convert(arg.Expr) if err != nil { return "", parseErr{arg, err} } if arg.IsReference { rvalue = "&" + rvalue } if arg.Variadic { rvalue = "..." + rvalue } callParams = append(callParams, rvalue) } return "(" + strings.Join(callParams, `, `) + ")", nil // expr only, no semicolon/newline } func convertFunctionCommon(params []node.Node, returnType node.Node, returnsRef bool, bodyStmts []node.Node) (string, error) { // TODO scan function and see if it contains any return statements at all // If not, then we only need an err return parameter, not anything else funcParams := []string{} for _, param := range params { param, ok := param.(*node.Parameter) // shadow if !ok { return "", parseErr{param, fmt.Errorf("expected node.Parameter")} } // VariableType: might be nil for untyped parameters paramType, err := resolveName(param.VariableType) if err != nil { return "", parseErr{param, err} } if param.ByRef { paramType = "*" + paramType } if param.Variadic { paramType = "..." + paramType } // Name paramName := param.Variable.(*expr.Variable).VarName.(*node.Identifier).Value funcParams = append(funcParams, paramName+" "+paramType) } // ReturnType funcReturn, err := resolveName(returnType) if err != nil { return "", parseErr{returnType, err} } if returnsRef { funcReturn = "*" + funcReturn } // Build function prototype ret := "(" + strings.Join(funcParams, ", ") + ") (" + funcReturn + ", error) {\n" // Recurse through body statements for _, s := range bodyStmts { bodyStmt, err := convert(s) if err != nil { return "", parseErr{s, err} } ret += bodyStmt + "\n" } // Done // No extra trailing newline in case this is part of a large expression ret += "}" return ret, nil }