php2go/node.go

814 lines
21 KiB
Go

package main
import (
	"fmt"
	"reflect"
	"strconv"
	//"strconv"
	"strings"
	"unicode/utf8"

	"github.com/z7zmey/php-parser/node"
	"github.com/z7zmey/php-parser/node/expr"
	"github.com/z7zmey/php-parser/node/expr/assign"
	"github.com/z7zmey/php-parser/node/expr/binary"
	"github.com/z7zmey/php-parser/node/name"
	"github.com/z7zmey/php-parser/node/scalar"
	"github.com/z7zmey/php-parser/node/stmt"
)
// nodeTypeString returns the dynamic Go type of an AST node as a string
// (e.g. "*stmt.Function"), for use in diagnostics.
//
// A nil node yields "<nil>": reflect.TypeOf(nil) returns a nil reflect.Type,
// and calling String() on that would panic.
func nodeTypeString(n node.Node) string {
	t := reflect.TypeOf(n)
	if t == nil {
		return "<nil>"
	}
	return t.String()
}
// parseErr is an error that records which AST node was being converted when
// an underlying error occurred. Values are constructed positionally as
// parseErr{node, err} throughout this file.
type parseErr struct {
	// n is the AST node that was being processed when the error happened.
	n node.Node
	// childErr is the underlying cause; it is exposed through Unwrap.
	childErr error
}
// Error implements the error interface. The message names the node type, the
// source line the node starts on, and the underlying error.
//
// The line is reported as 0 when the node is nil or carries no position
// (for example, nodes synthesized during conversion rather than produced by
// the parser); previously both situations caused a nil dereference.
func (pe parseErr) Error() string {
	typeName := "<nil>"
	line := 0
	if pe.n != nil {
		typeName = nodeTypeString(pe.n)
		if pos := pe.n.GetPosition(); pos != nil {
			line = pos.StartLine
		}
	}
	return fmt.Sprintf("Parsing %s on line %d: %s", typeName, line, pe.childErr)
}
// Unwrap exposes the wrapped cause so that errors.Is / errors.As can look
// through a parseErr.
func (e parseErr) Unwrap() error {
	cause := e.childErr
	return cause
}
// convert translates a single PHP AST node into Go source text.
//
// It is one large type switch that recurses into child nodes. Statement nodes
// produce text terminated by a newline; expression nodes produce bare
// expression text with no terminator. On failure the returned string is empty
// and the error is (usually) a parseErr naming the node being processed.
//
// Generated functions always return (value, error), so `return` and `throw`
// are both emitted as two-value return statements.
func convert(n_ node.Node) (string, error) {
	// A nil node would otherwise fall through to the default case and panic
	// while formatting its type.
	if n_ == nil {
		return "", fmt.Errorf("unsupported nil node")
	}

	switch n := n_.(type) {

	//
	// node
	//

	case *node.Root:
		ret := "package main\n\n"

		// Hoist all declarations first, and put any top-level code into a
		// generated init() function
		statements := []string{}
		for _, s := range n.Stmts {
			sm, err := convert(s)
			if err != nil {
				return "", parseErr{s, err}
			}

			switch s.(type) {
			case *stmt.Class, *stmt.Function:
				// Declaration - emit immediately (hoist)
				ret += sm + "\n"
			default:
				// Top-level function code - defer emission
				statements = append(statements, sm)
			}
		}

		// Emit deferred statements
		if len(statements) > 0 {
			ret += "func init() {\n"
			ret += "\t" + strings.Join(statements, "\n\t") + "\n"
			ret += "}\n"
		}

		return ret, nil

	case *node.Identifier:
		return n.Value, nil

	case Literal:
		// Pre-rendered Go text injected by the converter itself
		return n.Value, nil

	//
	// stmt
	//

	case *stmt.StmtList:
		// TODO keep track of variable types within this scope
		ret := "{\n" // new variable scope
		for _, s := range n.Stmts {
			line, err := convert(s)
			if err != nil {
				return "", parseErr{s, err}
			}
			ret += line + "\n"
		}
		return ret + "}\n", nil

	case *stmt.Class:
		ret := ""
		className := n.ClassName.(*node.Identifier).Value
		memberVars := []string{}
		memberFuncs := []string{}

		// Walk all child nodes of the class
		for _, s_ := range n.Stmts {
			switch s := s_.(type) {
			case *stmt.PropertyList:
				// Class member variable(s). A single declaration may list
				// several properties (`public $a, $b;`), so emit every one.
				// Doc comment
				// TODO scan for `@var {type}` strings
				for _, p := range s.Properties {
					prop, ok := p.(*stmt.Property)
					if !ok {
						return "", parseErr{s, fmt.Errorf("unexpected propertylist structure")}
					}
					propName := prop.Variable.(*expr.Variable).VarName.(*node.Identifier).Value

					// Type (unknown)
					memberType := unknownVarType

					// 'Modifiers' - protected public readonly ...
					// prop.Modifiers
					memberVars = append(memberVars, propName+" "+memberType)
				}

			case *stmt.ClassMethod:
				// Function name
				// If function is public/private/protected, set the first character to upper/lowercase
				funcName, err := applyVisibilityModifier(s.MethodName.(*node.Identifier).Value, s.Modifiers)
				if err != nil {
					return "", parseErr{s, err}
				}

				// A method without a braced body (e.g. an abstract method)
				// cannot be converted; report it instead of panicking on the
				// type assertion.
				methodBody, ok := s.Stmt.(*stmt.StmtList)
				if !ok {
					return "", parseErr{s, fmt.Errorf("method '%s' has no statement list body", funcName)}
				}

				// Doc comment
				// TODO scan for `@param {type}` strings

				isConstructor := (strings.ToLower(funcName) == `__construct` || strings.ToLower(funcName) == strings.ToLower(className))
				if isConstructor {
					// Constructor functions get transformed to NewFoo() (*Foo, error)
					// We need to force the return type
					returnType := name.NewName([]node.Node{name.NewNamePart(className)})

					// We also need prefix + suffix statements
					allStmts := make([]node.Node, 0, 2+len(methodBody.Stmts))
					allStmts = append(allStmts, Literal{`this := &` + className + `{}`}) // TODO also insert variable type into the scope
					allStmts = append(allStmts, methodBody.Stmts...)
					allStmts = append(allStmts, Literal{`return this, nil`})

					// Method body
					funcStmt, err := convertFunctionCommon(s.Params, returnType, true /* always use ptr return */, allStmts)
					if err != nil {
						return "", parseErr{s, err}
					}
					memberFuncStmt := "func New" + className + funcStmt + "\n"
					memberFuncs = append(memberFuncs, memberFuncStmt)
				} else {
					// Method body
					funcStmt, err := convertFunctionCommon(s.Params, s.ReturnType, s.ReturnsRef, methodBody.Stmts)
					if err != nil {
						return "", parseErr{s, err}
					}
					memberFuncStmt := "func (this *" + className + ") " + funcName + funcStmt + "\n"
					memberFuncs = append(memberFuncs, memberFuncStmt)
				}

			default:
				return "", parseErr{s, fmt.Errorf("Class '%s' contained unexpected AST node; expected PropertyList / ClassMethod", className)}
			}
		}

		// Create struct typedef containing all explicit properties
		ret += "type " + className + " struct {\n"
		ret += "\t" + strings.Join(memberVars, "\n\t") + "\n"
		ret += "}\n"

		// Create all member functions
		ret += strings.Join(memberFuncs, "\n\n")

		// Done
		return ret, nil

	case *stmt.Function:
		// Top-level function
		// TODO parse doc comment
		// FIXME is this the same as a closure?
		funcName := n.FunctionName.(*node.Identifier).Value

		// All top-level functions like this are public; ensure function name starts
		// with an uppercase letter
		funcName = toPublic(funcName)

		// Convert body
		funcStmt, err := convertFunctionCommon(n.Params, n.ReturnType, n.ReturnsRef, n.Stmts)
		if err != nil {
			return "", parseErr{n, err}
		}
		ret := "func " + funcName + funcStmt + "\n"
		return ret, nil

	case *stmt.Return:
		if n.Expr == nil {
			// Bare `return;` - there is no value to convert. Mirror the
			// `return nil, <err>` shape used for throw.
			return "return nil, nil\n", nil
		}
		child, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}
		ret := "return " + child + ", nil\n"
		return ret, nil

	case *stmt.Throw:
		// throw (expr);
		// Treat as an err return
		// FIXME we don't know the default return type for the function we're in
		// If the expr is a string literal, we can convert it to errors.New()
		// Although we probably can't do this in general for stringly-typed expressions
		if str, ok := n.Expr.(*scalar.String); ok {
			return "return nil, errors.New(" + str.Value + ")\n", nil
		}
		child, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}
		return "return nil, " + child + "\n", nil

	case *stmt.For:
		var preinit, finit string
		var err error
		if len(n.Init) == 0 {
			// No initialiser in loop
		} else if len(n.Init) == 1 {
			finit, err = convert(n.Init[0])
			if err != nil {
				return "", parseErr{n, err}
			}
		} else {
			// We can handle the case of multiple init statements by hoisting them
			// above the loop. There is no negative impact on PHP scoping rules, but
			// it may cause an extra local variable after the loop that may result
			// in type mismatch (can be fixed by using an extra scope).
			for _, initStmt := range n.Init {
				singleInitStmt, err := convert(initStmt)
				if err != nil {
					return "", parseErr{initStmt, err}
				}
				preinit += singleInitStmt + "\n"
			}
		}

		if len(n.Cond) != 1 {
			return "", parseErr{n, fmt.Errorf("for loop can only have 1 cond clause, found %d", len(n.Cond))}
		}
		fcond, err := convert(n.Cond[0])
		if err != nil {
			return "", parseErr{n, err}
		}

		if len(n.Loop) != 1 {
			return "", parseErr{n, fmt.Errorf("for loop can only have 1 loop clause, found %d", len(n.Loop))}
		}
		// It's idiomatic to do for (,, ++i) but preincrement doesn't exist in
		// Go. The PreInc / PreDec cases below already emit the postfix
		// statement form, so the loop clause can be converted directly.
		floop, err := convert(n.Loop[0])
		if err != nil {
			return "", parseErr{n, err}
		}

		body, err := convert(convertToStmtList(n.Stmt))
		if err != nil {
			return "", parseErr{n, err}
		}
		return preinit + "for " + finit + "; " + fcond + "; " + floop + " " + body + "\n", nil

	case *stmt.Foreach:
		iterand, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}
		valueReceiver, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}
		keyReceiver := `_`
		if n.Key != nil {
			keyReceiver, err = convert(n.Key)
			if err != nil {
				return "", parseErr{n, err}
			}
		}
		body, err := convert(convertToStmtList(n.Stmt))
		if err != nil {
			return "", parseErr{n, err}
		}
		return "for " + keyReceiver + ", " + valueReceiver + " := range " + iterand + " " + body + "\n", nil

	case *stmt.While:
		cond, err := convert(n.Cond)
		if err != nil {
			return "", parseErr{n, err}
		}
		body, err := convert(convertToStmtList(n.Stmt))
		if err != nil {
			return "", parseErr{n, err}
		}
		return "for " + cond + " " + body + "\n", nil

	case *stmt.Do:
		// do { body } while (cond);
		// The body must run at least once and the loop continues while cond
		// holds, so emit an unconditional loop that breaks at the end of the
		// body once cond is false.
		cond, err := convert(n.Cond)
		if err != nil {
			return "", parseErr{n, err}
		}
		// Build a fresh statement slice; appending to the existing StmtList
		// would modify the AST that was passed in.
		origBody := convertToStmtList(n.Stmt)
		loopStmts := make([]node.Node, 0, len(origBody.Stmts)+1)
		loopStmts = append(loopStmts, origBody.Stmts...)
		loopStmts = append(loopStmts, Literal{"if !(" + cond + ") {\nbreak\n}"})
		body, err := convert(stmt.NewStmtList(loopStmts))
		if err != nil {
			return "", parseErr{n, err}
		}
		return "for " + body + "\n", nil

	case *stmt.Expression:
		child, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}
		ret := child + "\n" // standalone expression statement
		return ret, nil

	case *stmt.Echo:
		// Convert into fmt.Print
		args := make([]string, 0, len(n.Exprs))
		for _, echoExpr := range n.Exprs {
			exprGo, err := convert(echoExpr)
			if err != nil {
				return "", parseErr{n, err}
			}
			args = append(args, exprGo)
		}
		return "fmt.Print(" + strings.Join(args, ", ") + ")\n", nil // newline - standalone statement

	case *stmt.InlineHtml:
		// Convert into fmt.Print
		// TODO the result from strconv.Quote is not that nice to maintain if there are multiple newlines
		// Should convert it into a backtick-delimited multiline string instead
		return "fmt.Print(" + strconv.Quote(n.Value) + ")\n", nil // newline - standalone statement

	case *stmt.Nop:
		return "", nil

	//
	// assign
	//

	case *assign.Assign:
		lvalue, err := convert(n.Variable) // might be a more complicated lvalue
		if err != nil {
			return "", parseErr{n, err}
		}
		rvalue, err := convert(n.Expression)
		if err != nil {
			return "", parseErr{n, err}
		}
		// TODO this may need to use `:=`
		return lvalue + " = " + rvalue, nil

	//
	// expr
	//

	case *expr.FunctionCall:
		// All our generated functions return err, but this AST node may be in a single-rvalue context
		// TODO do something more intelligent here
		// We can't necessarily hoist the whole call, in case we are on the right-hand side of a && operator
		funcName, err := resolveName(n.Function)
		if err != nil {
			return "", parseErr{n, err}
		}
		callParams, err := convertFuncCallArgsCommon(n.ArgumentList)
		if err != nil {
			return "", parseErr{n, err}
		}
		return funcName + callParams, nil // expr only, no semicolon/newline

	case *expr.New:
		// new foo(xx)
		// Transparently convert to calling constructor function.
		nn, err := resolveName(n.Class)
		if err != nil {
			return "", parseErr{n, err}
		}
		// FIXME if there is a package specifier embedded in the result name,
		// the `New` will appear in the wrong place
		nn = `New` + nn
		// Convert resolved back to node.Name
		transparentNameNode := name.NewName([]node.Node{name.NewNamePart(nn)})
		return convert(expr.NewFunctionCall(transparentNameNode, n.ArgumentList))

	case *expr.PreInc:
		// """In Go, i++ is a statement, not an expression. So you can't use its value in another expression such as a function call."""
		// Go has no prefix form at all, so emit the postfix statement. This is
		// equivalent wherever the value of the expression is not used.
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}
		return v + "++", nil

	case *expr.PostInc:
		// """In Go, i++ is a statement, not an expression. So you can't use its value in another expression such as a function call."""
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}
		return v + "++", nil

	case *expr.PreDec:
		// As for PreInc: Go only has the postfix statement form
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}
		return v + "--", nil

	case *expr.PostDec:
		// As for PostInc
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}
		return v + "--", nil

	case *expr.MethodCall:
		// Foo->Bar(Baz)
		parent, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}
		child, err := convert(n.Method)
		if err != nil {
			return "", parseErr{n, err}
		}
		args, err := convertFuncCallArgsCommon(n.ArgumentList)
		if err != nil {
			return "", parseErr{n, err}
		}
		return parent + "." + child + args, nil

	case *expr.PropertyFetch:
		// Foo->Bar
		parent, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}
		child, err := convert(n.Property)
		if err != nil {
			return "", parseErr{n, err}
		}
		return parent + "." + child, nil

	case *expr.Variable:
		return n.VarName.(*node.Identifier).Value, nil

	case *expr.ConstFetch:
		return resolveName(n.Constant)

	//
	// binary
	//

	case *binary.BitwiseAnd:
		return convertBinaryCommon(n.Left, n.Right, `&`)

	case *binary.BitwiseOr:
		return convertBinaryCommon(n.Left, n.Right, `|`)

	case *binary.BitwiseXor:
		return convertBinaryCommon(n.Left, n.Right, `^`) // n.b. Go only supports this for integers; PHP also supports it for bools

	case *binary.BooleanAnd:
		return convertBinaryCommon(n.Left, n.Right, `&&`)

	case *binary.BooleanOr:
		return convertBinaryCommon(n.Left, n.Right, `||`)

	//case *binary.Coalesce:
	// TODO this can't be expressed in an rvalue context in Go (unless we create a typed closure..?)

	case *binary.Concat:
		return convertBinaryCommon(n.Left, n.Right, `+`) // PHP uses + for numbers, `.` for strings; Go uses `+` in both cases

	case *binary.Div:
		return convertBinaryCommon(n.Left, n.Right, `/`) // PHP will upgrade ints to floats, Go won't

	case *binary.Equal:
		return convertBinaryCommon(n.Left, n.Right, `==`) // Type-lax equality comparator

	case *binary.GreaterOrEqual:
		return convertBinaryCommon(n.Left, n.Right, `>=`)

	case *binary.Greater:
		return convertBinaryCommon(n.Left, n.Right, `>`)

	case *binary.Identical:
		return convertBinaryCommon(n.Left, n.Right, `==`) // PHP uses `===`, Go is already type-safe

	case *binary.LogicalAnd:
		// This is the lexer token when using `and` in PHP. It's equivalent to
		// `&&` but has different precedence
		// e.g. $a = $b && $c ==> $a = ($b && $c)
		//      $a = $b and $c ==> ($a = $b) and $c
		// So far, we are relying on the PHP parser having already handled
		// the precedence difference - transform to `&&` unconditionally
		return convertBinaryCommon(n.Left, n.Right, `&&`)

	case *binary.LogicalOr:
		// As above
		return convertBinaryCommon(n.Left, n.Right, `||`)

	case *binary.LogicalXor:
		// As above regarding precedence. PHP `xor` is a boolean operator;
		// Go's `^` is not defined for bools, but for two bool operands
		// inequality is exactly exclusive-or.
		return convertBinaryCommon(n.Left, n.Right, `!=`)

	case *binary.Minus:
		return convertBinaryCommon(n.Left, n.Right, `-`)

	case *binary.Mod:
		// PHP's `%` is an integer operator (operands are converted to int),
		// which corresponds to Go's integer `%` rather than the
		// floating-point math.Mod().
		return convertBinaryCommon(n.Left, n.Right, `%`)

	case *binary.Mul:
		return convertBinaryCommon(n.Left, n.Right, `*`)

	case *binary.NotEqual:
		return convertBinaryCommon(n.Left, n.Right, `!=`) // Type-lax equality comparator

	case *binary.NotIdentical:
		return convertBinaryCommon(n.Left, n.Right, `!=`) // PHP uses `!==`, Go is already type-safe

	case *binary.Plus:
		return convertBinaryCommon(n.Left, n.Right, `+`) // PHP uses + for numbers, `.` for strings; Go uses `+` in both cases

	case *binary.Pow:
		// Go doesn't have a built-in operator for exponentiation - convert to a call to math.Pow()
		base, err := convert(n.Left)
		if err != nil {
			return "", parseErr{n, err}
		}
		exponent, err := convert(n.Right)
		if err != nil {
			return "", parseErr{n, err}
		}
		return `math.Pow(` + base + `, ` + exponent + `)`, nil

	case *binary.ShiftLeft:
		return convertBinaryCommon(n.Left, n.Right, `<<`)

	case *binary.ShiftRight:
		return convertBinaryCommon(n.Left, n.Right, `>>`)

	case *binary.SmallerOrEqual:
		return convertBinaryCommon(n.Left, n.Right, `<=`)

	case *binary.Smaller:
		return convertBinaryCommon(n.Left, n.Right, `<`)

	case *binary.Spaceship:
		// The spaceship operator returns -1 / 0 / 1 based on a gteq/leq comparison
		// Go doesn't have a built-in spaceship operator
		// The primary use case is in user-defined sort comparators, where Go
		// uses bools instead of ints anyway.
		// Subtraction is a reasonable substitute
		return convertBinaryCommon(n.Left, n.Right, `-`)

	//
	// scalar
	//

	case *scalar.Lnumber:
		return n.Value, nil // number formats are compatible

	case *scalar.String:
		return n.Value, nil // It's already quoted in PHP format
		// return strconv.Quote(n.Value), nil // Go source code quoting format

	//
	//
	//

	default:
		return "", fmt.Errorf("unsupported node type %s", nodeTypeString(n))
	}
}
// applyVisibilityModifier adjusts the case of a method name's first letter to
// reflect its PHP visibility: `private` / `protected` give a lowercase
// (unexported) name, anything else an uppercase (exported) one. When several
// visibility keywords are present the last one wins; modifiers other than
// public/private/protected are ignored. An error is returned if a modifier is
// not a *node.Identifier.
func applyVisibilityModifier(funcName string, modifiers []node.Node) (string, error) {
	exported := true

	for _, m := range modifiers {
		id, isIdent := m.(*node.Identifier)
		if !isIdent {
			return "", parseErr{m, fmt.Errorf("expected node.Identifier")}
		}

		if keyword := id.Value; keyword == "public" {
			exported = true
		} else if keyword == "private" || keyword == "protected" {
			exported = false
		}
	}

	if !exported {
		return toPrivate(funcName), nil
	}
	return toPublic(funcName), nil
}
// toPublic returns name with its first character upper-cased, so that the
// identifier is exported in Go.
//
// The first character is taken as a whole UTF-8 rune rather than a single
// byte, so multi-byte leading characters are cased correctly instead of being
// corrupted. An empty name, or one starting with an invalid UTF-8 byte, is
// returned unchanged.
func toPublic(name string) string {
	r, size := utf8.DecodeRuneInString(name)
	if size == 0 || (r == utf8.RuneError && size == 1) {
		return name // empty, or not valid UTF-8: nothing sensible to re-case
	}

	nFirst := name[:size]
	uFirst := strings.ToUpper(nFirst)
	if nFirst == uFirst {
		return name // avoid making more heap garbage
	}
	return uFirst + name[size:]
}
// toPrivate returns name with its first character lower-cased, so that the
// identifier is unexported in Go.
//
// The first character is taken as a whole UTF-8 rune rather than a single
// byte, so multi-byte leading characters are cased correctly instead of being
// corrupted. An empty name, or one starting with an invalid UTF-8 byte, is
// returned unchanged.
func toPrivate(name string) string {
	r, size := utf8.DecodeRuneInString(name)
	if size == 0 || (r == utf8.RuneError && size == 1) {
		return name // empty, or not valid UTF-8: nothing sensible to re-case
	}

	nFirst := name[:size]
	lFirst := strings.ToLower(nFirst)
	if nFirst == lFirst {
		return name // avoid making more heap garbage
	}
	return lFirst + name[size:]
}
// resolveName turns a `*name.Name` node into a Go string.
//
// Any node that is not a *name.Name (including nil) resolves to
// unknownVarType. A *name.Name must consist of exactly one part; otherwise an
// error is returned.
func resolveName(n node.Node) (string, error) {
	// TODO support namespace lookups

	nameNode, isName := n.(*name.Name)
	if !isName {
		return unknownVarType, nil
	}

	if len(nameNode.Parts) != 1 {
		return "", parseErr{n, fmt.Errorf("name has %d parts, expected 1", len(nameNode.Parts))}
	}

	only := nameNode.Parts[0].(*name.NamePart)
	return only.Value, nil
}
// convertToStmtList guarantees a *stmt.StmtList: a node that already is one is
// returned as-is, and any other node is wrapped in a new list holding just
// that node.
// A loop body is a StmtList when it was written with {} and a single bare
// statement otherwise; normalising here means every emitted loop body gets
// braces.
func convertToStmtList(n node.Node) *stmt.StmtList {
	list, isList := n.(*stmt.StmtList)
	if !isList {
		// Bare statement: wrap it
		list = stmt.NewStmtList([]node.Node{n})
	}
	return list
}
// convertBinaryCommon converts both operands of a binary expression and joins
// them with the given Go operator, wrapping the result in parentheses:
// "(lhs op rhs)". Operand conversion failures are returned as a parseErr
// naming the operand that failed.
func convertBinaryCommon(left, right node.Node, goBinaryOperator string) (string, error) {
	// Assume PHP/Go have the same associativity here; the parentheses keep
	// the grouping chosen by the PHP parser.
	operands := [2]node.Node{left, right}
	pieces := make([]string, 0, 3)

	for i, operand := range operands {
		text, err := convert(operand)
		if err != nil {
			return "", parseErr{operand, err}
		}
		pieces = append(pieces, text)
		if i == 0 {
			pieces = append(pieces, goBinaryOperator)
		}
	}

	return "(" + strings.Join(pieces, " ") + ")", nil
}
// convertFuncCallArgsCommon renders a call's argument list as Go source,
// including the surrounding parentheses, e.g. "(a, &b, rest...)".
//
// A nil argument list (as produced for `new Foo` written without
// parentheses) renders as "()". By-reference arguments are prefixed with `&`.
// Variadic (spread) arguments use Go's trailing `...` form. An error is
// returned if an entry is not a *node.Argument or if an argument expression
// cannot be converted.
func convertFuncCallArgsCommon(args *node.ArgumentList) (string, error) {
	if args == nil {
		return "()", nil
	}

	callParams := make([]string, 0, len(args.Arguments))
	for _, arg_ := range args.Arguments {
		arg, ok := arg_.(*node.Argument)
		if !ok {
			return "", parseErr{arg_, fmt.Errorf("expected node.Argument")}
		}

		rvalue, err := convert(arg.Expr)
		if err != nil {
			return "", parseErr{arg, err}
		}
		if arg.IsReference {
			rvalue = "&" + rvalue
		}
		if arg.Variadic {
			// PHP spreads with a prefix (`...$xs`); Go with a suffix (`xs...`)
			rvalue += "..."
		}

		callParams = append(callParams, rvalue)
	}

	return "(" + strings.Join(callParams, `, `) + ")", nil // expr only, no semicolon/newline
}
// convertFunctionCommon renders everything that follows a function's name:
// the parameter list, the result list and the braced body, e.g.
// "(a T, b *U) (R, error) {\n...\n}".
//
// params must contain *node.Parameter values. returnType is resolved with
// resolveName (so a missing type becomes unknownVarType), and returnsRef
// turns the result type into a pointer. The generated function always has a
// trailing `error` result. bodyStmts are converted in order, one per line.
//
// No trailing newline is emitted after the closing brace, in case the result
// is embedded in a larger expression.
func convertFunctionCommon(params []node.Node, returnType node.Node, returnsRef bool, bodyStmts []node.Node) (string, error) {
	// TODO scan function and see if it contains any return statements at all
	// If not, then we only need an err return parameter, not anything else

	funcParams := make([]string, 0, len(params))
	for _, rawParam := range params {
		// Keep the original interface value under its own name: if the
		// assertion fails, the asserted variable is a nil *node.Parameter and
		// must not be the node reported in the error.
		param, ok := rawParam.(*node.Parameter)
		if !ok {
			return "", parseErr{rawParam, fmt.Errorf("expected node.Parameter")}
		}

		// VariableType: might be nil for untyped parameters
		paramType, err := resolveName(param.VariableType)
		if err != nil {
			return "", parseErr{param, err}
		}
		if param.ByRef {
			paramType = "*" + paramType
		}
		if param.Variadic {
			paramType = "..." + paramType
		}

		// Name
		paramName := param.Variable.(*expr.Variable).VarName.(*node.Identifier).Value

		funcParams = append(funcParams, paramName+" "+paramType)
	}

	// ReturnType
	funcReturn, err := resolveName(returnType)
	if err != nil {
		return "", parseErr{returnType, err}
	}
	if returnsRef {
		funcReturn = "*" + funcReturn
	}

	// Build function prototype
	var ret strings.Builder
	ret.WriteString("(" + strings.Join(funcParams, ", ") + ") (" + funcReturn + ", error) {\n")

	// Recurse through body statements
	for _, s := range bodyStmts {
		bodyStmt, err := convert(s)
		if err != nil {
			return "", parseErr{s, err}
		}
		ret.WriteString(bodyStmt)
		ret.WriteString("\n")
	}

	// Done
	// No extra trailing newline in case this is part of a large expression
	ret.WriteString("}")
	return ret.String(), nil
}