php2go/node.go

package main

import (
	"fmt"
	"reflect"
	"strconv"

	//"strconv"
	"strings"

	"github.com/z7zmey/php-parser/node"
	"github.com/z7zmey/php-parser/node/expr"
	"github.com/z7zmey/php-parser/node/expr/assign"
	"github.com/z7zmey/php-parser/node/expr/binary"
	"github.com/z7zmey/php-parser/node/name"
	"github.com/z7zmey/php-parser/node/scalar"
	"github.com/z7zmey/php-parser/node/stmt"
)

// nodeTypeString returns the dynamic Go type of an AST node (for example
// "*stmt.Echo") for use in diagnostics. A nil node is reported as "<nil>"
// instead of panicking, since reflect.TypeOf(nil) yields a nil Type.
func nodeTypeString(n node.Node) string {
	if n == nil {
		return "<nil>"
	}
	return reflect.TypeOf(n).String()
}

// parseErr records the AST node that was being converted when an error
// occurred, together with the underlying error. Nesting parseErr values
// produces a breadcrumb trail from the outermost node down to the failure.
type parseErr struct {
	n        node.Node // node being converted when the error was raised
	childErr error     // underlying cause
}

// Error formats the node type, its starting line (when known) and the
// underlying error.
//
// Nodes synthesised during conversion carry no position information, and a
// nil node may be recorded when an optional child was missing; both cases are
// reported without a line number rather than dereferencing a nil pointer.
func (pe parseErr) Error() string {
	if pe.n == nil {
		return fmt.Sprintf("Parsing <nil>: %s", pe.childErr)
	}

	pos := pe.n.GetPosition()
	if pos == nil {
		return fmt.Sprintf("Parsing %s: %s", nodeTypeString(pe.n), pe.childErr)
	}

	return fmt.Sprintf("Parsing %s on line %d: %s", nodeTypeString(pe.n), pos.StartLine, pe.childErr)
}

// Unwrap exposes the underlying error so that errors.Is / errors.As can see
// through the parseErr wrapper.
func (pe parseErr) Unwrap() error {
	return pe.childErr
}

//

// convert recursively translates a single PHP AST node into Go source text.
//
// Statement nodes generally produce newline-terminated text, while expression
// nodes produce a bare expression with no trailing newline. Any failure while
// converting a child is wrapped in a parseErr naming the node being processed.
// A nil node or a node type with no case below yields an error.
func convert(n_ node.Node) (string, error) {
	switch n := n_.(type) {

	case nil:
		// Optional children are nil in the AST; report this as an error instead
		// of letting the default case panic while formatting the type name
		return "", fmt.Errorf("unexpected nil node")

	//
	// node
	//

	case *node.Root:
		ret := "package main\n\n"

		// Hoist all declarations first, and put any top-level code into a generated init() function
		statements := []string{}

		for _, s := range n.Stmts {
			sm, err := convert(s)
			if err != nil {
				return "", parseErr{s, err}
			}

			switch s.(type) {
			case *stmt.Class, *stmt.Function:
				// Declaration - emit immediately (hoist)
				ret += sm + "\n"

			default:
				// Top-level function code - defer emission
				statements = append(statements, sm)
			}
		}

		// Emit deferred statements
		if len(statements) > 0 {
			ret += "func init() {\n"
			ret += "\t" + strings.Join(statements, "\n\t") + "\n"
			ret += "}\n"
		}

		return ret, nil

	case *node.Identifier:
		return n.Value, nil

	case Literal:
		// Pre-rendered Go source injected by the converter itself
		return n.Value, nil

	//
	// stmt
	//

	case *stmt.StmtList:
		// TODO keep track of variable types within this scope

		ret := "{\n" // new variable scope
		for _, s := range n.Stmts {
			line, err := convert(s)
			if err != nil {
				return "", parseErr{s, err}
			}

			ret += line + "\n"
		}
		return ret + "}\n", nil

	case *stmt.Class:
		ret := ""

		className := n.ClassName.(*node.Identifier).Value
		memberVars := []string{}
		memberFuncs := []string{}

		// Walk all child nodes of the class
		for _, s_ := range n.Stmts {
			switch s := s_.(type) {

			case *stmt.PropertyList:
				// Class member variable(s). A single declaration can introduce
				// several properties (`public $a, $b;`), so emit one struct
				// field for each of them.

				// Doc comment
				// TODO scan for `@var {type}` strings

				for _, p := range s.Properties {
					prop, ok := p.(*stmt.Property)
					if !ok {
						return "", parseErr{s, fmt.Errorf("unexpected propertylist structure")}
					}

					// Name
					propName := prop.Variable.(*expr.Variable).VarName.(*node.Identifier).Value

					// Type (unknown)
					memberType := unknownVarType

					// 'Modifiers' - protected public readonly ...
					// prop.Modifiers

					memberVars = append(memberVars, propName+" "+memberType)
				}

			case *stmt.ClassMethod:
				// Function name
				// If function is public/private/protected, set the first character to upper/lowercase
				funcName, err := applyVisibilityModifier(s.MethodName.(*node.Identifier).Value, s.Modifiers)
				if err != nil {
					return "", parseErr{s, err}
				}

				// Methods declared without a body (e.g. abstract methods) do not
				// carry a StmtList; report them instead of panicking
				methodBody, ok := s.Stmt.(*stmt.StmtList)
				if !ok {
					return "", parseErr{s, fmt.Errorf("method '%s' has no statement list body", funcName)}
				}

				// Doc comment
				// TODO scan for `@param {type}` strings

				// PHP constructors are either `__construct` or (old style) named
				// after the class; PHP names are case-insensitive
				lowerName := strings.ToLower(funcName)
				isConstructor := (lowerName == `__construct` || lowerName == strings.ToLower(className))

				if isConstructor {
					// Constructor functions get transformed to NewFoo() (*Foo, error)

					// We need to force the return type
					returnType := name.NewName([]node.Node{name.NewNamePart(className)})

					// We also need prefix + suffix statements
					allStmts := make([]node.Node, 0, 2+len(methodBody.Stmts))
					allStmts = append(allStmts, Literal{`this := &` + className + `{}`}) // TODO also insert variable type into the scope
					allStmts = append(allStmts, methodBody.Stmts...)
					allStmts = append(allStmts, Literal{`return this, nil`})

					// Method body
					funcStmt, err := convertFunctionCommon(s.Params, returnType, true /* always use ptr return */, allStmts)
					if err != nil {
						return "", parseErr{s, err}
					}
					memberFuncStmt := "func New" + className + funcStmt + "\n"
					memberFuncs = append(memberFuncs, memberFuncStmt)

				} else {

					// Method body
					funcStmt, err := convertFunctionCommon(s.Params, s.ReturnType, s.ReturnsRef, methodBody.Stmts)
					if err != nil {
						return "", parseErr{s, err}
					}
					memberFuncStmt := "func (this *" + className + ") " + funcName + funcStmt + "\n"
					memberFuncs = append(memberFuncs, memberFuncStmt)

				}

			default:
				return "", parseErr{s, fmt.Errorf("Class '%s' contained unexpected AST node; expected PropertyList / ClassMethod", className)}
			}
		}

		// Create struct typedef containing all explicit properties
		ret += "type " + className + " struct {\n"
		ret += "\t" + strings.Join(memberVars, "\n\t") + "\n"
		ret += "}\n"

		// Create all member functions
		ret += strings.Join(memberFuncs, "\n\n")

		// Done
		return ret, nil

	case *stmt.Function:
		// Top-level function
		// TODO parse doc comment
		// FIXME is this the same as a closure?
		funcName := n.FunctionName.(*node.Identifier).Value

		// All top-level functions like this are public; ensure function name starts
		// with an uppercase letter
		funcName = toPublic(funcName)

		// Convert body
		funcStmt, err := convertFunctionCommon(n.Params, n.ReturnType, n.ReturnsRef, n.Stmts)
		if err != nil {
			return "", parseErr{n, err}
		}

		ret := "func " + funcName + funcStmt + "\n"
		return ret, nil

	case *stmt.Return:
		// All generated functions return (value, error)
		if n.Expr == nil {
			// Bare `return;` - there is no value to hand back
			return "return nil, nil\n", nil
		}

		child, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}

		ret := "return " + child + ", nil\n"
		return ret, nil

	case *stmt.Throw:
		// throw (expr);
		// Treat as an err return
		// FIXME we don't know the default return type for the function we're in

		// If the expr is a string literal, we can convert it to errors.New()
		// Although we probably can't do this in general for stringly-typed expressions

		if str, ok := n.Expr.(*scalar.String); ok {
			return "return nil, errors.New(" + str.Value + ")\n", nil
		}

		child, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}

		return "return nil, " + child + "\n", nil

	case *stmt.For:

		var preinit, finit string
		var err error

		if len(n.Init) == 0 {
			// No initialiser in loop

		} else if len(n.Init) == 1 {
			finit, err = convert(n.Init[0])
			if err != nil {
				return "", parseErr{n, err}
			}

		} else {
			// We can handle the case of multiple init statements by hoisting them
			// above the loop. There is no negative impact on PHP scoping rules, but
			// it may cause an extra local variable after the loop that may result
			// in type mismatch (can be fixed by using an extra scope).
			for _, initStmt := range n.Init {
				singleInitStmt, err := convert(initStmt)
				if err != nil {
					return "", parseErr{initStmt, err}
				}

				preinit += singleInitStmt + "\n"
			}
		}

		if len(n.Cond) != 1 {
			return "", parseErr{n, fmt.Errorf("for loop can only have 1 cond clause, found %d", len(n.Cond))}
		}
		fcond, err := convert(n.Cond[0])
		if err != nil {
			return "", parseErr{n, err}
		}

		if len(n.Loop) != 1 {
			return "", parseErr{n, fmt.Errorf("for loop can only have 1 loop clause, found %d", len(n.Loop))}
		}
		loopStmt := n.Loop[0]
		if preinc, ok := loopStmt.(*expr.PreInc); ok {
			// It's idiomatic to do for (,, ++i) but preincrement doesn't exist in Go
			// Luckily for the case of a for loop, we can just swap it to postincrement
			loopStmt = expr.NewPostInc(preinc.Variable)
		} else if predec, ok := loopStmt.(*expr.PreDec); ok { // Likewise
			loopStmt = expr.NewPostDec(predec.Variable)
		}

		floop, err := convert(loopStmt)
		if err != nil {
			return "", parseErr{n, err}
		}

		body, err := convert(convertToStmtList(n.Stmt))
		if err != nil {
			return "", parseErr{n, err}
		}

		return preinit + "for " + finit + "; " + fcond + "; " + floop + " " + body + "\n", nil

	case *stmt.Foreach:
		iterand, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}

		valueReceiver, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}

		keyReceiver := `_`
		if n.Key != nil {
			keyReceiver, err = convert(n.Key)
			if err != nil {
				return "", parseErr{n, err}
			}
		}

		body, err := convert(convertToStmtList(n.Stmt))
		if err != nil {
			return "", parseErr{n, err}
		}

		return "for " + keyReceiver + ", " + valueReceiver + " := range " + iterand + " " + body + "\n", nil

	case *stmt.While:
		cond, err := convert(n.Cond)
		if err != nil {
			return "", parseErr{n, err}
		}

		body, err := convert(convertToStmtList(n.Stmt))
		if err != nil {
			return "", parseErr{n, err}
		}

		return "for " + cond + " " + body + "\n", nil

	case *stmt.Do:
		// do { body } while (cond);
		// Go has no post-condition loop. Emit an unconditional `for` whose last
		// statement leaves the loop once the condition stops holding, so that
		// the body always runs at least once.
		// FIXME a `continue` inside the body will skip the trailing condition check
		cond, err := convert(n.Cond)
		if err != nil {
			return "", parseErr{n, err}
		}

		// Build a fresh statement slice so the caller's AST is not modified
		origBody := convertToStmtList(n.Stmt)
		loopStmts := make([]node.Node, 0, len(origBody.Stmts)+1)
		loopStmts = append(loopStmts, origBody.Stmts...)
		loopStmts = append(loopStmts, Literal{"if !(" + cond + ") {\nbreak\n}"})

		body, err := convert(stmt.NewStmtList(loopStmts))
		if err != nil {
			return "", parseErr{n, err}
		}

		return "for " + body + "\n", nil

	case *stmt.Expression:
		child, err := convert(n.Expr)
		if err != nil {
			return "", parseErr{n, err}
		}

		ret := child + "\n" // standalone expression statement
		return ret, nil

	case *stmt.Echo:
		// Convert into fmt.Print
		args := make([]string, 0, len(n.Exprs))
		for _, e := range n.Exprs {
			exprGo, err := convert(e)
			if err != nil {
				return "", parseErr{n, err}
			}

			args = append(args, exprGo)
		}
		return "fmt.Print(" + strings.Join(args, ", ") + ")\n", nil // newline - standalone statement

	case *stmt.InlineHtml:
		// Convert into fmt.Print
		// TODO the result from strconv.Quote is not that nice to maintain if there are multiple newlines
		// Should convert it into a backtick-delimited multiline string instead
		return "fmt.Print(" + strconv.Quote(n.Value) + ")\n", nil // newline - standalone statement

	case *stmt.Nop:
		return "", nil

	//
	// assign
	//

	case *assign.Assign:
		lvalue, err := convert(n.Variable) // might be a more complicated lvalue
		if err != nil {
			return "", parseErr{n, err}
		}

		rvalue, err := convert(n.Expression)
		if err != nil {
			return "", parseErr{n, err}
		}

		// TODO this may need to use `:=`
		return lvalue + " = " + rvalue, nil

	//
	// expr
	//

	case *expr.FunctionCall:
		// All our generated functions return err, but this AST node may be in a single-rvalue context
		// TODO do something more intelligent here
		// We can't necessarily hoist the whole call, in case we are on the right-hand side of a && operator
		funcName, err := resolveName(n.Function)
		if err != nil {
			return "", parseErr{n, err}
		}

		callParams, err := convertFuncCallArgsCommon(n.ArgumentList)
		if err != nil {
			return "", parseErr{n, err}
		}

		return funcName + callParams, nil // expr only, no semicolon/newline

	case *expr.New:
		// new foo(xx)
		// Transparently convert to calling constructor function.
		nn, err := resolveName(n.Class)
		if err != nil {
			return "", parseErr{n, err}
		}
		// FIXME if there is a package specifier embedded in the result name,
		// the `New` will appear in the wrong place
		nn = `New` + nn

		// Convert resolved back to node.Name
		transparentNameNode := name.NewName([]node.Node{name.NewNamePart(nn)})

		return convert(expr.NewFunctionCall(transparentNameNode, n.ArgumentList))

	case *expr.PreInc:
		// """In Go, i++ is a statement, not an expression. So you can't use its value in another expression such as a function call."""
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}

		return "++" + v, nil

	case *expr.PostInc:
		// """In Go, i++ is a statement, not an expression. So you can't use its value in another expression such as a function call."""
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}

		return v + "++", nil

	case *expr.PreDec:
		// Mirrors PreInc; the same statement-vs-expression caveat applies
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}

		return "--" + v, nil

	case *expr.PostDec:
		// Mirrors PostInc. The `for` handler above rewrites `--i` loop clauses
		// into this node type, so it must be convertible
		v, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}

		return v + "--", nil

	case *expr.MethodCall:
		// Foo->Bar(Baz)
		parent, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}

		child, err := convert(n.Method)
		if err != nil {
			return "", parseErr{n, err}
		}

		args, err := convertFuncCallArgsCommon(n.ArgumentList)
		if err != nil {
			return "", parseErr{n, err}
		}

		return parent + "." + child + args, nil

	case *expr.PropertyFetch:
		// Foo->Bar
		parent, err := convert(n.Variable)
		if err != nil {
			return "", parseErr{n, err}
		}

		child, err := convert(n.Property)
		if err != nil {
			return "", parseErr{n, err}
		}

		return parent + "." + child, nil

	case *expr.Variable:
		// The variable name is normally a plain identifier; anything else
		// (e.g. a variable-variable such as `$$foo`) has no Go equivalent
		ident, ok := n.VarName.(*node.Identifier)
		if !ok {
			return "", parseErr{n, fmt.Errorf("variable name is not a plain identifier")}
		}
		return ident.Value, nil

	case *expr.ConstFetch:
		return resolveName(n.Constant)

	//
	// binary
	//

	case *binary.BitwiseAnd:
		return convertBinaryCommon(n.Left, n.Right, `&`)

	case *binary.BitwiseOr:
		return convertBinaryCommon(n.Left, n.Right, `|`)

	case *binary.BitwiseXor:
		return convertBinaryCommon(n.Left, n.Right, `^`) // n.b. Go only supports this for integers; PHP also supports it for bools

	case *binary.BooleanAnd:
		return convertBinaryCommon(n.Left, n.Right, `&&`)

	case *binary.BooleanOr:
		return convertBinaryCommon(n.Left, n.Right, `||`)

	//case *binary.Coalesce:
	// TODO this can't be expressed in an rvalue context in Go (unless we create a typed closure..?)

	case *binary.Concat:
		return convertBinaryCommon(n.Left, n.Right, `+`) // PHP uses + for numbers, `.` for strings; Go uses `+` in both cases

	case *binary.Div:
		return convertBinaryCommon(n.Left, n.Right, `/`) // PHP will upgrade ints to floats, Go won't

	case *binary.Equal:
		return convertBinaryCommon(n.Left, n.Right, `==`) // Type-lax equality comparator

	case *binary.GreaterOrEqual:
		return convertBinaryCommon(n.Left, n.Right, `>=`)

	case *binary.Greater:
		return convertBinaryCommon(n.Left, n.Right, `>`)

	case *binary.Identical:
		return convertBinaryCommon(n.Left, n.Right, `==`) // PHP uses `===`, Go is already type-safe

	case *binary.LogicalAnd:
		// This is the lexer token when using `and` in PHP. It's equivalent to
		// `&&` but has different precedence
		// e.g. $a = $b && $c   ==> $a = ($b && $c)
		//      $a = $b and $c  ==> ($a = $b) and $c
		// So far, we are relying on the PHP parser having already handled
		// the precedence difference - transform to `&&` unconditionally
		return convertBinaryCommon(n.Left, n.Right, `&&`)

	case *binary.LogicalOr:
		// As above
		return convertBinaryCommon(n.Left, n.Right, `||`)

	case *binary.LogicalXor:
		// As above
		return convertBinaryCommon(n.Left, n.Right, `^`) // n.b. Go only supports this for integers; PHP also supports it for bools

	case *binary.Minus:
		return convertBinaryCommon(n.Left, n.Right, `-`)

	case *binary.Mod:
		// Emitted as a call to math.Mod(), which operates on float64 values.
		// NOTE(review): Go's `%` operator is integer-only, as is PHP's; it may be
		// the closer match once operand types are tracked
		rval, err := convert(n.Left)
		if err != nil {
			return "", parseErr{n, err}
		}

		modulo, err := convert(n.Right)
		if err != nil {
			return "", parseErr{n, err}
		}

		return `math.Mod(` + rval + `, ` + modulo + `)`, nil

	case *binary.Mul:
		return convertBinaryCommon(n.Left, n.Right, `*`)

	case *binary.NotEqual:
		return convertBinaryCommon(n.Left, n.Right, `!=`) // Type-lax equality comparator

	case *binary.NotIdentical:
		return convertBinaryCommon(n.Left, n.Right, `!=`) // PHP uses `!==`, Go is already type-safe

	case *binary.Plus:
		return convertBinaryCommon(n.Left, n.Right, `+`) // PHP uses + for numbers, `.` for strings; Go uses `+` in both cases

	case *binary.Pow:
		// Go doesn't have a built-in exponentiation operator - convert to a call to math.Pow()

		base, err := convert(n.Left)
		if err != nil {
			return "", parseErr{n, err}
		}

		exponent, err := convert(n.Right)
		if err != nil {
			return "", parseErr{n, err}
		}

		return `math.Pow(` + base + `, ` + exponent + `)`, nil

	case *binary.ShiftLeft:
		return convertBinaryCommon(n.Left, n.Right, `<<`)

	case *binary.ShiftRight:
		return convertBinaryCommon(n.Left, n.Right, `>>`)

	case *binary.SmallerOrEqual:
		return convertBinaryCommon(n.Left, n.Right, `<=`)

	case *binary.Smaller:
		return convertBinaryCommon(n.Left, n.Right, `<`)

	case *binary.Spaceship:
		// The spaceship operator returns -1 / 0 / 1 based on a gteq/leq comparison
		// Go doesn't have a built-in spaceship operator
		// The primary use case is in user-defined sort comparators, where Go
		//  uses bools instead of ints anyway.
		// Subtraction is a reasonable substitute
		return convertBinaryCommon(n.Left, n.Right, `-`)

	//
	// scalar
	//

	case *scalar.Lnumber:
		return n.Value, nil // number formats are compatible

	case *scalar.String:
		return n.Value, nil // It's already quoted in PHP format
		// return strconv.Quote(n.Value), nil // Go source code quoting format

	//
	//
	//

	default:
		return "", fmt.Errorf("unsupported node type %s", nodeTypeString(n))
	}
}

// applyVisibilityModifier maps PHP visibility onto Go's export convention by
// adjusting the case of the first letter of funcName. A method is treated as
// public unless a `private` or `protected` modifier is present; modifiers
// other than public/private/protected are ignored. An error is returned if
// any modifier is not a *node.Identifier.
func applyVisibilityModifier(funcName string, modifiers []node.Node) (string, error) {
	exported := true

	for _, m := range modifiers {
		id, isIdent := m.(*node.Identifier)
		if !isIdent {
			return "", parseErr{m, fmt.Errorf("expected node.Identifier")}
		}

		if v := id.Value; v == "public" {
			exported = true
		} else if v == "private" || v == "protected" {
			exported = false
		}
	}

	if !exported {
		return toPrivate(funcName), nil
	}
	return toPublic(funcName), nil
}

// toPublic returns name with its first character upper-cased, which makes the
// identifier exported in Go. Empty names, and names whose first character has
// no distinct upper-case form, are returned unchanged.
func toPublic(name string) string {
	return recaseFirstChar(name, strings.ToUpper)
}

// toPrivate returns name with its first character lower-cased, which makes
// the identifier unexported in Go. Empty names, and names whose first
// character has no distinct lower-case form, are returned unchanged.
func toPrivate(name string) string {
	return recaseFirstChar(name, strings.ToLower)
}

// recaseFirstChar applies recase to the first UTF-8 encoded character of name
// and leaves the remainder untouched.
func recaseFirstChar(name string, recase func(string) string) string {
	// Ranging over a string yields the byte offset of every character, so the
	// offset of the second character is the encoded width of the first.
	width := len(name)
	for i := range name {
		if i > 0 {
			width = i
			break
		}
	}

	first := name[:width]
	if first == "" || strings.ToValidUTF8(first, "") != first {
		// Nothing to change, or the name starts with a byte that is not valid
		// UTF-8; case mapping would replace that byte, so leave it alone
		return name
	}

	changed := recase(first)
	if changed == first {
		return name // avoid making more heap garbage
	}

	return changed + name[width:]
}

// resolveName turns a `*name.Name` node into a Go string.
// Any other node, including nil, resolves to unknownVarType. A name made up
// of anything other than exactly one part is rejected with an error.
func resolveName(n node.Node) (string, error) {
	// TODO support namespace lookups

	qualified, isName := n.(*name.Name)
	if !isName {
		return unknownVarType, nil
	}

	if count := len(qualified.Parts); count != 1 {
		return "", parseErr{n, fmt.Errorf("name has %d parts, expected 1", count)}
	}

	return qualified.Parts[0].(*name.NamePart).Value, nil
}

// convertToStmtList returns n itself when it already is a StmtList, and
// otherwise wraps it in a new single-statement StmtList.
// A loop body is a StmtList when written with {} and a lone statement when
// not; normalising here lets every loop body be emitted inside {}.
func convertToStmtList(n node.Node) *stmt.StmtList {
	switch body := n.(type) {
	case *stmt.StmtList:
		return body // It's already a StmtList
	default:
		return stmt.NewStmtList([]node.Node{n})
	}
}

// convertBinaryCommon converts both operands of a binary expression and joins
// them with goBinaryOperator. The result is always wrapped in parentheses, so
// the grouping chosen by the PHP parser survives in the generated Go.
// The left operand is converted first; a failure in either operand is wrapped
// in a parseErr naming that operand.
func convertBinaryCommon(left, right node.Node, goBinaryOperator string) (string, error) {
	// Assume PHP/Go have the same associativity here
	operands := [2]string{}

	for idx, operand := range [2]node.Node{left, right} {
		text, err := convert(operand)
		if err != nil {
			return "", parseErr{operand, err}
		}
		operands[idx] = text
	}

	return fmt.Sprintf("(%s %s %s)", operands[0], goBinaryOperator, operands[1]), nil
}

// convertFuncCallArgsCommon renders a call's argument list as Go source,
// including the surrounding parentheses.
//
// A nil list (no argument list present on the call) renders as "()".
// By-reference arguments are prefixed with `&`. Variadic (spread) arguments
// get a `...` suffix, which is where Go places the spread operator. An error
// is returned if an entry is not a *node.Argument or its expression cannot be
// converted.
func convertFuncCallArgsCommon(args *node.ArgumentList) (string, error) {
	if args == nil {
		return "()", nil
	}

	callParams := make([]string, 0, len(args.Arguments))
	for _, arg_ := range args.Arguments {
		arg, ok := arg_.(*node.Argument)
		if !ok {
			return "", parseErr{arg_, fmt.Errorf("expected node.Argument")}
		}

		rvalue, err := convert(arg.Expr)
		if err != nil {
			return "", parseErr{arg, err}
		}
		if arg.IsReference {
			rvalue = "&" + rvalue
		}
		if arg.Variadic {
			// PHP spreads with a prefix (`...$xs`); Go uses a suffix (`xs...`)
			rvalue += "..."
		}

		callParams = append(callParams, rvalue)
	}

	return "(" + strings.Join(callParams, `, `) + ")", nil // expr only, no semicolon/newline
}

// convertFunctionCommon renders everything after the function name for a
// generated Go function: the parameter list, the `(T, error)` result list and
// the braced body.
//
// params must all be *node.Parameter. returnType is resolved via resolveName
// and is prefixed with `*` when returnsRef is set. Each entry of bodyStmts is
// converted in order and written on its own line. The result ends with the
// closing brace and has no trailing newline.
func convertFunctionCommon(params []node.Node, returnType node.Node, returnsRef bool, bodyStmts []node.Node) (string, error) {

	// TODO scan function and see if it contains any return statements at all
	// If not, then we only need an err return parameter, not anything else

	funcParams := make([]string, 0, len(params))
	for _, p := range params {
		// Keep the original node in its own variable: if the assertion fails,
		// the asserted value is a nil pointer and must not be put in the error
		param, ok := p.(*node.Parameter)
		if !ok {
			return "", parseErr{p, fmt.Errorf("expected node.Parameter")}
		}

		// VariableType: might be nil for untyped parameters
		paramType, err := resolveName(param.VariableType)
		if err != nil {
			return "", parseErr{param, err}
		}
		if param.ByRef {
			paramType = "*" + paramType
		}
		if param.Variadic {
			paramType = "..." + paramType
		}

		// Name
		paramName := param.Variable.(*expr.Variable).VarName.(*node.Identifier).Value

		funcParams = append(funcParams, paramName+" "+paramType)
	}

	// ReturnType
	funcReturn, err := resolveName(returnType)
	if err != nil {
		return "", parseErr{returnType, err}
	}
	if returnsRef {
		funcReturn = "*" + funcReturn
	}

	var out strings.Builder

	// Build function prototype
	out.WriteString("(" + strings.Join(funcParams, ", ") + ") (" + funcReturn + ", error) {\n")

	// Recurse through body statements
	for _, s := range bodyStmts {
		bodyStmt, err := convert(s)
		if err != nil {
			return "", parseErr{s, err}
		}

		out.WriteString(bodyStmt)
		out.WriteString("\n")
	}

	// Done
	// No extra trailing newline in case this is part of a large expression
	out.WriteString("}")
	return out.String(), nil
}