lexer: parse fields with a state machine

This commit is contained in:
mappu 2023-12-11 19:32:37 +13:00
parent e7daf5e73c
commit afac87e1a9

107
lexer.go
View File

@ -31,6 +31,99 @@ func (l *lexer) Peek() (Token, error) {
return tok, nil
}
// Fields splits one line of source text into its fields using a small
// state machine.
//
// Splitting rules:
//   - Spaces, tabs, carriage returns, newlines and commas separate fields;
//     runs of separators produce no empty fields.
//   - A ';' outside of a quoted string starts a comment that runs until the
//     next newline (or the end of the input); its contents are discarded.
//   - A '"' seen between fields opens a quoted string, which ends at the
//     next unescaped '"'. Its contents become one field, even when empty,
//     and may contain separators and ';'. A '"' in the middle of a bare word
//     is kept as an ordinary character of that word.
//   - Inside a quoted string, a backslash introduces an escape: \n, \r, \t,
//     \s (a space) and \\ are recognised.
//
// It returns an error for an unrecognised escape sequence and for a quoted
// string that is still open when the input ends, including input that ends
// directly after a backslash. The returned slice is non-nil on success.
func (l *lexer) Fields(line string) ([]string, error) {
	const (
		stateStart = iota
		stateInWord
		stateInQuotedString
		stateBackslash
		stateLineComment
	)

	var (
		ret   = []string{}
		state = stateStart
		buff  strings.Builder // text of the field currently being read
	)

	// flush emits the accumulated text as one field and empties the buffer.
	flush := func() {
		ret = append(ret, buff.String())
		buff.Reset()
	}

	for _, c := range line {
		switch state {
		case stateStart:
			switch c {
			case ' ', '\t', '\r', '\n', ',':
				// Separators between fields carry no content.
			case ';':
				state = stateLineComment
			case '"':
				state = stateInQuotedString
			default:
				buff.WriteRune(c)
				state = stateInWord
			}

		case stateInWord:
			switch c {
			case ' ', '\t', '\r', '\n', ',':
				flush()
				state = stateStart
			case ';':
				// A comment may directly follow a word without a separator.
				flush()
				state = stateLineComment
			default:
				buff.WriteRune(c)
			}

		case stateInQuotedString:
			switch c {
			case '"':
				// Flush unconditionally so that "" yields an empty field.
				flush()
				state = stateStart
			case '\\':
				state = stateBackslash
			default:
				buff.WriteRune(c)
			}

		case stateBackslash:
			switch c {
			case 'n':
				buff.WriteByte('\n')
			case 'r':
				buff.WriteByte('\r')
			case 't':
				buff.WriteByte('\t')
			case 's':
				buff.WriteByte(' ')
			case '\\':
				buff.WriteByte('\\')
			default:
				return nil, fmt.Errorf("Unknown backslash escape sequence")
			}
			state = stateInQuotedString

		case stateLineComment:
			// Everything up to the end of the line is ignored.
			if c == '\n' {
				state = stateStart
			}

		default:
			panic("impossible state")
		}
	}

	switch state {
	case stateInQuotedString, stateBackslash:
		// stateBackslash is only reachable from inside a quoted string, so
		// running out of input there also means the closing quote is missing.
		return nil, fmt.Errorf("unterminated string literal")
	case stateInWord:
		// The final word has no trailing separator to trigger a flush.
		flush()
	}

	return ret, nil
}
func (l *lexer) Next() (Token, error) {
if l.peek != nil {
ret := l.peek
@ -46,21 +139,19 @@ func (l *lexer) Next() (Token, error) {
return nil, err
}
// Strip leading spaces
line = strings.TrimLeft(line, " \t\r\n")
//
// Strip trailing line-comments (;)
line, _, _ = strings.Cut(line, `;`)
fields, err := l.Fields(line)
if err != nil {
return nil, err
}
if len(line) == 0 {
if len(fields) == 0 {
// This line only contained comments
// Continue to the next line
return l.Next()
}
fields := strings.Fields(line)
// FIXME commas!?
switch strings.ToLower(fields[0]) {
case "section":
return SectionToken{fields[1]}, nil