supabase-cli/pkg/parser/state.go

185 lines
3.9 KiB
Go

package parser
import (
"bytes"
"strings"
"unicode"
"unicode/utf8"
)
const (
// BEGIN_ATOMIC is the keyword that opens a nested function body. It is
// compared case insensitively against the tail of the parsed data.
// Omit BEGIN to allow arbitrary whitespaces between BEGIN and ATOMIC keywords.
// This can fail if ATOMIC is used as column name because it is not a reserved
// keyword: https://www.postgresql.org/docs/current/sql-keywords-appendix.html
BEGIN_ATOMIC = "ATOMIC"
// END_ATOMIC is the keyword that closes a body opened by BEGIN_ATOMIC. It is
// also compared case insensitively.
END_ATOMIC = "END"
)
// State is one node of the state machine that decides where a token ends.
type State interface {
// Next receives the rune r and the bytes in data, and returns the state
// that should handle the following rune.
// NOTE(review): data appears to hold the bytes read so far with r as its
// last rune (ReadyState computes len(data) - utf8.RuneLen(r)) - confirm
// against the caller.
// Return nil to emit token
Next(r rune, data []byte) State
}
// ReadyState is the initial state: no quote, comment or nested body is open
// and the parser is ready to parse the next token.
type ReadyState struct{}

// Next dispatches on r, the last rune of data, and returns the state that
// handles the following rune. It returns nil on a semicolon to emit the token
// and returns s itself for any rune that does not open a new construct.
func (s *ReadyState) Next(r rune, data []byte) State {
	switch r {
	case '$':
		// Remember where the tag starts so it can be sliced out of data later.
		return &TagState{offset: len(data) - utf8.RuneLen(r)}
	case '\'', '"':
		return &QuoteState{delimiter: r}
	case '-':
		return &CommentState{}
	case '/':
		return &BlockState{}
	case '\\':
		return &EscapeState{}
	case ';':
		// Emit token
		return nil
	case '(':
		return &AtomicState{prev: s, delimiter: []byte{')'}}
	case 'c', 'C':
		// Case insensitive match of the keyword against the tail of data.
		offset := len(data) - len(BEGIN_ATOMIC)
		if offset < 0 || !strings.EqualFold(string(data[offset:]), BEGIN_ATOMIC) {
			break
		}
		if offset > 0 {
			// ATOMIC only counts as a keyword when it starts a word. Without
			// this check an identifier such as is_atomic would open a body
			// and swallow the semicolons that follow it.
			prev, _ := utf8.DecodeLastRune(data[:offset])
			if prev == '_' || prev == '$' || unicode.IsLetter(prev) || unicode.IsDigit(prev) {
				break
			}
		}
		return &AtomicState{prev: s, delimiter: []byte(END_ATOMIC)}
	}
	return s
}
// CommentState is entered after a single dash, which may start a line comment.
type CommentState struct{}

// Next turns a second dash into a line comment that runs until the next
// newline. Any other rune is handed to a fresh ReadyState.
func (s *CommentState) Next(r rune, data []byte) State {
	if r != '-' {
		// Not a comment after all: let the ready state handle the rune.
		return (&ReadyState{}).Next(r, data)
	}
	// Like a dollar quote, nothing inside a comment is escaped.
	return &DollarState{delimiter: []byte("\n")}
}
// BlockState is entered after a slash, which may start a block comment.
// Block comments nest, so the state tracks how many are currently open.
type BlockState struct {
	// depth is the number of block comments that are currently open.
	depth int
	// paired is set when the previous byte completed a delimiter. That byte
	// must not be reused as the first byte of the next delimiter, otherwise
	// "/*/" would read as an opened and closed comment.
	paired bool
}

// Next inspects the last two bytes of data. It returns s while a comment is
// open, a fresh ReadyState once the outermost comment is closed, and hands r
// to ReadyState when the initial slash did not start a comment.
func (s *BlockState) Next(r rune, data []byte) State {
	const (
		opening = "/*"
		closing = "*/"
	)
	// Leave the window empty when the previous byte already belongs to a
	// delimiter, or when data is too short to hold one.
	var window string
	if !s.paired && len(data) >= len(opening) {
		window = string(data[len(data)-len(opening):])
	}
	s.paired = false
	switch {
	case window == opening:
		s.depth++
		s.paired = true
	case s.depth == 0:
		// Break out of block state
		return (&ReadyState{}).Next(r, data)
	case window == closing:
		s.depth--
		if s.depth == 0 {
			return &ReadyState{}
		}
		s.paired = true
	}
	return s
}
// QuoteState is entered after an opening single quote ' or double quote ".
type QuoteState struct {
	// delimiter is the quote rune that opened the literal.
	delimiter rune
	// escape is set when the previous rune was the delimiter. That rune either
	// closed the literal or is the first half of a doubled quote.
	escape bool
}

// Next returns s while the literal is open. Once the rune after a delimiter
// turns out not to be a second delimiter, the literal is over and that rune
// is handed to a fresh ReadyState.
func (s *QuoteState) Next(r rune, data []byte) State {
	isQuote := r == s.delimiter
	switch {
	case !s.escape:
		// A delimiter here may close the literal; the next rune decides.
		s.escape = isQuote
	case isQuote:
		// Preserve escaped quote ''
		s.escape = false
	default:
		// Break out of quote state
		return (&ReadyState{}).Next(r, data)
	}
	return s
}
// DollarState is entered after the opening delimiter of a dollar quote or a
// line comment. No characters are ever escaped inside it.
type DollarState struct {
	// delimiter is the byte sequence that closes the state.
	delimiter []byte
	// seen counts the runes consumed since the state was opened, up to the
	// number of runes in delimiter.
	seen int
}

// Next returns a fresh ReadyState once data ends with the delimiter, and s
// otherwise. The delimiter is only accepted after at least as many runes as it
// contains have been consumed. This keeps the closing delimiter from
// overlapping the opening one: $$$1$$ must not close on its third dollar sign.
// It assumes Next is called once for every rune appended to data.
func (s *DollarState) Next(r rune, data []byte) State {
	if need := utf8.RuneCount(s.delimiter); s.seen < need {
		s.seen++
		if s.seen < need {
			return s
		}
	}
	if bytes.HasSuffix(data, s.delimiter) {
		// Break out of dollar state
		return &ReadyState{}
	}
	return s
}
// TagState is entered after the first dollar sign of a tag, ie. $tag$
type TagState struct {
	// offset is the index in data of the dollar sign that opened the tag.
	offset int
}

// Next finishes the tag on a second dollar sign and opens a dollar quote that
// is closed by the same tag. Letters, digits and underscores extend the tag.
// Any other rune abandons the tag and is handed to a fresh ReadyState.
func (s *TagState) Next(r rune, data []byte) State {
	switch {
	case r == '$':
		// Make a copy since the data slice may be overwritten
		delimiter := make([]byte, len(data)-s.offset)
		copy(delimiter, data[s.offset:])
		return &DollarState{delimiter: delimiter}
	case r == '_', unicode.IsLetter(r), unicode.IsDigit(r):
		// Valid tag: https://www.postgresql.org/docs/current/sql-syntax-lexical.html
		return s
	default:
		// Break out of tag state
		return (&ReadyState{}).Next(r, data)
	}
}
// EscapeState is entered after a backslash \ escape.
type EscapeState struct{}

// Next consumes exactly one rune, whatever it is, and goes back to the ready
// state without inspecting r or data.
func (s *EscapeState) Next(r rune, data []byte) State {
	return new(ReadyState)
}
// AtomicState is a nested body, such as a BEGIN ATOMIC function body, that
// only ends at its closing delimiter. A nil state returned from inside the
// body is ignored, so the body is never split.
type AtomicState struct {
	// prev is the state of the parser inside the body.
	prev State
	// delimiter closes the body and is matched case insensitively.
	delimiter []byte
}

// Next advances the inner state with r and returns a fresh ReadyState once
// the closing delimiter is found at the end of data, or s otherwise.
func (s *AtomicState) Next(r rune, data []byte) State {
	// Check this before advancing: when r closes a nested body, the same rune
	// must not close this body too, e.g. the inner parenthesis of (a (1); b).
	_, nested := s.prev.(*AtomicState)
	if curr := s.prev.Next(r, data); curr != nil {
		s.prev = curr
	}
	// If we are in a quoted state, the current delimiter doesn't count.
	if _, ready := s.prev.(*ReadyState); !ready || nested {
		return s
	}
	// Treat delimiter as case insensitive
	offset := len(data) - len(s.delimiter)
	if offset < 0 || !strings.EqualFold(string(data[offset:]), string(s.delimiter)) {
		return s
	}
	// A keyword delimiter only counts at the start of a word, so that END is
	// not found inside identifiers such as vendor or calendar.
	isWord := func(c rune) bool {
		return c == '_' || unicode.IsLetter(c) || unicode.IsDigit(c)
	}
	if first, _ := utf8.DecodeRune(s.delimiter); offset > 0 && isWord(first) {
		if before, _ := utf8.DecodeLastRune(data[:offset]); before == '$' || isWord(before) {
			return s
		}
	}
	return &ReadyState{}
}