185 lines
3.9 KiB
Go
185 lines
3.9 KiB
Go
package parser
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
const (
	// BEGIN_ATOMIC is the text ReadyState looks for, case-insensitively, at the
	// tail of the input to detect the start of a BEGIN ATOMIC function body.
	// BEGIN is omitted to allow arbitrary whitespace between the BEGIN and
	// ATOMIC keywords.
	// This can fail if ATOMIC is used as a column name, because it is not a
	// reserved keyword: https://www.postgresql.org/docs/current/sql-keywords-appendix.html
	BEGIN_ATOMIC = "ATOMIC"
	// END_ATOMIC is the delimiter given to the AtomicState that ReadyState
	// opens once BEGIN_ATOMIC has been seen.
	END_ATOMIC = "END"
)
|
|
|
|
// State is one node of the parser's state machine. Every concrete state in
// this file implements it.
type State interface {
	// Next consumes the rune r and returns the state that should handle the
	// following rune. The implementations in this file read the tail of data
	// and expect it to end with the encoding of r.
	// Return nil to emit token
	Next(r rune, data []byte) State
}
|
|
|
|
// Initial state: ready to parse next token
|
|
type ReadyState struct{}
|
|
|
|
func (s *ReadyState) Next(r rune, data []byte) State {
|
|
switch r {
|
|
case '$':
|
|
offset := len(data) - utf8.RuneLen(r)
|
|
return &TagState{offset: offset}
|
|
case '\'':
|
|
fallthrough
|
|
case '"':
|
|
return &QuoteState{delimiter: r}
|
|
case '-':
|
|
return &CommentState{}
|
|
case '/':
|
|
return &BlockState{}
|
|
case '\\':
|
|
return &EscapeState{}
|
|
case ';':
|
|
// Emit token
|
|
return nil
|
|
case '(':
|
|
return &AtomicState{prev: s, delimiter: []byte{')'}}
|
|
case 'c':
|
|
fallthrough
|
|
case 'C':
|
|
offset := len(data) - len(BEGIN_ATOMIC)
|
|
if offset >= 0 && strings.EqualFold(string(data[offset:]), BEGIN_ATOMIC) {
|
|
return &AtomicState{prev: s, delimiter: []byte(END_ATOMIC)}
|
|
}
|
|
}
|
|
return s
|
|
}
|
|
|
|
// Opened a line comment
|
|
type CommentState struct{}
|
|
|
|
func (s *CommentState) Next(r rune, data []byte) State {
|
|
if r == '-' {
|
|
// No characters are escaped in comments, which is the same as dollar
|
|
return &DollarState{delimiter: []byte{'\n'}}
|
|
}
|
|
// Break out of comment state
|
|
state := &ReadyState{}
|
|
return state.Next(r, data)
|
|
}
|
|
|
|
// Opened a block comment
|
|
type BlockState struct {
|
|
depth int
|
|
}
|
|
|
|
func (s *BlockState) Next(r rune, data []byte) State {
|
|
const open = "/*"
|
|
const close = "*/"
|
|
window := data[len(data)-2:]
|
|
if bytes.Equal(window, []byte(open)) {
|
|
s.depth += 1
|
|
return s
|
|
}
|
|
if s.depth == 0 {
|
|
// Break out of block state
|
|
state := &ReadyState{}
|
|
return state.Next(r, data)
|
|
}
|
|
if bytes.Equal(window, []byte(close)) {
|
|
s.depth -= 1
|
|
if s.depth == 0 {
|
|
return &ReadyState{}
|
|
}
|
|
}
|
|
return s
|
|
}
|
|
|
|
// Opened a single quote ' or double quote "
|
|
type QuoteState struct {
|
|
delimiter rune
|
|
escape bool
|
|
}
|
|
|
|
func (s *QuoteState) Next(r rune, data []byte) State {
|
|
if s.escape {
|
|
// Preserve escaped quote ''
|
|
if r == s.delimiter {
|
|
s.escape = false
|
|
return s
|
|
}
|
|
// Break out of quote state
|
|
state := &ReadyState{}
|
|
return state.Next(r, data)
|
|
}
|
|
if r == s.delimiter {
|
|
s.escape = true
|
|
}
|
|
return s
|
|
}
|
|
|
|
// Opened a dollar quote, no characters are ever esacped.
|
|
type DollarState struct {
|
|
delimiter []byte
|
|
}
|
|
|
|
func (s *DollarState) Next(r rune, data []byte) State {
|
|
window := data[len(data)-len(s.delimiter):]
|
|
if bytes.Equal(window, s.delimiter) {
|
|
// Break out of dollar state
|
|
return &ReadyState{}
|
|
}
|
|
return s
|
|
}
|
|
|
|
// Opened a tag, ie. $tag$
|
|
type TagState struct {
|
|
offset int
|
|
}
|
|
|
|
func (s *TagState) Next(r rune, data []byte) State {
|
|
if r == '$' {
|
|
// Make a copy since the data slice may be overwritten
|
|
tag := data[s.offset:]
|
|
dollar := DollarState{
|
|
delimiter: make([]byte, len(tag)),
|
|
}
|
|
copy(dollar.delimiter, tag)
|
|
return &dollar
|
|
}
|
|
// Valid tag: https://www.postgresql.org/docs/current/sql-syntax-lexical.html
|
|
if unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
|
|
return s
|
|
}
|
|
// Break out of tag state
|
|
state := &ReadyState{}
|
|
return state.Next(r, data)
|
|
}
|
|
|
|
// Opened a \ escape
|
|
type EscapeState struct{}
|
|
|
|
func (s *EscapeState) Next(r rune, data []byte) State {
|
|
return &ReadyState{}
|
|
}
|
|
|
|
// Opened BEGIN ATOMIC function body
|
|
type AtomicState struct {
|
|
prev State
|
|
delimiter []byte
|
|
}
|
|
|
|
func (s *AtomicState) Next(r rune, data []byte) State {
|
|
// If we are in a quoted state, the current delimiter doesn't count.
|
|
if curr := s.prev.Next(r, data); curr != nil {
|
|
s.prev = curr
|
|
}
|
|
if _, ok := s.prev.(*ReadyState); ok {
|
|
window := data[len(data)-len(s.delimiter):]
|
|
// Treat delimiter as case insensitive
|
|
if strings.EqualFold(string(window), string(s.delimiter)) {
|
|
return &ReadyState{}
|
|
}
|
|
}
|
|
return s
|
|
}
|