Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Begin implementing a new AST library #342

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions buf.gen.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Buf code-generation configuration (buf.gen.yaml), written in the v2 format.
version: v2
# Managed mode is enabled, with a single override: the go_package_prefix file
# option is set to this repository's internal/gen import path.
managed:
enabled: true
override:
- file_option: go_package_prefix
value: 'github.com/bufbuild/protocompile/internal/gen'
# One remote plugin, buf.build/protocolbuffers/go, writes its output under
# internal/gen and is passed the paths=source_relative option.
plugins:
- remote: buf.build/protocolbuffers/go
out: internal/gen
opt: paths=source_relative
10 changes: 10 additions & 0 deletions buf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Buf workspace configuration (buf.yaml), written in the v2 format.
version: v2
# A single module rooted at the proto directory, named buf.build/bufbuild/pbc.
modules:
- path: proto
name: buf.build/bufbuild/pbc
# Lint with the STANDARD rule selection.
lint:
use:
- STANDARD
# Breaking-change detection uses the WIRE_JSON rule selection.
breaking:
use:
- WIRE_JSON
156 changes: 156 additions & 0 deletions experimental/ast/builtin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
// Copyright 2020-2024 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ast

import "fmt"

// The Builtin values, numbered consecutively from zero by iota.
//
// The order of the entries up to builtinCount matters: each value doubles as
// an index into builtinNames, and builtinCount is used as that table's length.
const (
// BuiltinUnknown is the zero value; BuiltinByName returns it for any name
// that is not a builtin.
BuiltinUnknown Builtin = iota
BuiltinInt32
BuiltinInt64
BuiltinUInt32
BuiltinUInt64
BuiltinSInt32
BuiltinSInt64

BuiltinFloat
BuiltinDouble

BuiltinFixed32
BuiltinFixed64
BuiltinSFixed32
BuiltinSFixed64

BuiltinBool
BuiltinString
BuiltinBytes

// This corresponds to the builtin "type" map<K, V>.
BuiltinMap

// This corresponds to the builtin "constant" max, used in range expressions.
BuiltinMax

// builtinCount is one past the last real builtin, i.e. the number of values
// declared above (including BuiltinUnknown).
builtinCount

// Aliases for BuiltinFloat and BuiltinDouble. They are given explicit values
// and come after builtinCount, so they do not add to the count.
BuiltinFloat32 = BuiltinFloat
BuiltinFloat64 = BuiltinDouble
)

// builtinByName maps the source spelling of every builtin to its Builtin
// value. BuiltinUnknown has no entry.
var builtinByName = map[string]Builtin{
	"int32": BuiltinInt32, "int64": BuiltinInt64,
	"uint32": BuiltinUInt32, "uint64": BuiltinUInt64,
	"sint32": BuiltinSInt32, "sint64": BuiltinSInt64,

	"float": BuiltinFloat, "double": BuiltinDouble,

	"fixed32": BuiltinFixed32, "fixed64": BuiltinFixed64,
	"sfixed32": BuiltinSFixed32, "sfixed64": BuiltinSFixed64,

	"bool": BuiltinBool, "string": BuiltinString, "bytes": BuiltinBytes,

	"map": BuiltinMap, "max": BuiltinMax,
}

// builtinNames is the inverse of builtinByName: a table indexed by Builtin
// value that holds each builtin's spelling. Index 0 holds "unknown".
var builtinNames = func() []string {
	table := make([]string, builtinCount)
	table[0] = "unknown"

	for spelling, b := range builtinByName {
		table[b] = spelling
	}
	return table
}()

// keywords is the set of identifiers that Token.describe calls out as
// keywords when rendering diagnostics.
//
// It plays no part in syntactic analysis, because every keyword is contextual,
// and it is intentionally not a complete list of keywords.
var keywords = map[string]bool{
	// File-level declarations.
	"syntax": true, "edition": true, "import": true,
	"weak": true, "public": true, "package": true,

	// Definitions.
	"option": true, "message": true, "enum": true,
	"service": true, "extend": true, "oneof": true,

	// Ranges.
	"extensions": true, "reserved": true, "to": true,

	// Methods.
	"rpc": true, "returns": true,

	// Field and method modifiers.
	"repeated": true, "optional": true, "required": true,
	"group": true, "stream": true,
}

// Builtin is one of the built-in Protobuf types.
//
// Besides the primitive types, this also covers the builtin "type" map
// ([BuiltinMap]) and the builtin "constant" max ([BuiltinMax]). The zero value
// is [BuiltinUnknown].
type Builtin int8

// BuiltinByName resolves the spelling of a builtin to its [Builtin] value.
//
// A name that is not a builtin resolves to [BuiltinUnknown].
func BuiltinByName(name string) Builtin {
	if b, ok := builtinByName[name]; ok {
		return b
	}

	// Builtin's zero value is BuiltinUnknown.
	var unknown Builtin
	return unknown
}

// String implements [fmt.Stringer] for Builtin.
//
// A value with an entry in builtinNames is rendered as that entry. Any other
// value, including a negative one, is rendered as "builtin" followed by its
// decimal value, e.g. "builtin-1".
func (b Builtin) String() string {
	// Builtin is a signed type, so the lower bound must be checked as well:
	// without it a negative value would index builtinNames out of range.
	if idx := int(b); idx >= 0 && idx < len(builtinNames) {
		return builtinNames[idx]
	}
	return fmt.Sprintf("builtin%d", int(b))
}

// IsPrimitive reports whether b is one of the primitive types: the integer,
// floating-point, and fixed-width types, plus bool, string, and bytes.
//
// Every other value reports false.
func (b Builtin) IsPrimitive() bool {
	switch b {
	case BuiltinInt32, BuiltinUInt32, BuiltinSInt32,
		BuiltinInt64, BuiltinUInt64, BuiltinSInt64:
		// Variable-width integers.
	case BuiltinFixed32, BuiltinSFixed32,
		BuiltinFixed64, BuiltinSFixed64:
		// Fixed-width integers.
	case BuiltinFloat, BuiltinDouble:
		// Floating point.
	case BuiltinBool, BuiltinString, BuiltinBytes:
		// Everything else that counts as primitive.
	default:
		return false
	}
	return true
}
178 changes: 178 additions & 0 deletions experimental/ast/context.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
// Copyright 2020-2024 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package ast

import (
"fmt"

"github.com/bufbuild/protocompile/experimental/report"
"github.com/bufbuild/protocompile/internal/arena"
)

// Context is where all of the book-keeping for the AST of a particular file is kept.
//
// Virtually all operations inside of package ast involve a Context. However, most of
// the exported types carry their Context with them, so you don't need to worry about
// passing it around.
type Context struct {
// The file this context was created for, in the indexed form produced by
// report.NewIndexedFile (see newContext).
file *report.IndexedFile

// Storage for tokens.
//
// stream backs the non-synthetic tokens exposed by Tokens and Stream;
// synthetic tokens are stored separately.
stream []tokenImpl
syntheticTokens []tokenSynthetic

// This contains materialized literals for some tokens.
//
// Not all tokens will have an entry here; only those that have "unusual"
// representations. This means the lexer can deal with the complex parsing
// logic on our behalf in general, but common cases are re-parsed on-demand.
//
// All elements of this map are string, uint64, or float64.
literals map[rawToken]any

// Storage for the various node types.
decls decls
types types
exprs exprs

// Storage for rawCompactOptions values.
options arena.Arena[rawCompactOptions]
}

// Contextual is any AST type that carries a context (virtually all of them).
type Contextual interface {
// Context returns this type's [Context].
//
// Zero values of this type should return nil.
Context() *Context
}

// newContext builds an empty context for file, with the file indexed for
// reporting and the literal table initialized.
func newContext(file report.File) *Context {
	ctx := new(Context)
	ctx.file = report.NewIndexedFile(file)
	ctx.literals = make(map[rawToken]any)

	// Allocate the rawBody for the whole file before anything else.
	ctx.NewDeclBody(Token{})
	return ctx
}

// Parse lexes, parses, and legalizes the Protobuf source in file, and returns
// the root [File] of the resulting AST.
//
// Diagnostics encountered along the way are placed in report. report.Stage is
// incremented once before each of the three phases.
func Parse(file report.File, report *report.Report) File {
	// Within this body, "report" is the parameter, not the package.
	lx := lexer{Context: newContext(file)}

	// Phase one: lexing.
	report.Stage++
	lx.Lex(report)

	// Phase two: parsing.
	report.Stage++
	parse(report, lx.Context)

	// Phase three: legalization, starting from the root of the file.
	report.Stage++
	legalize(report, nil, lx.Context.Root())

	return lx.Context.Root()
}

// Context implements [Contextual] for Context.
//
// A Context is its own context, so this returns the receiver unchanged
// (including when the receiver is nil).
func (c *Context) Context() *Context {
return c
}

// Stream returns a new cursor over the whole lexed token stream.
func (c *Context) Stream() *Cursor {
	cur := new(Cursor)
	cur.withContext = withContext{ctx: c}

	// The cursor runs from raw token 1 to len(c.stream) + 1.
	cur.start = 1
	cur.end = rawToken(len(c.stream) + 1)
	return cur
}

// Path reports the (alleged) file system path of this context's file.
//
// The path is only ever used when rendering diagnostics.
func (c *Context) Path() string {
	f := c.file.File()
	return f.Path
}

// Text returns the full text of this context's file.
func (c *Context) Text() string {
	f := c.file.File()
	return f.Text
}

// Root returns the root AST node for this context: a [File] wrapping the
// file-level declaration scope.
func (c *Context) Root() File {
	// newContext allocates the whole-file body before any other, so the root
	// scope is addressed as 1.
	scope := wrapDecl[DeclScope](1, c)
	return File{scope}
}

// Tokens exposes every non-synthetic token in this context as one flat slice,
// ignoring nesting.
//
// Prefer [Context.Stream] unless you specifically need the flat view.
func (c *Context) Tokens() Slice[Token] {
	// Element i of the stream is presented as raw token i+1, bound to c.
	toToken := func(i int, _ *tokenImpl) Token {
		id := rawToken(i + 1)
		return id.With(c)
	}

	return funcSlice[tokenImpl, Token]{s: c.stream, f: toToken}
}

// NOTE: Some methods of Context live in the context_*.go files. This is to
// reduce clutter in this file.

// panicIfNil panics if this context is nil.
//
// This is helpful for immediately panicking on function entry.
func (c *Context) panicIfNil() {
// Reading a field through c dereferences it, which is a run-time panic when
// c is nil. The value read is discarded.
_ = c.file
}

// panicIfNotOurs checks that every given contextual value is owned by this
// context, and panics on the first one that is not.
//
// Values that are nil, or whose context is nil, are skipped. Panics if c
// itself is nil.
func (c *Context) panicIfNotOurs(that ...Contextual) {
	c.panicIfNil()

	for _, other := range that {
		if other == nil {
			continue
		}

		theirs := other.Context()
		if theirs != nil && theirs != c {
			panic(fmt.Sprintf("protocompile/ast: attempt to mix different contexts: %p(%q) and %p(%q)", c, c.Path(), theirs, theirs.Path()))
		}
	}
}

// withContext is an embeddable type that provides common operations involving
// a context, causing it to implement Contextual.
type withContext struct {
// The context this value belongs to; nil in the zero value.
ctx *Context
}

// Context returns this type's associated [ast.Context].
//
// Returns `nil` if this is this type's zero value.
func (c withContext) Context() *Context {
return c.ctx
}

// Nil checks whether this is this type's zero value, i.e. whether it has no
// associated context.
func (c withContext) Nil() bool {
return c.ctx == nil
}

// panicIfNil panics when the associated context is nil.
//
// Call it at the top of a function to fail immediately rather than part-way
// through.
func (c *withContext) panicIfNil() {
	ctx := c.Context()
	ctx.panicIfNil()
}
Loading