Skip to content

Commit

Permalink
Merge pull request #80 from tminaorg/engine-starter-refactor
Browse files Browse the repository at this point in the history
small search refactor
  • Loading branch information
k4lizen authored Sep 26, 2023
2 parents e8e00a7 + 503c714 commit 9bb4616
Show file tree
Hide file tree
Showing 8 changed files with 429 additions and 109 deletions.
10 changes: 4 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@ brzaguza.*

.vscode/*

src/engines/yandex/site/*
src/engines/etools/site/*
src/engines/swisscows/site/*
src/engines/metager/site/*
src/engines/presearch/site/*
src/engines/*/site/*
!src/engines/_engines_test

logdump/*.html
log/*.log

# go generate
*_stringer.go
*_enumer.go
*_enumer.go
*_searcher.go
319 changes: 319 additions & 0 deletions generate/searcher/searcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,319 @@
package main

import (
"flag"
"fmt"
"go/ast"
"go/constant"
"go/format"
"go/token"
"go/types"
"log"
"os"
"path"
"strings"

"golang.org/x/tools/go/packages"
)

// Command-line flags of the searcher generator. The help text of each flag
// (printed by flag.PrintDefaults) describes its meaning and default.
var (
typeName = flag.String("type", "", "type name; must be set")
output = flag.String("output", "", "output file name; default srcdir/<type>_searcher.go")
trimprefix = flag.String("trimprefix", "", "trim the `prefix` from the generated constant names")
linecomment = flag.Bool("linecomment", false, "use line comment text as printed text when present")
buildTags = flag.String("tags", "", "comma-separated list of build tags to apply")
packageName = flag.String("packagename", "", "name of the package for generated code; default current package")
enginesImport = flag.String("enginesimport", "github.com/tminaorg/brzaguza/src/engines", "source of the engines import, which is prefixed to imports for consts; default github.com/tminaorg/brzaguza/src/engines")
)

// Usage writes the searcher command synopsis to standard error and then lets
// the flag package print every registered flag with its default.
// It is meant to replace the flag package's default usage function.
func Usage() {
	synopsis := [...]string{
		"Usage of searcher:\n",
		"\tsearcher [flags] -type T [directory]\n",
		"\tsearcher [flags] -type T files... # Must be a single package\n",
		"Flags:\n",
	}
	for _, line := range synopsis {
		fmt.Fprint(os.Stderr, line)
	}
	flag.PrintDefaults()
}

// main parses the command line, loads the target package, generates the
// engine-starter source for the requested type(s), formats it and writes it
// to the output file. It exits with status 2 when -type is missing.
func main() {
	log.SetFlags(0)
	log.SetPrefix("searcher: ")
	flag.Usage = Usage
	flag.Parse()
	if *typeName == "" {
		flag.Usage()
		os.Exit(2)
	}

	// NOTE: -type is meant to be a comma-separated list of type names, but
	// per the original author's note only the first type name is currently
	// supported.
	typeNames := strings.Split(*typeName, ",")

	var tags []string
	if *buildTags != "" {
		tags = strings.Split(*buildTags, ",")
	}

	// Either one directory or a list of files is accepted; with no arguments
	// the whole package in the current directory is processed.
	args := flag.Args()
	if len(args) == 0 {
		args = []string{"."}
	}

	g := Generator{
		trimPrefix:  *trimprefix,
		lineComment: *linecomment,
	}

	// Determine the directory that receives the default output file.
	var dir string
	switch {
	case len(args) == 1 && isDirectoryFatal(args[0]):
		dir = args[0]
	case len(tags) != 0:
		log.Fatal("-tags option applies only to directories, not when files are specified")
	default:
		dir = path.Dir(args[0])
	}

	// The package is parsed exactly once.
	g.parsePackage(args, tags)

	// Header comment, package clause and the import shared by all methods.
	g.Printf("// Code generated by \"searcher %s\"; DO NOT EDIT.\n", strings.Join(os.Args[1:], " "))
	g.Printf("\n")
	pkgName := *packageName
	if pkgName == "" {
		pkgName = g.pkg.name
	}
	g.Printf("package %s", pkgName)
	g.Printf("\n")
	g.Printf("import \"%s\"\n", *enginesImport)

	for _, name := range typeNames {
		g.generate(name)
	}

	src := g.format()

	// Default output name: <first type>_searcher.go, lower-cased, inside dir.
	outputName := *output
	if outputName == "" {
		outputName = path.Join(dir, strings.ToLower(fmt.Sprintf("%s_searcher.go", typeNames[0])))
	}
	if err := os.WriteFile(outputName, src, 0644); err != nil {
		log.Fatalf("writing output: %s", err)
	}
}

// Printf formats its arguments according to format and appends the result to
// the generator's output buffer.
func (g *Generator) Printf(format string, args ...interface{}) {
	g.buf.WriteString(fmt.Sprintf(format, args...))
}

// parsePackage loads the package described by patterns, applying the given
// build tags, and hands it to addPackage.
// The program is terminated if loading fails or if the patterns do not
// resolve to exactly one package.
func (g *Generator) parsePackage(patterns []string, tags []string) {
	tagsFlag := "-tags=" + strings.Join(tags, " ")
	loaded, err := packages.Load(&packages.Config{
		Mode:       packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | packages.NeedSyntax,
		Tests:      false,
		BuildFlags: []string{tagsFlag},
		Logf:       g.logf,
	}, patterns...)
	if err != nil {
		log.Fatal(err)
	}
	if count := len(loaded); count != 1 {
		log.Fatalf("error: %d packages matching %v", count, strings.Join(patterns, " "))
	}
	g.addPackage(loaded[0])
}

// addPackage stores a type-checked package on the generator and wraps each of
// its syntax trees in a File that carries the generator's trimPrefix and
// lineComment settings.
func (g *Generator) addPackage(pkg *packages.Package) {
	p := &Package{
		name:  pkg.Name,
		defs:  pkg.TypesInfo.Defs,
		files: make([]*File, 0, len(pkg.Syntax)),
	}
	for _, syntax := range pkg.Syntax {
		p.files = append(p.files, &File{
			file:        syntax,
			pkg:         p,
			trimPrefix:  g.trimPrefix,
			lineComment: g.lineComment,
		})
	}
	g.pkg = p
}

// generate emits the code for one constant type: an import line for every
// valid constant, a guard function that stops compiling when the constant
// values change, and finally the NewEngineStarter function.
// The program is terminated when no constants of typeName are found.
func (g *Generator) generate(typeName string) {
	collected := make(Values, 0, 100)
	for _, f := range g.pkg.files {
		// Reset the walker state before inspecting this file.
		f.typeName = typeName
		f.values = nil
		if f.file == nil {
			continue
		}
		ast.Inspect(f.file, f.genDecl)
		collected = append(collected, f.values...)
	}
	if len(collected) == 0 {
		log.Fatalf("no values defined for type %s", typeName)
	}

	// One import per valid constant, named after the lower-cased identifier.
	for _, v := range collected {
		if !validConst(v) {
			continue
		}
		g.Printf("import \"%s/%s\"\n", *enginesImport, strings.ToLower(v.originalName))
	}

	// Guard function: indexing a one-element array with (const - value)
	// only compiles while every constant still has its recorded value.
	g.Printf("func _() {\n")
	g.Printf("\t// An \"invalid array index\" compiler error signifies that the constant values have changed.\n")
	g.Printf("\t// Re-run the searcher command to generate them again.\n")
	g.Printf("\tvar x [1]struct{}\n")
	qualified := *packageName != ""
	for _, v := range collected {
		ref := v.originalName
		if qualified {
			// The output lives in another package, so qualify the constant.
			ref = g.pkg.name + "." + v.originalName
		}
		g.Printf("\t_ = x[%s - (%s)]\n", ref, v.str)
	}
	g.Printf("}\n")

	g.buildOneRun(collected, typeName)
}

// format runs the accumulated output through go/format and returns the
// result. If the buffer does not hold valid Go, two warnings are logged and
// the unformatted bytes are returned so the user can compile them to see the
// error.
func (g *Generator) format() []byte {
	raw := g.buf.Bytes()
	formatted, err := format.Source(raw)
	if err == nil {
		return formatted
	}
	// Should never happen, but can arise while developing the generator.
	log.Printf("warning: internal error: invalid Go generated: %s", err)
	log.Printf("warning: compile the package to analyze the error")
	return raw
}

// String returns the constant's value as text, i.e. the str field.
func (v *Value) String() string {
return v.str
}

// genDecl processes one declaration clause. It is used as the ast.Inspect
// callback that collects the constants of type f.typeName.
//
// Nodes that are not const declarations are skipped and true is returned so
// the walk continues. For a const declaration every name whose explicit,
// converted or carried-down type equals f.typeName is appended to f.values,
// and false is returned because nothing below the declaration is of interest.
// The program is terminated via log.Fatalf when a matching constant has no
// definition or is not of an integer type.
func (f *File) genDecl(node ast.Node) bool {
	decl, ok := node.(*ast.GenDecl)
	if !ok || decl.Tok != token.CONST {
		// We only care about const declarations.
		return true
	}
	// The name of the type of the constants we are declaring.
	// Can change if this is a multi-element declaration.
	typ := ""
	// Loop over the elements of the declaration. Each element is a ValueSpec:
	// a list of names possibly followed by a type, possibly followed by values.
	// If the type and value are both missing, we carry down the type (and value,
	// but the "go/types" package takes care of that).
	for _, spec := range decl.Specs {
		vspec := spec.(*ast.ValueSpec) // Guaranteed to succeed as this is CONST.
		if vspec.Type == nil && len(vspec.Values) > 0 {
			// "X = 1". With no type but a value. If the constant is untyped,
			// skip this vspec and reset the remembered type.
			typ = ""

			// If this is a simple type conversion, remember the type.
			// We don't mind if this is actually a call; a qualified call won't
			// be matched (that will be SelectorExpr, not Ident), and only unusual
			// situations will result in a function call that appears to be
			// a type conversion.
			ce, ok := vspec.Values[0].(*ast.CallExpr)
			if !ok {
				continue
			}
			id, ok := ce.Fun.(*ast.Ident)
			if !ok {
				continue
			}
			typ = id.Name
		}
		if vspec.Type != nil {
			// "X T". We have a type. Remember it.
			ident, ok := vspec.Type.(*ast.Ident)
			if !ok {
				continue
			}
			typ = ident.Name
		}
		if typ != f.typeName {
			// This is not the type we're looking for.
			continue
		}
		// We now have a list of names (from one line of source code) all being
		// declared with the desired type.
		// Grab their names and actual values and store them in f.values.
		for _, name := range vspec.Names {
			if name.Name == "_" {
				continue
			}
			// This dance lets the type checker find the values for us. It's a
			// bit tricky: look up the object declared by the name, find its
			// types.Const, and extract its value.
			obj, ok := f.pkg.defs[name]
			if !ok {
				log.Fatalf("no value for constant %s", name)
			}
			// Checked assertion: report a non-basic underlying type as a
			// regular fatal error instead of panicking.
			basic, ok := obj.Type().Underlying().(*types.Basic)
			if !ok || basic.Info()&types.IsInteger == 0 {
				log.Fatalf("can't handle non-integer constant type %s", typ)
			}
			info := basic.Info()
			value := obj.(*types.Const).Val() // Guaranteed to succeed as this is CONST.
			if value.Kind() != constant.Int {
				log.Fatalf("can't happen: constant is not an integer %s", name)
			}
			i64, isInt := constant.Int64Val(value)
			u64, isUint := constant.Uint64Val(value)
			if !isInt && !isUint {
				log.Fatalf("internal error: value of %s is not an integer: %s", name, value.String())
			}
			// Value.value holds the bit pattern as a uint64. When the constant
			// does not fit a uint64 (it is negative) u64 is undefined, so take
			// the bits from the exact int64 instead. The test must be on
			// isUint: i64 is only meaningful when isInt is true, which the
			// check above guarantees in this branch.
			if !isUint {
				u64 = uint64(i64)
			}
			v := Value{
				originalName: name.Name,
				value:        u64,
				signed:       info&types.IsUnsigned == 0,
				str:          value.String(),
			}
			// With -linecomment a single trailing comment replaces the name;
			// otherwise the configured prefix is trimmed from the identifier.
			if c := vspec.Comment; f.lineComment && c != nil && len(c.List) == 1 {
				v.name = strings.TrimSpace(c.Text())
			} else {
				v.name = strings.TrimPrefix(v.originalName, f.trimPrefix)
			}
			f.values = append(f.values, v)
		}
	}
	return false
}

// buildOneRun emits the NewEngineStarter function, which allocates a slice of
// EngineSearch with one slot per collected constant and assigns, for every
// valid constant, the Search function of the matching engine package to the
// slot indexed by that constant.
//
// Both the constant and the engine package are referenced through the
// constant's original identifier. That is the identifier declared in the
// source package and the one generate uses for the import path, whereas
// v.name may have been rewritten by -trimprefix or -linecomment and then no
// longer names a real constant or imported package.
// typeName is currently unused.
func (g *Generator) buildOneRun(values Values, typeName string) {
	g.Printf("\n")
	// The generated code is simple enough to write as a Printf format.
	g.Printf("\nfunc NewEngineStarter() []EngineSearch {\n\tmm := make([]EngineSearch, %d)", len(values))
	for _, v := range values {
		if !validConst(v) {
			continue
		}
		g.Printf("\n\tmm[%s.%s] = %s.Search", g.pkg.name, v.originalName, strings.ToLower(v.originalName))
	}
	g.Printf("\n\treturn mm\n}")
}
53 changes: 53 additions & 0 deletions generate/searcher/structs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package main

import (
"bytes"
"go/ast"
"go/types"
)

// Value represents a declared constant.
type Value struct {
originalName string // The name of the constant as declared in the source.
name string // The line-comment text when -linecomment applies, otherwise originalName with the prefix trimmed.
// The value is stored as a bit pattern alone. The boolean tells us
// whether to interpret it as an int64 or a uint64; the only place
// this matters is when sorting.
// Much of the time the str field is all we need; it is printed
// by Value.String.
value uint64 // Will be converted to int64 when needed.
signed bool // Whether the constant is a signed type.
str string // The string representation given by the "go/constant" package.
}

// Values is a list of declared constants.
type Values []Value

// Generator holds the state of the analysis. Primarily used to buffer
// the output for format.Source.
type Generator struct {
buf bytes.Buffer // Accumulated output.
pkg *Package // Package we are scanning.

// Naming options; addPackage copies them into every File it creates.
trimPrefix string
lineComment bool

// logf is passed to the package loader as its Logf hook.
logf func(format string, args ...interface{}) // test logging hook; nil when not testing
}

// File holds a single parsed file and associated data.
type File struct {
pkg *Package // Package to which this file belongs.
file *ast.File // Parsed AST.
// These fields are reset for each type being generated.
typeName string // Name of the constant type.
values Values // Accumulator for constant values of that type.

// Naming options used by genDecl when computing Value.name.
trimPrefix string // Prefix removed from the constant's identifier.
lineComment bool // Use a single trailing line comment as the name when present.
}

// Package holds the parts of a loaded package that the generator needs.
type Package struct {
name string // Package name as reported by the loader.
defs map[*ast.Ident]types.Object // Maps declaring identifiers to the objects they define.
files []*File // One entry per syntax tree of the package.
}
Loading

0 comments on commit 9bb4616

Please sign in to comment.