Skip to content

Commit

Permalink
Merge pull request Psiphon-Labs#706 from adotkhan/optimize-regen
Browse files Browse the repository at this point in the history
Optimize regen.NewGenerator performance
  • Loading branch information
rod-hynes authored Nov 13, 2024
2 parents 00b3ad8 + 92e6be9 commit 00aad9d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 10 deletions.
28 changes: 18 additions & 10 deletions psiphon/common/regen/internal_generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,21 +122,21 @@ func newGenerator(regexp *syntax.Regexp, args *GeneratorArgs) (generator *intern

// Generator that does nothing.
func noop(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, args.Debug), func() ([]byte, error) {
return []byte{}, nil
}}, nil
}

func opEmptyMatch(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
enforceOp(regexp, syntax.OpEmptyMatch)
return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, args.Debug), func() ([]byte, error) {
return []byte{}, nil
}}, nil
}

func opLiteral(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
enforceOp(regexp, syntax.OpLiteral)
return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, args.Debug), func() ([]byte, error) {
if args.ByteMode {
return runesToBytes(regexp.Rune...)
} else {
Expand All @@ -147,7 +147,7 @@ func opLiteral(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator,

func opAnyChar(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
enforceOp(regexp, syntax.OpAnyChar)
return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, args.Debug), func() ([]byte, error) {
if args.ByteMode {
return runesToBytes(rune(args.rng.Intn(math.MaxUint8 + 1)))
} else {
Expand All @@ -164,7 +164,7 @@ func opAnyCharNotNl(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenera
} else {
charClass = newCharClass(1, rune(math.MaxInt32))
}
return createCharClassGenerator(regexp.String(), charClass, args)
return createCharClassGenerator(regexpName(regexp, args.Debug), charClass, args)
}

func opQuest(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator, error) {
Expand Down Expand Up @@ -200,7 +200,7 @@ func opCharClass(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator
} else {
charClass = parseCharClass(regexp.Rune)
}
return createCharClassGenerator(regexp.String(), charClass, args)
return createCharClassGenerator(regexpName(regexp, args.Debug), charClass, args)
}

func opConcat(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenerator, error) {
Expand All @@ -211,7 +211,7 @@ func opConcat(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenerator
return nil, generatorError(err, "error creating generators for concat pattern /%s/", regexp)
}

return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, genArgs.Debug), func() ([]byte, error) {
var result bytes.Buffer
for _, generator := range generators {
gen, err := generator.Generate()
Expand All @@ -234,7 +234,7 @@ func opAlternate(regexp *syntax.Regexp, genArgs *GeneratorArgs) (*internalGenera

numGens := len(generators)

return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, genArgs.Debug), func() ([]byte, error) {
i := genArgs.rng.Intn(numGens)
generator := generators[i]
return generator.Generate()
Expand All @@ -257,7 +257,7 @@ func opCapture(regexp *syntax.Regexp, args *GeneratorArgs) (*internalGenerator,
// Group indices are 0-based, but index 0 is the whole expression.
index := regexp.Cap - 1

return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, args.Debug), func() ([]byte, error) {
return args.CaptureGroupHandler(index, regexp.Name, groupRegexp, generator, args)
}}, nil
}
Expand Down Expand Up @@ -312,7 +312,7 @@ func createRepeatingGenerator(regexp *syntax.Regexp, genArgs *GeneratorArgs, min
max = int(genArgs.MaxUnboundedRepeatCount)
}

return &internalGenerator{regexp.String(), func() ([]byte, error) {
return &internalGenerator{regexpName(regexp, genArgs.Debug), func() ([]byte, error) {
n := min + genArgs.rng.Intn(max-min+1)

var result bytes.Buffer
Expand All @@ -326,3 +326,11 @@ func createRepeatingGenerator(regexp *syntax.Regexp, genArgs *GeneratorArgs, min
return result.Bytes(), nil
}}, nil
}

// regexpName produces the name stored on a generator. When debug is false it
// returns the empty string without calling regexp.String(); otherwise it
// returns regexp.String().
func regexpName(regexp *syntax.Regexp, debug bool) string {
	if !debug {
		return ""
	}
	return regexp.String()
}
6 changes: 6 additions & 0 deletions psiphon/common/regen/regen.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,9 @@ type GeneratorArgs struct {
// ByteMode is not compatible with negated character classes (e.g. "[^a]").
ByteMode bool

// Debug is used by the generator to log extra information.
Debug bool

// Used by generators.
rng *rand.Rand
}
Expand Down Expand Up @@ -209,6 +212,9 @@ func (a *GeneratorArgs) Rng() (*rand.Rand, error) {
// Generator generates random bytes or strings.
type Generator interface {
// Generate returns the generated bytes, or a non-nil error.
Generate() ([]byte, error)

// String returns a string representation of the generator for debugging.
// The value is the empty string if Debug is false.
String() string
}

Expand Down

0 comments on commit 00aad9d

Please sign in to comment.