diff --git a/.gitignore b/.gitignore index 046e895..57497da 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ data/*.tmp-wal simplify* tgpkg* docgen* +!cmd/** .tup diff --git a/cmd/docgen/docgen.go b/cmd/docgen/docgen.go new file mode 100644 index 0000000..c24eaf8 --- /dev/null +++ b/cmd/docgen/docgen.go @@ -0,0 +1,225 @@ +package main + +import ( + "compress/gzip" + _ "embed" + "flag" + "fmt" + "html/template" + "io" + "os" + "path/filepath" + "strings" +) + +type CompressOpts struct { + Name string + Simplify []float64 + SimplifyMinPoints int + MinPrecXY int +} + +type RenderOpts struct { + Places []Place +} + +type Place struct { + Name string +} + +type Readme struct { + Parameters []CompressOpts + LargePlaces []string + SmallPlaces []string + Datasets []Dataset +} + +type Dataset struct { + Name string + Variants []Variant + PreviewNames []string + Render RenderOpts +} + +type Variant struct { + Name string + Fullname string + Path string + Size string + Previews []Preview +} + +type Preview struct { + Name string + Path string +} + +type Featured struct { + Name string + Source string + Desc string +} + +type WriteCounter struct { + Total int64 +} + +func (wc *WriteCounter) Write(p []byte) (int, error) { + n := len(p) + wc.Total += int64(n) + return n, nil +} + +func main() { + + var templatePath string + var rootDir string + var dataDir string + var dataUri string + var output string + + flag.StringVar(&templatePath, "template", "", "path to the template file") + flag.StringVar(&rootDir, "root", "./", "root directory") + flag.StringVar(&dataDir, "data", rootDir+"data/", "data directory") + flag.StringVar(&dataUri, "download", rootDir+"data/", "download uri prefix") + flag.StringVar(&output, "output", rootDir+"README.md", "output file") + flag.Parse() + + if templatePath == "" { + panic("template path not set") + } + + templateStr, err := os.ReadFile(templatePath) + if err != nil { + panic(err) + } + + gzipCache := map[string]int64{} + + t, err := template. + New("readme"). + Funcs(template.FuncMap{ + "filesize": func(path string) int64 { + stat, err := os.Stat(rootDir + path) + if err != nil { + return 0 + // return "❓" + } + return stat.Size() + }, + "gzipfilesize": func(path string) int64 { + if size, ok := gzipCache[path]; ok { + return size + } + + f, err := os.Open(rootDir + path) + if err != nil { + return 0 + } + defer f.Close() + + wc := &WriteCounter{} + gw := gzip.NewWriter(wc) + if _, err = io.Copy(gw, f); err != nil { + return 0 + } + gw.Close() + gzipCache[path] = wc.Total + return wc.Total + }, + "kb": func(b int64) string { + return fmt.Sprintf("%d\u202FKB", b/1000) + }, + "variants": func(name, glob string, previews []string, suffix string) []Variant { + return getVariants(name, dataDir+name+glob+".gpkg", previews, suffix) + }, + "gpkg": func(name string) string { + return name + ".gpkg" + }, + "download": func(name string) string { + return dataUri + name + }, + "local": func(name string) string { + return dataDir + name + }, + "percent": func(a, b int64) string { + frac := float64(a) / float64(b) + return fmt.Sprintf("%.1f%%", frac*100) + }, + "featured": func(name, source, desc string) Featured { + return Featured{ + Name: name, + Source: source, + Desc: desc, + } + }, + }). + Parse(string(templateStr)) + + if err != nil { + panic(err) + } + + f, err := os.Create(output) + if err != nil { + panic(err) + } + defer f.Close() + + f.WriteString("\n\n") + + readme := Readme{ + LargePlaces: []string{ + "world", + "europe", + "africa", + "usa", + "japan", + }, + SmallPlaces: []string{ + "world", + "berlin", + "nyc", + "tokyo", + "ljubljana", + }, + } + + err = t.Execute(f, readme) + if err != nil { + panic(err) + } +} + +func getVariants(name, glob string, previewNames []string, previewSuffix string) []Variant { + files, err := filepath.Glob(glob) + if err != nil { + panic(err) + } + var variants []Variant + for _, file := range files { + base := filepath.Base(file) + noext := strings.TrimSuffix(base, filepath.Ext(file)) + suffix := strings.TrimPrefix(noext, name+"_") + dir := filepath.Dir(file) + pbase := name + "_" + previewSuffix + suffix + previews := []Preview{} + for _, place := range previewNames { + pfile := filepath.Join(dir, pbase+"_"+place+".png") + if _, err := os.Stat(pfile); os.IsNotExist(err) { + continue + } + previews = append(previews, Preview{ + Name: place, + Path: filepath.ToSlash(pfile), + }) + } + variant := Variant{ + Name: suffix, + Fullname: noext, + Previews: previews, + } + variants = append(variants, variant) + } + return variants +} diff --git a/cmd/simplify/simplify.go b/cmd/simplify/simplify.go new file mode 100644 index 0000000..13863ec --- /dev/null +++ b/cmd/simplify/simplify.go @@ -0,0 +1,292 @@ +package main + +import ( + "bytes" + "context" + _ "embed" + "flag" + "fmt" + "io" + "os/exec" + "path/filepath" + "runtime" + "strconv" + "strings" + "sync" + + "github.com/peterstace/simplefeatures/geom" + "github.com/smilyorg/tinygpkg/binary" + "zombiezen.com/go/sqlite" + "zombiezen.com/go/sqlite/sqlitex" +) + +type Opts struct { + Levels []float64 + MinPoints int +} + +type simplifyJob struct { + name string + fid int64 + wkb []byte + h binary.Header +} + +type Result struct { + debug string + usedSimplify float64 + originalSize int + originalPoints int + simplifiedPoints int +} + +type writeJob struct { + name string + fid int64 + wkb []byte + h binary.Header + info Result +} + +func simplifier(wg *sync.WaitGroup, in <-chan simplifyJob, out chan<- writeJob, opts Opts) { + for job := range in { + simplified, info, err := simplifyGeometry(job.wkb, opts) + if err != nil { + panic(err) + } + if simplified == nil { + continue + } + out <- writeJob{job.name, job.fid, simplified, job.h, info} + } + wg.Done() +} + +func writer(wg *sync.WaitGroup, writec *sqlite.Conn, table string, in <-chan writeJob) { + writes := writec.Prep(fmt.Sprintf(` + UPDATE %s + SET geom = ? + WHERE fid = ?`, + table, + )) + for job := range in { + fid := job.fid + g := job.h + wkb := job.wkb + info := job.info + + g.SetEnvelopeContentsIndicatorCode(binary.NoEnvelope) + + // Write simplified + w := new(bytes.Buffer) + g.Write(w) + io.Copy(w, bytes.NewReader(wkb)) + + buf := w.Bytes() + writes.BindBytes(1, buf) + writes.BindInt64(2, fid) + _, err := writes.Step() + if err != nil { + panic(fmt.Errorf("unable to write twkb: %s", err.Error())) + } + err = writes.Reset() + if err != nil { + panic(fmt.Errorf("unable to reset write statement: %s", err.Error())) + } + + fmt.Printf( + "simplify %10s fid %6d %.0e simplify %6d to %6d points %7d to %7d bytes %4.0f%% at %6d bytes written %s\n", + job.name, fid, info.usedSimplify, info.originalPoints, info.simplifiedPoints, info.originalSize, len(wkb), 100.*float32(len(wkb))/float32(info.originalSize), len(buf), info.debug, + ) + } + wg.Done() +} + +func simplify(name string, path string, table string, opts Opts) error { + pool, err := sqlitex.Open(path, 0, 2) + if err != nil { + return err + } + defer pool.Close() + + readc := pool.Get(context.Background()) + defer pool.Put(readc) + + reads := readc.Prep(fmt.Sprintf(` + SELECT fid, geom + FROM %s`, + table, + )) + defer reads.Reset() + + writec := pool.Get(context.Background()) + defer pool.Put(writec) + + err = sqlitex.ExecuteScript(writec, fmt.Sprintf(` + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update1; + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update2; + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update3; + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update4; + `, table), nil) + if err != nil { + return err + } + + sqlitex.Execute(writec, "BEGIN TRANSACTION;", nil) + + simplifyChan := make(chan simplifyJob, 10) + writeChan := make(chan writeJob, 10) + cwg := &sync.WaitGroup{} + for i := 0; i < runtime.NumCPU(); i++ { + cwg.Add(1) + go simplifier(cwg, simplifyChan, writeChan, opts) + } + wwg := &sync.WaitGroup{} + wwg.Add(1) + go writer(wwg, writec, table, writeChan) + + for { + if exists, err := reads.Step(); err != nil { + return fmt.Errorf("error listing geometry: %s", err.Error()) + } else if !exists { + break + } + + fid := reads.ColumnInt64(0) + r := reads.ColumnReader(1) + g, err := binary.Read(r) + if err != nil { + return fmt.Errorf("error reading gpkg: %s", err.Error()) + } + + if g.Empty() { + fmt.Printf("simplify %s fid %6d empty, skipping\n", table, fid) + continue + } + + wkb, err := io.ReadAll(r) + if err != nil { + return fmt.Errorf("error reading geometry: %s", err.Error()) + } + + simplifyChan <- simplifyJob{ + name: name, + fid: fid, + wkb: wkb, + h: *g, + } + } + + close(simplifyChan) + cwg.Wait() + close(writeChan) + wwg.Wait() + + err = sqlitex.Execute(writec, "COMMIT;", nil) + if err != nil { + return fmt.Errorf("unable to commit: %s", err.Error()) + } + + err = sqlitex.Execute(writec, "VACUUM;", nil) + if err != nil { + return fmt.Errorf("unable to vacuum: %s", err.Error()) + } + return nil +} + +func simplifyGeometry(wkb []byte, opts Opts) (simplified []byte, r Result, err error) { + r.originalSize = len(wkb) + + gm, err := geom.UnmarshalWKB(wkb) + if err != nil { + return nil, r, fmt.Errorf("error unmarshalling wkb: %s", err.Error()) + } + + var extra []string + + r.originalPoints = gm.DumpCoordinates().Length() + r.simplifiedPoints = r.originalPoints + r.usedSimplify = 0.0 + if r.originalPoints < opts.MinPoints { + extra = append(extra, "not simplified min points") + } else { + si := 0 + for ; si < len(opts.Levels); si++ { + r.usedSimplify = opts.Levels[si] + gms, err := gm.Simplify(r.usedSimplify) + if err == nil { + points := gms.DumpCoordinates().Length() + if points >= 3 { + gm = gms + r.simplifiedPoints = points + break + } + extra = append(extra, "not simplified collapse") + break + } + } + if si == len(opts.Levels) { + extra = append(extra, "not simplified max level") + } else if si > 0 { + extra = append(extra, "simplify fallback") + } + } + + simplified = gm.AsBinary() + r.debug = strings.Join(extra, " ") + return +} + +func tempCopy(src string, dst string) (string, func(), error) { + tmp := dst + ".tmp" + err := exec.Command("cp", src, tmp).Run() + if err != nil { + return tmp, nil, err + } + + return tmp, func() { + exec.Command("mv", tmp, dst).Run() + }, nil +} + +func main() { + table := flag.String("table", "ne_110m_admin_0_countries", "name of the table to simplify") + levelsString := flag.String("levels", "1,0.1,0.01,0.001", "comma-separated list of simplification levels") + minPoints := flag.Int("minpoints", 20, "minimum number of points to simplify") + output := flag.String("o", "", "output file name") + flag.Parse() + + levels := make([]float64, 0) + for _, level := range strings.Split(*levelsString, ",") { + if levelFloat, err := strconv.ParseFloat(level, 64); err == nil { + levels = append(levels, levelFloat) + } + } + + opts := Opts{ + Levels: levels, + MinPoints: *minPoints, + } + + if len(flag.Args()) == 0 { + fmt.Println("Error: path to the geopackage file is required") + return + } + + path := flag.Arg(0) + + if *output == "" { + fmt.Println("Error: output file name is required") + return + } + + basename := filepath.Base(*output) + + tmp, move, err := tempCopy(path, *output) + if err != nil { + fmt.Println("Error: unable to copy file") + return + } + defer move() + simplify(basename, tmp, *table, opts) +} diff --git a/cmd/tgpkg/tgpkg.go b/cmd/tgpkg/tgpkg.go new file mode 100644 index 0000000..2b3dd7c --- /dev/null +++ b/cmd/tgpkg/tgpkg.go @@ -0,0 +1,427 @@ +package main + +import ( + "bytes" + "context" + _ "embed" + "flag" + "fmt" + "io" + "os/exec" + "path/filepath" + "runtime" + "strings" + "sync" + + "github.com/peterstace/simplefeatures/geom" + "github.com/smilyorg/tinygpkg/binary" + "zombiezen.com/go/sqlite" + "zombiezen.com/go/sqlite/sqlitex" +) + +func Build() error { + println("hello world") + return nil +} + +const OUTPUT_DIR = "./" +const DATA_DIR = OUTPUT_DIR + "data/" +const NATURAL_EARTH_URL = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/117488dc884bad03366ff727eca013e434615127/geojson/" +const GEOBOUNDARIES_URL = "https://github.com/wmgeolab/geoBoundaries/raw/742b9ae7d6a57e57fe6d47f29343bc1081a8f09d/releaseData/" +const GDAL_DOCKER_IMAGE = "ghcr.io/osgeo/gdal:alpine-normal-3.7.1" +const MIN_PRECXY = -8 +const MAX_PRECXY = +7 + +type Opts struct { + MinPrecXY int + MaxPrecXY int +} + +type compressJob struct { + fid int64 + wkb []byte + h binary.Header +} + +type compressInfo struct { + debug string + usedPrecXY int + originalSize int +} + +type writeJob struct { + name string + fid int64 + twkb []byte + h binary.Header + info compressInfo +} + +func compressor(name string, wg *sync.WaitGroup, in <-chan compressJob, out chan<- writeJob, opts Opts) { + for job := range in { + twkb, info, err := compressGeometry(job.wkb, opts) + if err != nil { + panic(err) + } + if twkb == nil { + continue + } + out <- writeJob{name, job.fid, twkb, job.h, info} + } + wg.Done() +} + +func writer(wg *sync.WaitGroup, writec *sqlite.Conn, table string, in <-chan writeJob) { + writes := writec.Prep(fmt.Sprintf(` + UPDATE %s + SET geom = ? + WHERE fid = ?`, + table, + )) + for job := range in { + fid := job.fid + g := job.h + twkb := job.twkb + info := job.info + + g.SetType(binary.ExtendedType) + g.ExtensionCode = binary.ExtensionTWKB + g.SetEnvelopeContentsIndicatorCode(binary.NoEnvelope) + + // Write TWKB + w := new(bytes.Buffer) + g.Write(w) + io.Copy(w, bytes.NewReader(twkb)) + + buf := w.Bytes() + writes.BindBytes(1, buf) + writes.BindInt64(2, fid) + _, err := writes.Step() + if err != nil { + panic(fmt.Errorf("unable to write twkb: %s", err.Error())) + } + err = writes.Reset() + if err != nil { + panic(fmt.Errorf("unable to reset write statement: %s", err.Error())) + } + + fmt.Printf( + "compress %10s fid %6d %7d wkb bytes %7d twkb bytes %4.0f%% at %d precXY %6d bytes written %s\n", + job.name, fid, info.originalSize, len(twkb), 100.*float32(len(twkb))/float32(info.originalSize), info.usedPrecXY, len(buf), info.debug, + ) + } + wg.Done() +} + +func compressGeopackage(name, gpkgPath, tgpkgPath, table string, opts Opts) error { + tmp, move, err := tempCopy(gpkgPath, tgpkgPath) + if err != nil { + return err + } + defer move() + pool, err := sqlitex.Open(tmp, 0, 2) + if err != nil { + return err + } + defer pool.Close() + + readc := pool.Get(context.Background()) + defer pool.Put(readc) + + reads := readc.Prep(fmt.Sprintf(` + SELECT fid, geom + FROM %s`, + table, + )) + defer reads.Reset() + + writec := pool.Get(context.Background()) + defer pool.Put(writec) + + err = sqlitex.ExecuteScript(writec, fmt.Sprintf(` + INSERT OR IGNORE INTO gpkg_extensions VALUES ('%[1]s', 'geom', 'mlunar_twkb', 'https://github.com/SmilyOrg/tinygpkg', 'read-write'); + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update1; + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update2; + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update3; + DROP TRIGGER IF EXISTS rtree_%[1]s_geom_update4; + `, table), nil) + if err != nil { + return err + } + + sqlitex.Execute(writec, "BEGIN TRANSACTION;", nil) + + compressChan := make(chan compressJob, 10) + writeChan := make(chan writeJob, 10) + cwg := &sync.WaitGroup{} + for i := 0; i < runtime.NumCPU(); i++ { + cwg.Add(1) + go compressor(name, cwg, compressChan, writeChan, opts) + } + wwg := &sync.WaitGroup{} + wwg.Add(1) + go writer(wwg, writec, table, writeChan) + + for { + if exists, err := reads.Step(); err != nil { + return fmt.Errorf("error listing geometry: %s", err.Error()) + } else if !exists { + break + } + + fid := reads.ColumnInt64(0) + r := reads.ColumnReader(1) + g, err := binary.Read(r) + if err != nil { + return fmt.Errorf("error reading gpkg: %s", err.Error()) + } + + if g.Empty() { + fmt.Printf("compress fid %6d empty, skipping\n", fid) + continue + } + + if g.Type() != binary.StandardType { + fmt.Printf("compress fid %6d non-standard geometry, skipping\n", fid) + continue + } + + wkb, err := io.ReadAll(r) + if err != nil { + return fmt.Errorf("error reading geometry: %s", err.Error()) + } + + compressChan <- compressJob{ + fid: fid, + wkb: wkb, + h: *g, + } + } + + close(compressChan) + cwg.Wait() + close(writeChan) + wwg.Wait() + + err = sqlitex.Execute(writec, "COMMIT;", nil) + if err != nil { + return fmt.Errorf("unable to commit: %s", err.Error()) + } + + err = sqlitex.Execute(writec, "VACUUM;", nil) + if err != nil { + return fmt.Errorf("unable to vacuum: %s", err.Error()) + } + return nil +} + +func compressGeometry(wkb []byte, opts Opts) (twkb []byte, info compressInfo, err error) { + info.originalSize = len(wkb) + + gm, err := geom.UnmarshalWKB(wkb) + if err != nil { + return nil, info, fmt.Errorf("error unmarshalling wkb: %s", err.Error()) + } + + var extra []string + + info.usedPrecXY = opts.MinPrecXY + for ; info.usedPrecXY <= opts.MaxPrecXY; info.usedPrecXY++ { + twkb, err = geom.MarshalTWKB(gm, info.usedPrecXY) + if err != nil { + return nil, info, fmt.Errorf("error marshalling twkb: %s", err.Error()) + } + + _, err = geom.UnmarshalTWKB(twkb) + if err == nil { + break + } + } + + if info.usedPrecXY >= opts.MaxPrecXY { + extra = append(extra, "unable to compress, skipping") + twkb = nil + } else if info.usedPrecXY != opts.MinPrecXY { + extra = append(extra, "roundtrip fallback") + } + + info.debug = strings.Join(extra, " ") + return twkb, info, nil +} + +func decompressGeopackage(name, tgpkgPath, gpkgPath, table string) error { + tmp, move, err := tempCopy(tgpkgPath, gpkgPath) + if err != nil { + return err + } + defer move() + pool, err := sqlitex.Open(tmp, 0, 2) + if err != nil { + return err + } + defer pool.Close() + + readc := pool.Get(context.Background()) + defer pool.Put(readc) + + reads := readc.Prep(fmt.Sprintf(` + SELECT fid, geom + FROM %s`, + table, + )) + defer reads.Reset() + + writec := pool.Get(context.Background()) + defer pool.Put(writec) + + writes := writec.Prep(fmt.Sprintf(` + UPDATE %s + SET geom = ? + WHERE fid = ?`, + table, + )) + + sqlitex.Execute(writec, "BEGIN TRANSACTION;", nil) + + for { + if exists, err := reads.Step(); err != nil { + return fmt.Errorf("error listing geometry: %s", err.Error()) + } else if !exists { + break + } + + fid := reads.ColumnInt64(0) + r := reads.ColumnReader(1) + g, err := binary.Read(r) + if err != nil { + return fmt.Errorf("unable to read gpkg: %w", err) + } + + if g.Empty() { + fmt.Printf("decompress fid %6d empty, skipping\n", fid) + continue + } + + if g.Type() == binary.StandardType { + fmt.Printf("decompress fid %6d standard geometry, skipping\n", fid) + continue + } + + if !bytes.Equal(g.ExtensionCode, binary.ExtensionTWKB) { + return fmt.Errorf("invalid extension code: %s", string(g.ExtensionCode)) + } + + twkb, err := io.ReadAll(r) + if err != nil { + return fmt.Errorf("unable to read twkb: %w", err) + } + + gm, err := geom.UnmarshalTWKB(twkb) + if err != nil { + return fmt.Errorf("unable to unmarshal twkb: %w", err) + } + + wkb := gm.AsBinary() + + g.SetType(binary.StandardType) + g.SetEnvelopeContentsIndicatorCode(binary.NoEnvelope) + + // Write WKB + w := new(bytes.Buffer) + g.Write(w) + io.Copy(w, bytes.NewReader(wkb)) + + buf := w.Bytes() + writes.BindBytes(1, buf) + writes.BindInt64(2, fid) + _, err = writes.Step() + if err != nil { + return fmt.Errorf("unable to write wkb: %s", err.Error()) + } + err = writes.Reset() + if err != nil { + return fmt.Errorf("unable to reset write statement: %w", err) + } + + fmt.Printf( + "decompress %10s fid %6d %7d twkb bytes to %7d wkb bytes %4.0f%% %7d bytes written\n", + name, fid, len(twkb), len(wkb), 100.*float32(len(wkb))/float32(len(twkb)), len(buf), + ) + } + + sqlitex.Execute(writec, "COMMIT;", nil) + + err = sqlitex.ExecuteScript(writec, fmt.Sprintf(` + DELETE FROM gpkg_extensions WHERE table_name = '%[1]s' AND column_name = 'geom' AND extension_name = 'mlunar_twkb'; + `, table), nil) + if err != nil { + return err + } + + err = sqlitex.Execute(writec, "VACUUM;", nil) + if err != nil { + return err + } + + return nil +} + +type Place struct { + Name string +} + +func tempCopy(src string, dst string) (string, func(), error) { + tmp := dst + ".tmp" + err := exec.Command("cp", src, tmp).Run() + if err != nil { + return tmp, nil, err + } + + return tmp, func() { + exec.Command("mv", tmp, dst).Run() + }, nil +} + +func main() { + table := flag.String("table", "ne_110m_admin_0_countries", "table to compress") + minprecxy := flag.Int("minprecxy", 3, "minimum X and Y coordinate TWKB precision") + maxprecxy := flag.Int("maxprecxy", MAX_PRECXY, "maximum X and Y coordinate TWKB precision") + output := flag.String("o", "", "output file name") + flag.Parse() + + if len(flag.Args()) < 2 { + fmt.Println("Error: command and input file name are required") + return + } + + cmd := flag.Arg(0) + path := flag.Arg(1) + + if *output == "" { + fmt.Println("Error: output file name is required") + return + } + + basename := filepath.Base(*output) + + tmp, move, err := tempCopy(path, *output) + if err != nil { + fmt.Println("Error: unable to copy file") + return + } + defer move() + + opts := Opts{ + MinPrecXY: *minprecxy, + MaxPrecXY: *maxprecxy, + } + + switch cmd { + case "compress": + compressGeopackage(basename, path, tmp, *table, opts) + case "decompress": + decompressGeopackage(basename, path, tmp, *table) + default: + fmt.Println("Error: unknown command: " + cmd) + } +}