From 3dff306f6a32faf6a97df13481db24e24434fa32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Wed, 12 Jun 2019 15:30:13 -0400 Subject: [PATCH 01/17] Restore to working state --- main.go | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 139 insertions(+), 8 deletions(-) diff --git a/main.go b/main.go index c752048..cebffb9 100644 --- a/main.go +++ b/main.go @@ -26,26 +26,157 @@ var ( commentReg = regexp.MustCompile("(?m)\n?") ) +// Print the usage spec to the terminal and exit cleanly +func printUsage(help bool) { + usage := "usage: wp-import [-h|--help] [-i instance] [-f] filename.xml" + if help { + usage = usage + "\n" + + " -h|--help Prints this help message.\n" + + " -i Specifies the instance to use.\n" + + " Should be one of the instances set up in instances.ini.\n" + + " Defaults to \"writeas\" (https://write.as).\n" + + " -f Specifies the filename to read from.\n" + + " This can be a relative or absolute path.\n" + + " The flag can be excluded if the filename is the last argument." + } + fmt.Println(usage) + os.Exit(0) +} + +// This should allow input in these formats: +// wp-import -h (or --help) +// wp-import filename +// wp-import -i instance filename +// wp-import -i instance -f filename + +func parseArgs(args []string) map[string]string { + arguments := make(map[string]string) + if len(args) == 2 { + if args[1] == "-h" || args[1] == "--help" { + printUsage(true) + } else if string(args[1][0]) != "-" { + arguments["filename"] = args[1] + } else { + printUsage(false) + } + } else if len(args) < 2 { + printUsage(false) + } else { + // Starting at 1 because args[0] is the program name + for i := 1; i < len(args); i++ { + if args[i] == "-h" || args[i] == "--help" { + printUsage(true) + } else if args[i] == "-i" { + if i+1 == len(args) || string(args[i+1][0]) == "-" { + printUsage(false) + } + arguments["instance"] = args[i+1] + i++ + } else if args[i] == "-f" { + if i+1 == len(args) || string(args[i+1][0]) == "-" { + printUsage(false) + } + arguments["filename"] = args[i+1] + i++ + } else if i == len(args)-1 && string(args[i][0]) != "-" { + arguments["filename"] = args[i] + } + } + } + if arguments["filename"] == "" { + printUsage(false) + } + return arguments +} + +type instance struct { + Url string + Token string +} + +// Temporarily using an ini file to store instance tokens. +// This is probably not what the rest of the code does, +// but I need some way to handle this for now. +// TODO: Get this in line with the rest of the code (see T586) + +// ini file format: +// Each instance has its own [section] +// Semicolons (;) at the beginning of a line indicate a comment +// Can't start a comment mid-line (this allows semicolons in variable values) +// Blank lines are ignored +func importConfig() map[string]instance { + file, err := ioutil.ReadFile("instances.ini") + if err != nil { + errQuit("Error reading instances.ini") + } + lines := strings.Split(string(file), "\n") + instances := make(map[string]instance) + curinst := "" + newinst := instance{} + for i := 0; i < len(lines); i++ { + line := lines[i] + fc := string(line[0]) + if line == "" || fc == ";" { + continue + } + if fc == "[" { + if curinst != "" { + instances[curinst] = newinst + newinst = instance{} + } + curinst = line[1:(len(line) - 1)] + } else { + loc := strings.Index(line, "=") + if curinst == "" || loc == -1 { + errQuit("Malformed ini file") + } + k := line[:loc] + v := line[loc+1:] + if k == "url" { + newinst.Url = v + } else if k == "token" { + newinst.Token = v + } else { + errQuit("Malformed ini file") + } + } + } + instances[curinst] = newinst + return instances +} + func main() { - if len(os.Args) < 2 { - //errQuit("usage: wp-import https://write.as filename.xml") - errQuit("usage: wp-import filename.xml") + a := parseArgs(os.Args) + // if len(os.Args) < 2 { + // //errQuit("usage: wp-import https://write.as filename.xml") + // errQuit("usage: wp-import filename.xml") + // } + // fname := os.Args[1] + fname := a["filename"] + inst := "writeas" + if a["instance"] != "" { + inst = a["instance"] } - //instance := os.Args[1] - instance := "https://write.as" - fname := os.Args[1] - // TODO: load user config from same func as writeas-cli + instances := importConfig() + //fmt.Println(instances) t := "" + u := "" + if val, ok := instances[inst]; ok { + t = val.Token + u = val.Url + } if t == "" { errQuit("not authenticated. run: writeas auth ") } cl := writeas.NewClientWith(writeas.Config{ - URL: instance + "/api", + URL: u + "/api", Token: t, }) + errQuit("We've reached the point where I need an actual token.") + log.Printf("Reading %s...\n", fname) raw, _ := ioutil.ReadFile(fname) From 6afec21fa53ce6582995bdb07da3b34a8dcb7729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Wed, 12 Jun 2019 15:42:11 -0400 Subject: [PATCH 02/17] Include basic instances.ini --- instances.ini | 3 +++ main.go | 2 ++ 2 files changed, 5 insertions(+) create mode 100644 instances.ini diff --git a/instances.ini b/instances.ini new file mode 100644 index 0000000..921deef --- /dev/null +++ b/instances.ini @@ -0,0 +1,3 @@ +[writeas] +url=https://write.as +token=00000000-0000-0000-0000-000000000000 \ No newline at end of file diff --git a/main.go b/main.go index cebffb9..ce1cd65 100644 --- a/main.go +++ b/main.go @@ -166,6 +166,8 @@ func main() { t = val.Token u = val.Url } + // TODO: change this so it offers in-app authentication. + // Store URLS and tokens in instances.ini. if t == "" { errQuit("not authenticated. run: writeas auth ") } From da6d1042a693a2e34cced5dcf4056fa9050080c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Wed, 12 Jun 2019 15:43:09 -0400 Subject: [PATCH 03/17] Add comments to instances.ini --- instances.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/instances.ini b/instances.ini index 921deef..10761a3 100644 --- a/instances.ini +++ b/instances.ini @@ -1,3 +1,5 @@ +; each instance gets a [section] +; each instance has a url and a token [writeas] url=https://write.as token=00000000-0000-0000-0000-000000000000 \ No newline at end of file From 80a3bb43799e9364e7740a36dcd223bc69eb22cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Wed, 12 Jun 2019 16:19:59 -0400 Subject: [PATCH 04/17] Create new token if token doesn't exist --- main.go | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/main.go b/main.go index ce1cd65..353a4d8 100644 --- a/main.go +++ b/main.go @@ -11,6 +11,7 @@ package main import ( + "bufio" "bytes" "fmt" "github.com/frankbille/go-wxr-import" @@ -20,6 +21,7 @@ import ( "log" "os" "regexp" + "strings" ) var ( @@ -90,6 +92,7 @@ func parseArgs(args []string) map[string]string { } type instance struct { + Name string Url string Token string } @@ -160,24 +163,44 @@ func main() { instances := importConfig() //fmt.Println(instances) + var cl writeas.Client t := "" u := "" if val, ok := instances[inst]; ok { t = val.Token u = val.Url + cl = writeas.NewClientWith(writeas.Config{ + URL: u + "/api", + Token: t, + }) + } else { + fmt.Println("We don't have a token for " + inst + ".") + r := bufio.NewReader(os.Stdin) + fmt.Print("Instance name: ") + name, _ := strings.Trim(r.ReadString("\n"), "\n") + fmt.Print("Instance URL: ") + url, _ := strings.Trim(r.ReadString("\n"), "\n") + if string(url[:4]) != "https" { + url = "https:\\" + url + } + fmt.Print("Username: ") + uname, _ := strings.Trim(r.ReadString("\n"), "\n") + fmt.Print("Password: ") + passwd, _ := strings.Trim(r.ReadString("\n"), "\n") + cl = writeas.NewClientWith(writeas.Config{ + URL: url + "/api", + Token: "", + }) + usr, uerr := cl.LogIn(uname, passwd) + if uerr != nil { + errQuit("Couldn't log in with those credentials.") + } + file, ferr = os.OpenFile("instances.ini", os.O_APPEND|os.O_WRONLY, 0644) + defer file.Close() + printstr = "\n[" + name + "]\nurl=" + url + "\ntoken=" + cl.Token() + fmt.Fprintln(file, printstr) + fmt.Println("Okay, you're logged in.") } - // TODO: change this so it offers in-app authentication. - // Store URLS and tokens in instances.ini. - if t == "" { - errQuit("not authenticated. run: writeas auth ") - } - - cl := writeas.NewClientWith(writeas.Config{ - URL: u + "/api", - Token: t, - }) - - errQuit("We've reached the point where I need an actual token.") log.Printf("Reading %s...\n", fname) raw, _ := ioutil.ReadFile(fname) From 7140a2190d2b1ba01eef389be37c3e26a2a72626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Wed, 12 Jun 2019 16:27:20 -0400 Subject: [PATCH 05/17] Add more robust error message when rate limited --- main.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index 353a4d8..2fff5b0 100644 --- a/main.go +++ b/main.go @@ -193,7 +193,11 @@ func main() { }) usr, uerr := cl.LogIn(uname, passwd) if uerr != nil { - errQuit("Couldn't log in with those credentials.") + if err.Error == "Stop repeatedly trying to log in." { + errQuit("Stop repeatedly trying to log in.") + } else { + errQuit("Couldn't log in with those credentials.") + } } file, ferr = os.OpenFile("instances.ini", os.O_APPEND|os.O_WRONLY, 0644) defer file.Close() From 76952b3b108842d4a6c1dd0e0fd04804e717b03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Thu, 13 Jun 2019 16:02:05 -0400 Subject: [PATCH 06/17] Update interactive section to use scanner --- main.go | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/main.go b/main.go index 2fff5b0..5ef513f 100644 --- a/main.go +++ b/main.go @@ -163,7 +163,7 @@ func main() { instances := importConfig() //fmt.Println(instances) - var cl writeas.Client + var cl *writeas.Client t := "" u := "" if val, ok := instances[inst]; ok { @@ -175,33 +175,36 @@ func main() { }) } else { fmt.Println("We don't have a token for " + inst + ".") - r := bufio.NewReader(os.Stdin) - fmt.Print("Instance name: ") - name, _ := strings.Trim(r.ReadString("\n"), "\n") + r := bufio.NewScanner(os.Stdin) fmt.Print("Instance URL: ") - url, _ := strings.Trim(r.ReadString("\n"), "\n") - if string(url[:4]) != "https" { - url = "https:\\" + url + r.Scan() + url := r.Text() + if string(url[:5]) != "https" { + url = "https://" + url } + if string(url[len(url)-1:]) == "/" { + url = string(url[:len(url)-1]) + } + //fmt.Println("Using URL", url) fmt.Print("Username: ") - uname, _ := strings.Trim(r.ReadString("\n"), "\n") + r.Scan() + uname := r.Text() + //fmt.Println("Using username", uname) fmt.Print("Password: ") - passwd, _ := strings.Trim(r.ReadString("\n"), "\n") + r.Scan() + passwd := r.Text() + //fmt.Println("Using password", passwd) cl = writeas.NewClientWith(writeas.Config{ URL: url + "/api", Token: "", }) - usr, uerr := cl.LogIn(uname, passwd) + _, uerr := cl.LogIn(uname, passwd) if uerr != nil { - if err.Error == "Stop repeatedly trying to log in." { - errQuit("Stop repeatedly trying to log in.") - } else { - errQuit("Couldn't log in with those credentials.") - } + errQuit("Couldn't log in with those credentials.") } - file, ferr = os.OpenFile("instances.ini", os.O_APPEND|os.O_WRONLY, 0644) + file, _ := os.OpenFile("instances.ini", os.O_APPEND|os.O_WRONLY, 0644) defer file.Close() - printstr = "\n[" + name + "]\nurl=" + url + "\ntoken=" + cl.Token() + printstr := "\n[" + inst + "]\nurl=" + url + "\ntoken=" + cl.Token() fmt.Fprintln(file, printstr) fmt.Println("Okay, you're logged in.") } @@ -231,7 +234,7 @@ func main() { log.Printf("Done!\n") log.Printf("Found %d items.\n", len(ch.Items)) - for i, wpp := range ch.Items { + for _, wpp := range ch.Items { if wpp.PostType != "post" { continue } From 013ac35214a65d71f8cd4254ec74b3a6246f2b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Thu, 13 Jun 2019 16:04:50 -0400 Subject: [PATCH 07/17] Separate empty line check from comment check --- main.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index 5ef513f..77ac469 100644 --- a/main.go +++ b/main.go @@ -118,8 +118,11 @@ func importConfig() map[string]instance { newinst := instance{} for i := 0; i < len(lines); i++ { line := lines[i] + if line == "" { + continue + } fc := string(line[0]) - if line == "" || fc == ";" { + if fc == ";" { continue } if fc == "[" { From c0964ce259b106449e7c2ce0b4936d2983f9f4fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Thu, 13 Jun 2019 16:27:49 -0400 Subject: [PATCH 08/17] Amend collection name to reduce collision chance --- main.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.go b/main.go index 77ac469..23d50a5 100644 --- a/main.go +++ b/main.go @@ -16,6 +16,7 @@ import ( "fmt" "github.com/frankbille/go-wxr-import" "github.com/writeas/godown" + "github.com/writeas/nerds/store" "go.code.as/writeas.v2" "io/ioutil" "log" @@ -222,6 +223,7 @@ func main() { postsCount := 0 for _, ch := range d.Channels { + ch.Title = ch.Title + " " + store.GenerateFriendlyRandomString(4) log.Printf("Channel: %s\n", ch.Title) // Create the blog From 9a806595e4aa7b94f15b80803d01c8bd0541b4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Thu, 13 Jun 2019 17:01:06 -0400 Subject: [PATCH 09/17] Truncate language to 2 characters --- main.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 23d50a5..28e5dc8 100644 --- a/main.go +++ b/main.go @@ -266,7 +266,12 @@ func main() { if tags != "" { con += "\n\n" + tags } - + var postlang string + if len(ch.Language) > 2 { + postlang = string(ch.Language[:2]) + } else { + postlang = ch.Language + } p := &writeas.PostParams{ Title: wpp.Title, Slug: wpp.PostName, @@ -274,7 +279,7 @@ func main() { Created: &wpp.PostDateGmt, Updated: &wpp.PostDateGmt, Font: "norm", - Language: &ch.Language, + Language: &postlang, Collection: coll.Alias, } log.Printf("Creating %s", p.Title) From 6cfeb65ba3764b42f9d58f83f3a07ec732cfac4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Fri, 21 Jun 2019 09:45:54 -0400 Subject: [PATCH 10/17] Change blog name only on collision --- main.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 28e5dc8..29c664d 100644 --- a/main.go +++ b/main.go @@ -223,7 +223,6 @@ func main() { postsCount := 0 for _, ch := range d.Channels { - ch.Title = ch.Title + " " + store.GenerateFriendlyRandomString(4) log.Printf("Channel: %s\n", ch.Title) // Create the blog @@ -234,7 +233,17 @@ func main() { log.Printf("Creating %s...\n", ch.Title) coll, err := cl.CreateCollection(c) if err != nil { - errQuit(err.Error()) + if err.Error() == "Collection name is already taken." { + newTitle := ch.Title + " " + store.GenerateFriendlyRandomString(4) + log.Printf("A blog by that name already exists. Changing to %s...\n", newTitle) + c.Title = newTitle + coll, err = cl.CreateCollection(c) + if err != nil { + errQuit(err.Error()) + } + } else { + errQuit(err.Error()) + } } log.Printf("Done!\n") From e198fdc4d7cad11a69b33625f5a7e6e29499c910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Fri, 21 Jun 2019 11:10:54 -0400 Subject: [PATCH 11/17] Verify that it's a WordPress file --- main.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/main.go b/main.go index 29c664d..4fa4c57 100644 --- a/main.go +++ b/main.go @@ -98,6 +98,25 @@ type instance struct { Token string } +// Preparing to be able to handle multiple types of files. +// For right now, just verify that it's a valid WordPress WXR file. +// Do this two ways: check that the file extension is "xml", and +// verify that the word "WordPress" appears in the first 200 characters. +func identifyFile(fname string, raw []byte) wxr.Wxr { + parts := strings.Split(fname, ".") + extension := parts[len(parts)-1] + rawstr := string(raw[:200]) + var d wxr.Wxr + + if extension == "xml" && strings.Contains(rawstr, "WordPress") { + log.Println("This looks like a WordPress file. Parsing...") + return wxr.ParseWxr(raw) + } else { + errQuit("I can't tell what kind of file this is.") + } + +} + // Temporarily using an ini file to store instance tokens. // This is probably not what the rest of the code does, // but I need some way to handle this for now. @@ -217,7 +236,10 @@ func main() { raw, _ := ioutil.ReadFile(fname) log.Println("Parsing...") - d := wxr.ParseWxr(raw) + + // What kind of file is it? + d := identifyFile(fname, raw) + log.Printf("Found %d channels.\n", len(d.Channels)) postsCount := 0 From acccce9305843f87817bbc74548d5fa85cc75b77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Fri, 21 Jun 2019 11:51:43 -0400 Subject: [PATCH 12/17] Add general struct for imported blogs --- main.go | 152 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 60 deletions(-) diff --git a/main.go b/main.go index 4fa4c57..cd8651b 100644 --- a/main.go +++ b/main.go @@ -98,23 +98,99 @@ type instance struct { Token string } +type ImportedBlogs struct { + Collections []*SingleBlog +} + +type SingleBlog struct { + Params *writeas.CollectionParams + Posts []*writeas.PostParams +} + // Preparing to be able to handle multiple types of files. // For right now, just verify that it's a valid WordPress WXR file. // Do this two ways: check that the file extension is "xml", and // verify that the word "WordPress" appears in the first 200 characters. -func identifyFile(fname string, raw []byte) wxr.Wxr { +func identifyFile(fname string, raw []byte) *ImportedBlogs { parts := strings.Split(fname, ".") extension := parts[len(parts)-1] rawstr := string(raw[:200]) - var d wxr.Wxr if extension == "xml" && strings.Contains(rawstr, "WordPress") { log.Println("This looks like a WordPress file. Parsing...") - return wxr.ParseWxr(raw) + return parseWPFile(wxr.ParseWxr(raw)) } else { errQuit("I can't tell what kind of file this is.") } + // punt + return &ImportedBlogs{} +} + +// Turn our WXR struct into an ImportedBlogs struct. +// Creating a general format for imported blogs is the first step to +// generalizing the import process. +func parseWPFile(d wxr.Wxr) *ImportedBlogs { + coll := &ImportedBlogs{} + for _, ch := range d.Channels { + // Create the blog + c := &SingleBlog{ + Params: &writeas.CollectionParams{ + Title: ch.Title, + Description: ch.Description, + }, + Posts: make([]*writeas.PostParams, 0, 0), + } + + for _, wpp := range ch.Items { + if wpp.PostType != "post" { + continue + } + + // Convert to Markdown + b := bytes.NewBufferString("") + r := bytes.NewReader([]byte(wpp.Content)) + err := godown.Convert(b, r, nil) + if err != nil { + errQuit(err.Error()) + } + con := b.String() + // Remove unneeded WordPress comments that take up space, like + con = commentReg.ReplaceAllString(con, "") + + // Append tags + tags := "" + sep := "" + for _, cat := range wpp.Categories { + if cat.Domain != "post_tag" { + continue + } + tags += sep + "#" + cat.DisplayName + sep = " " + } + if tags != "" { + con += "\n\n" + tags + } + var postlang string + if len(ch.Language) > 2 { + postlang = string(ch.Language[:2]) + } else { + postlang = ch.Language + } + p := &writeas.PostParams{ + Title: wpp.Title, + Slug: wpp.PostName, + Content: con, + Created: &wpp.PostDateGmt, + Updated: &wpp.PostDateGmt, + Font: "norm", + Language: &postlang, + } + c.Posts = append(c.Posts, p) + } + coll.Collections = append(coll.Collections, c) + } + return coll } // Temporarily using an ini file to store instance tokens. @@ -238,27 +314,24 @@ func main() { log.Println("Parsing...") // What kind of file is it? - d := identifyFile(fname, raw) + d := identifyFile(fname, raw) // d is now an ImportedBlogs object, not a WXR object - log.Printf("Found %d channels.\n", len(d.Channels)) + log.Printf("Found %d channels.\n", len(d.Collections)) postsCount := 0 - for _, ch := range d.Channels { - log.Printf("Channel: %s\n", ch.Title) + for _, ch := range d.Collections { + c := ch.Params + title := c.Title + log.Printf("Channel: %s\n", title) - // Create the blog - c := &writeas.CollectionParams{ - Title: ch.Title, - Description: ch.Description, - } - log.Printf("Creating %s...\n", ch.Title) + log.Printf("Creating %s...\n", title) coll, err := cl.CreateCollection(c) if err != nil { if err.Error() == "Collection name is already taken." { - newTitle := ch.Title + " " + store.GenerateFriendlyRandomString(4) - log.Printf("A blog by that name already exists. Changing to %s...\n", newTitle) - c.Title = newTitle + title = title + " " + store.GenerateFriendlyRandomString(4) + log.Printf("A blog by that name already exists. Changing to %s...\n", title) + c.Title = title coll, err = cl.CreateCollection(c) if err != nil { errQuit(err.Error()) @@ -269,51 +342,10 @@ func main() { } log.Printf("Done!\n") - log.Printf("Found %d items.\n", len(ch.Items)) - for _, wpp := range ch.Items { - if wpp.PostType != "post" { - continue - } - - // Convert to Markdown - b := bytes.NewBufferString("") - r := bytes.NewReader([]byte(wpp.Content)) - err = godown.Convert(b, r, nil) - con := b.String() - - // Remove unneeded WordPress comments that take up space, like - con = commentReg.ReplaceAllString(con, "") - - // Append tags - tags := "" - sep := "" - for _, cat := range wpp.Categories { - if cat.Domain != "post_tag" { - continue - } - tags += sep + "#" + cat.DisplayName - sep = " " - } - if tags != "" { - con += "\n\n" + tags - } - var postlang string - if len(ch.Language) > 2 { - postlang = string(ch.Language[:2]) - } else { - postlang = ch.Language - } - p := &writeas.PostParams{ - Title: wpp.Title, - Slug: wpp.PostName, - Content: con, - Created: &wpp.PostDateGmt, - Updated: &wpp.PostDateGmt, - Font: "norm", - Language: &postlang, - Collection: coll.Alias, - } + log.Printf("Found %d posts.\n", len(ch.Posts)) + for _, p := range ch.Posts { log.Printf("Creating %s", p.Title) + p.Collection = coll.Alias _, err = cl.CreatePost(p) if err != nil { fmt.Fprintf(os.Stderr, "create post: %s\n", err) From 0de1a8e1aa3c4974a025a10137d306be2f47a4c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Fri, 21 Jun 2019 15:40:07 -0400 Subject: [PATCH 13/17] Use flag package, add dry-run option --- main.go | 184 +++++++++++++++++++++++--------------------------------- 1 file changed, 74 insertions(+), 110 deletions(-) diff --git a/main.go b/main.go index cd8651b..8106aac 100644 --- a/main.go +++ b/main.go @@ -11,12 +11,18 @@ package main import ( + // "archive/zip" "bufio" "bytes" + "flag" "fmt" "github.com/frankbille/go-wxr-import" + "github.com/howeyc/gopass" "github.com/writeas/godown" "github.com/writeas/nerds/store" + // "github.com/writeas/web-core/posts" + // "github.com/writeas/wf-migrate" + // "github.com/writeas/zip-import" "go.code.as/writeas.v2" "io/ioutil" "log" @@ -29,69 +35,6 @@ var ( commentReg = regexp.MustCompile("(?m)\n?") ) -// Print the usage spec to the terminal and exit cleanly -func printUsage(help bool) { - usage := "usage: wp-import [-h|--help] [-i instance] [-f] filename.xml" - if help { - usage = usage + "\n" + - " -h|--help Prints this help message.\n" + - " -i Specifies the instance to use.\n" + - " Should be one of the instances set up in instances.ini.\n" + - " Defaults to \"writeas\" (https://write.as).\n" + - " -f Specifies the filename to read from.\n" + - " This can be a relative or absolute path.\n" + - " The flag can be excluded if the filename is the last argument." - } - fmt.Println(usage) - os.Exit(0) -} - -// This should allow input in these formats: -// wp-import -h (or --help) -// wp-import filename -// wp-import -i instance filename -// wp-import -i instance -f filename - -func parseArgs(args []string) map[string]string { - arguments := make(map[string]string) - if len(args) == 2 { - if args[1] == "-h" || args[1] == "--help" { - printUsage(true) - } else if string(args[1][0]) != "-" { - arguments["filename"] = args[1] - } else { - printUsage(false) - } - } else if len(args) < 2 { - printUsage(false) - } else { - // Starting at 1 because args[0] is the program name - for i := 1; i < len(args); i++ { - if args[i] == "-h" || args[i] == "--help" { - printUsage(true) - } else if args[i] == "-i" { - if i+1 == len(args) || string(args[i+1][0]) == "-" { - printUsage(false) - } - arguments["instance"] = args[i+1] - i++ - } else if args[i] == "-f" { - if i+1 == len(args) || string(args[i+1][0]) == "-" { - printUsage(false) - } - arguments["filename"] = args[i+1] - i++ - } else if i == len(args)-1 && string(args[i][0]) != "-" { - arguments["filename"] = args[i] - } - } - } - if arguments["filename"] == "" { - printUsage(false) - } - return arguments -} - type instance struct { Name string Url string @@ -111,14 +54,25 @@ type SingleBlog struct { // For right now, just verify that it's a valid WordPress WXR file. // Do this two ways: check that the file extension is "xml", and // verify that the word "WordPress" appears in the first 200 characters. -func identifyFile(fname string, raw []byte) *ImportedBlogs { +func identifyFile(fname string) *ImportedBlogs { + log.Printf("Reading %s...\n", fname) parts := strings.Split(fname, ".") extension := parts[len(parts)-1] - rawstr := string(raw[:200]) - if extension == "xml" && strings.Contains(rawstr, "WordPress") { - log.Println("This looks like a WordPress file. Parsing...") - return parseWPFile(wxr.ParseWxr(raw)) + if extension == "xml" { + raw, _ := ioutil.ReadFile(fname) + rawstr := string(raw[:200]) + if strings.Contains(rawstr, "WordPress") { + log.Println("This looks like a WordPress file. Parsing...") + return ParseWPFile(wxr.ParseWxr(raw)) + } else { + // It's XML but not WordPress + errQuit("I can't tell what kind of file this is.") + } + // Future development: + //} else if extension == "zip" { + // log.Println("This looks like a Zip archive. Parsing...") + // return ParseZipFile(fname) } else { errQuit("I can't tell what kind of file this is.") } @@ -129,7 +83,7 @@ func identifyFile(fname string, raw []byte) *ImportedBlogs { // Turn our WXR struct into an ImportedBlogs struct. // Creating a general format for imported blogs is the first step to // generalizing the import process. -func parseWPFile(d wxr.Wxr) *ImportedBlogs { +func ParseWPFile(d wxr.Wxr) *ImportedBlogs { coll := &ImportedBlogs{} for _, ch := range d.Channels { // Create the blog @@ -248,20 +202,29 @@ func importConfig() map[string]instance { } func main() { - a := parseArgs(os.Args) - // if len(os.Args) < 2 { - // //errQuit("usage: wp-import https://write.as filename.xml") - // errQuit("usage: wp-import filename.xml") - // } - // fname := os.Args[1] - fname := a["filename"] + f_inst := flag.String("i", "writeas", "Named WriteFreely Host (not URL)") + f_file := flag.String("f", "", "File to be imported") + f_help := flag.Bool("h", false, "Print this help message") + f_dry := flag.Bool("d", false, "Dry run (parse the input file but don't upload the contents)") + flag.Parse() + a := flag.Args() + if (*f_file == "" && len(a) == 0) || (*f_help == true) { + fmt.Fprintf(os.Stderr, "usage: wfimport [-i myinstance] [-f] file1\n") + flag.PrintDefaults() + return + } + var fname string + if *f_file != "" { + fname = *f_file + } else { + fname = a[0] + } inst := "writeas" - if a["instance"] != "" { - inst = a["instance"] + if *f_inst != "" { + inst = *f_inst } instances := importConfig() - //fmt.Println(instances) var cl *writeas.Client t := "" u := "" @@ -284,15 +247,15 @@ func main() { if string(url[len(url)-1:]) == "/" { url = string(url[:len(url)-1]) } - //fmt.Println("Using URL", url) fmt.Print("Username: ") r.Scan() uname := r.Text() - //fmt.Println("Using username", uname) fmt.Print("Password: ") - r.Scan() - passwd := r.Text() - //fmt.Println("Using password", passwd) + tpwd, pwerr := gopass.GetPasswdMasked() + if pwerr != nil { + errQuit(pwerr.Error()) + } + passwd := string(tpwd) cl = writeas.NewClientWith(writeas.Config{ URL: url + "/api", Token: "", @@ -308,13 +271,8 @@ func main() { fmt.Println("Okay, you're logged in.") } - log.Printf("Reading %s...\n", fname) - raw, _ := ioutil.ReadFile(fname) - - log.Println("Parsing...") - // What kind of file is it? - d := identifyFile(fname, raw) // d is now an ImportedBlogs object, not a WXR object + d := identifyFile(fname) // d is now an ImportedBlogs object, not a WXR object log.Printf("Found %d channels.\n", len(d.Collections)) @@ -324,38 +282,44 @@ func main() { c := ch.Params title := c.Title log.Printf("Channel: %s\n", title) - - log.Printf("Creating %s...\n", title) - coll, err := cl.CreateCollection(c) - if err != nil { - if err.Error() == "Collection name is already taken." { - title = title + " " + store.GenerateFriendlyRandomString(4) - log.Printf("A blog by that name already exists. Changing to %s...\n", title) - c.Title = title - coll, err = cl.CreateCollection(c) - if err != nil { + var coll *writeas.Collection + var err error + if *f_dry == false { + log.Printf("Creating %s...\n", title) + coll, err = cl.CreateCollection(c) + if err != nil { + if err.Error() == "Collection name is already taken." { + title = title + " " + store.GenerateFriendlyRandomString(4) + log.Printf("A blog by that name already exists. Changing to %s...\n", title) + c.Title = title + coll, err = cl.CreateCollection(c) + if err != nil { + errQuit(err.Error()) + } + } else { errQuit(err.Error()) } - } else { - errQuit(err.Error()) } + log.Printf("Done!\n") } - log.Printf("Done!\n") - log.Printf("Found %d posts.\n", len(ch.Posts)) for _, p := range ch.Posts { log.Printf("Creating %s", p.Title) - p.Collection = coll.Alias - _, err = cl.CreatePost(p) - if err != nil { - fmt.Fprintf(os.Stderr, "create post: %s\n", err) - continue + if *f_dry == false { + p.Collection = coll.Alias + _, err = cl.CreatePost(p) + if err != nil { + fmt.Fprintf(os.Stderr, "create post: %s\n", err) + continue + } } - postsCount++ } } log.Printf("Created %d posts.\n", postsCount) + if *f_dry == true { + log.Println("THIS WAS A DRY RUN! No posts or collections were actually created on the remote server.") + } } func errQuit(m string) { From 93baa01b58bab50dad6ab787da8d4cc81b23edd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Fri, 21 Jun 2019 15:43:03 -0400 Subject: [PATCH 14/17] Change instance URL behavior --- main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 8106aac..46d6854 100644 --- a/main.go +++ b/main.go @@ -238,10 +238,10 @@ func main() { } else { fmt.Println("We don't have a token for " + inst + ".") r := bufio.NewScanner(os.Stdin) - fmt.Print("Instance URL: ") + fmt.Print("Instance URL (include http/https): ") r.Scan() url := r.Text() - if string(url[:5]) != "https" { + if string(url[:4]) != "http" { url = "https://" + url } if string(url[len(url)-1:]) == "/" { From 7be4a8dab24b938cd6f916ea2dc21a1329b47b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Fri, 21 Jun 2019 16:18:13 -0400 Subject: [PATCH 15/17] Add verbose flag --- main.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/main.go b/main.go index 46d6854..220de3a 100644 --- a/main.go +++ b/main.go @@ -64,10 +64,12 @@ func identifyFile(fname string) *ImportedBlogs { rawstr := string(raw[:200]) if strings.Contains(rawstr, "WordPress") { log.Println("This looks like a WordPress file. Parsing...") + // Since I know it's a WP file I might as well do this here + // instead of delegating wxr.ParseWxr to the helper function return ParseWPFile(wxr.ParseWxr(raw)) } else { // It's XML but not WordPress - errQuit("I can't tell what kind of file this is.") + errQuit("It's XML, but not in a format I recognize.") } // Future development: //} else if extension == "zip" { @@ -206,6 +208,7 @@ func main() { f_file := flag.String("f", "", "File to be imported") f_help := flag.Bool("h", false, "Print this help message") f_dry := flag.Bool("d", false, "Dry run (parse the input file but don't upload the contents)") + f_verb := flag.Bool("v", false, "Display all messages instead of just important ones") flag.Parse() a := flag.Args() if (*f_file == "" && len(a) == 0) || (*f_help == true) { @@ -213,6 +216,7 @@ func main() { flag.PrintDefaults() return } + vbs := *f_verb var fname string if *f_file != "" { fname = *f_file @@ -268,7 +272,9 @@ func main() { defer file.Close() printstr := "\n[" + inst + "]\nurl=" + url + "\ntoken=" + cl.Token() fmt.Fprintln(file, printstr) - fmt.Println("Okay, you're logged in.") + if vbs { + fmt.Println("Okay, you're logged in.") + } } // What kind of file is it? @@ -300,11 +306,15 @@ func main() { errQuit(err.Error()) } } - log.Printf("Done!\n") + if vbs { + log.Printf("Done!\n") + } } log.Printf("Found %d posts.\n", len(ch.Posts)) for _, p := range ch.Posts { - log.Printf("Creating %s", p.Title) + if vbs { + log.Printf("Creating %s", p.Title) + } if *f_dry == false { p.Collection = coll.Alias _, err = cl.CreatePost(p) From cc85babe0ea39b907e61d59922e711a683103ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=ABlle=20Anthony?= Date: Fri, 21 Jun 2019 17:17:52 -0400 Subject: [PATCH 16/17] Add initial Zip parsing (from zip-import) Final commit of the day 6/21. Code doesn't currently work, but I see how to get it there. --- instances.ini => instances-example.ini | 2 +- main.go | 129 +++++++++++++++++++++++-- 2 files changed, 120 insertions(+), 11 deletions(-) rename instances.ini => instances-example.ini (70%) diff --git a/instances.ini b/instances-example.ini similarity index 70% rename from instances.ini rename to instances-example.ini index 10761a3..3d732cb 100644 --- a/instances.ini +++ b/instances-example.ini @@ -2,4 +2,4 @@ ; each instance has a url and a token [writeas] url=https://write.as -token=00000000-0000-0000-0000-000000000000 \ No newline at end of file +token=00000000-0000-0000-0000-000000000000 diff --git a/main.go b/main.go index 220de3a..216b424 100644 --- a/main.go +++ b/main.go @@ -11,7 +11,7 @@ package main import ( - // "archive/zip" + "archive/zip" "bufio" "bytes" "flag" @@ -20,7 +20,7 @@ import ( "github.com/howeyc/gopass" "github.com/writeas/godown" "github.com/writeas/nerds/store" - // "github.com/writeas/web-core/posts" + "github.com/writeas/web-core/posts" // "github.com/writeas/wf-migrate" // "github.com/writeas/zip-import" "go.code.as/writeas.v2" @@ -64,17 +64,18 @@ func identifyFile(fname string) *ImportedBlogs { rawstr := string(raw[:200]) if strings.Contains(rawstr, "WordPress") { log.Println("This looks like a WordPress file. Parsing...") - // Since I know it's a WP file I might as well do this here - // instead of delegating wxr.ParseWxr to the helper function - return ParseWPFile(wxr.ParseWxr(raw)) + // Changed my mind. Since we're exporting ParseWPFile it should + // accept the contents of a file and not rely on the importing + // program to also import wxr. + // (We can let them import ioutil, it's core.) + return ParseWPFile(raw) } else { // It's XML but not WordPress errQuit("It's XML, but not in a format I recognize.") } - // Future development: - //} else if extension == "zip" { - // log.Println("This looks like a Zip archive. Parsing...") - // return ParseZipFile(fname) + } else if extension == "zip" { + log.Println("This looks like a Zip archive. Parsing...") + return ParseZipFile(fname) } else { errQuit("I can't tell what kind of file this is.") } @@ -85,7 +86,8 @@ func identifyFile(fname string) *ImportedBlogs { // Turn our WXR struct into an ImportedBlogs struct. // Creating a general format for imported blogs is the first step to // generalizing the import process. -func ParseWPFile(d wxr.Wxr) *ImportedBlogs { +func ParseWPFile(raw []byte) *ImportedBlogs { + d = wxr.ParseWxr(raw) coll := &ImportedBlogs{} for _, ch := range d.Channels { // Create the blog @@ -149,6 +151,113 @@ func ParseWPFile(d wxr.Wxr) *ImportedBlogs { return coll } +func ParseZipFile(fname string) *ImportedBlogs { + return &ImportedBlogs{} + zf, err := zip.OpenReader(fname) + if err != nil { + errQuit(err.Error()) + } + defer zf.Close() + + coll := &ImportedBlogs{} + t_coll := make(map[string]*SingleBlog{}) + + t_coll["Drafts"] = &SingleBlog{ + Params: &writeas.CollectionParams{}, + Posts: make([]*writeas.PostParams, 0, 0), + } + + for _, f := range zf.File { + // A trailing slash means this is an empty directory + isEmptyDir := strings.HasSuffix(f.Name, "/") + if isEmptyDir { + title := f.Name[:len(f.Name)-1] + if (t_coll[title] == &SingleBlog{}) { + t_coll[title] = &SingleBlog{ + Params: &writeas.CollectionParams{ + Title: title, + Description: "", + }, + Posts: make([]*writeas.PostParams, 0, 0), + } + } + continue + } + + // Get directory, slug, etc. from the filename + fParts := strings.Split(f.Name, "/") + var collAlias string + var postFname string + if len(fParts) == 1 { + // This is a top-level file + collAlias = "Drafts" + postFname = fParts[0] + } else { + // This is a collection post + collAlias = fParts[0] + postFname = fParts[1] + } + + // Ideally, we'll reach each collection's directory before we reach + // the first post in the collection. But we can't rely on a zip + // file's ordering to be deterministic. So just in case, we do this + // check twice. + if (t_coll[title] == &SingleBlog{}) { + t_coll[title] = &SingleBlog{ + Params: &writeas.CollectionParams{ + Title: title, + Description: "", + }, + Posts: make([]*writeas.PostParams, 0, 0), + } + } + + // Get file contents + fc, err := f.Open() + if err != nil { + fmt.Fprintf(os.Stderr, "open file failed: %s: %v", f.Name, err) + continue + } + defer fc.Close() + content, err := ioutil.ReadAll(fc) + if err != nil { + fmt.Fprintf(os.Stderr, "read file failed: %s: %v", f.Name, err) + continue + } + + // Build post parameters + p := filenameToParams(postFname) + p.Created = &f.Modified + p.Title, p.Content = posts.ExtractTitle(string(content)) + + t_coll[collAlias].Posts = append(t_coll[collAlias].Posts, p) + + fmt.Printf("%s - %s - %+v\n", f.Name, collAlias, p) + } + return nil +} + +// filenameToParams returns PostParams with the ID and slug derived from the given filename. +func filenameToParams(fname string) *writeas.PostParams { + baseParts := strings.Split(fname, ".") + // This assumes there's at least one '.' in the filename, e.g. abc123.txt + // TODO: handle the case where len(baseParts) != 2 + baseName := baseParts[0] + + p := &writeas.PostParams{} + + parts := strings.Split(baseName, "_") + if len(parts) == 1 { + // There's no slug -- only an ID + p.ID = parts[0] + } else { + // len(parts) > 1 + p.Slug = parts[0] + p.ID = parts[1] + } + return p +} + // Temporarily using an ini file to store instance tokens. // This is probably not what the rest of the code does, // but I need some way to handle this for now. From ff443c9ded9f9b7e0a717f0e235069b2dbc96cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noe=CC=88lle=20Anthony?= Date: Tue, 2 Jul 2019 16:24:20 -0400 Subject: [PATCH 17/17] Add ParseWFJSONFile --- main.go | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/main.go b/main.go index 216b424..4c9a5ed 100644 --- a/main.go +++ b/main.go @@ -51,13 +51,13 @@ type SingleBlog struct { } // Preparing to be able to handle multiple types of files. -// For right now, just verify that it's a valid WordPress WXR file. -// Do this two ways: check that the file extension is "xml", and -// verify that the word "WordPress" appears in the first 200 characters. +// If the extension is "xml", verify that it's a valid WordPress WXR file: +// Check to see if the word "WordPress" appears in the first 200 characters. +// If the extension is "zip", parse it as a ZIP file. func identifyFile(fname string) *ImportedBlogs { log.Printf("Reading %s...\n", fname) parts := strings.Split(fname, ".") - extension := parts[len(parts)-1] + extension := strings.ToLower(parts[len(parts)-1]) if extension == "xml" { raw, _ := ioutil.ReadFile(fname) @@ -76,6 +76,10 @@ func identifyFile(fname string) *ImportedBlogs { } else if extension == "zip" { log.Println("This looks like a Zip archive. Parsing...") return ParseZipFile(fname) + } else if extension == "json" { + // TODO: Identify specifically as a WriteFreely JSON file + log.Println("This looks like a WriteFreely JSON file. Parsing...") + return ParseWFJSONFile(fname) } else { errQuit("I can't tell what kind of file this is.") } @@ -151,8 +155,14 @@ func ParseWPFile(raw []byte) *ImportedBlogs { return coll } +// Read through the ZIP file, converting text files to posts +// and directories to blogs. +// If the filename ends in a / it's a directory. +// Otherwise, filenames have the format "[directory/]postname.txt" +// If there's no directory, then it's a draft post +// If there is a directory, the directory is the blog the post goes in func ParseZipFile(fname string) *ImportedBlogs { - return &ImportedBlogs{} + //return &ImportedBlogs{} zf, err := zip.OpenReader(fname) if err != nil { errQuit(err.Error()) @@ -172,6 +182,8 @@ func ParseZipFile(fname string) *ImportedBlogs { isEmptyDir := strings.HasSuffix(f.Name, "/") if isEmptyDir { title := f.Name[:len(f.Name)-1] + // I think this will work. &SingleBlog{} should be the null value + // If there isn't already a blog with this name, make one if (t_coll[title] == &SingleBlog{}) { t_coll[title] = &SingleBlog{ Params: &writeas.CollectionParams{ @@ -181,6 +193,8 @@ func ParseZipFile(fname string) *ImportedBlogs { Posts: make([]*writeas.PostParams, 0, 0), } } + // If there is, we don't need to do anything. + // Either way, skip the rest of the block and go to the next file continue } @@ -202,10 +216,10 @@ func ParseZipFile(fname string) *ImportedBlogs { // the first post in the collection. But we can't rely on a zip // file's ordering to be deterministic. So just in case, we do this // check twice. - if (t_coll[title] == &SingleBlog{}) { - t_coll[title] = &SingleBlog{ + if (t_coll[collAlias] == &SingleBlog{}) { + t_coll[collAlias] = &SingleBlog{ Params: &writeas.CollectionParams{ - Title: title, + Title: collAlias, Description: "", }, Posts: make([]*writeas.PostParams, 0, 0), @@ -215,13 +229,13 @@ func ParseZipFile(fname string) *ImportedBlogs { // Get file contents fc, err := f.Open() if err != nil { - fmt.Fprintf(os.Stderr, "open file failed: %s: %v", f.Name, err) + fmt.Fprintf(os.Stderr, "Couldn't open file: %s: %v", f.Name, err) continue } defer fc.Close() content, err := ioutil.ReadAll(fc) if err != nil { - fmt.Fprintf(os.Stderr, "read file failed: %s: %v", f.Name, err) + fmt.Fprintf(os.Stderr, "Opened file but couldn't read it: %s: %v", f.Name, err) continue } @@ -234,6 +248,19 @@ func ParseZipFile(fname string) *ImportedBlogs { fmt.Printf("%s - %s - %+v\n", f.Name, collAlias, p) } + + for k, v := range t_coll { + coll.Collections = append(coll.Collections, v) + } + return coll +} + +// Turn WriteFreely JSON file into an ImportedBlogs struct. +// TODO: Find out how our JSON files are structured! +// +func ParseWFJSONFile(fname string) *ImportedBlogs { + return &ImportedBlogs{} + return nil }