Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extractor number pad file names and do not override cache #78

Merged
merged 1 commit into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pkg/anki/anki_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"path"
"testing"
Expand All @@ -16,13 +15,16 @@ import (
"github.com/s12chung/text2anki/pkg/util/logg"
"github.com/s12chung/text2anki/pkg/util/test"
"github.com/s12chung/text2anki/pkg/util/test/fixture"
"github.com/s12chung/text2anki/pkg/util/test/fixture/flog"
)

var plog = flog.FixtureUpdateNoWrite()

func init() {
dir := path.Join(os.TempDir(), test.GenerateName("anki.TestMain"))
c := Config{ExportPrefix: "t2a-", NotesCacheDir: dir}
if err := os.MkdirAll(dir, ioutil.OwnerRWXGroupRX); err != nil {
slog.Error("anki.init()", logg.Err(err)) //nolint:forbidigo // used in init only
plog.Error("anki.init()", logg.Err(err)) //nolint:forbidigo // used in init only
os.Exit(-1)
}
SetConfig(c)
Expand Down
31 changes: 8 additions & 23 deletions pkg/extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,19 @@ func (e Extractor) Extract(s string) (SourceExtraction, error) {

hash := source.ID()
cacheDir := filepath.Join(e.cacheDir, hash)
if err := os.MkdirAll(cacheDir, ioutil.OwnerRWXGroupRX); err != nil {
return SourceExtraction{}, err
}
if err := source.ExtractToDir(cacheDir); err != nil {
return SourceExtraction{}, err
if _, err := os.Stat(cacheDir); os.IsNotExist(err) {
if err := os.MkdirAll(cacheDir, ioutil.OwnerRWXGroupRX); err != nil {
return SourceExtraction{}, err
}
if err := source.ExtractToDir(cacheDir); err != nil {
return SourceExtraction{}, err
}
}

entries, err := os.ReadDir(cacheDir)
if err != nil {
return SourceExtraction{}, err
}
filenames := filenamesWithExtensions(entries, e.factory.Extensions())
filenames := ioutil.FilenamesWithExtensions(entries, e.factory.Extensions())
if len(filenames) == 0 {
return SourceExtraction{}, fmt.Errorf("no filenames that match extensions extracted: %v", strings.Join(e.factory.Extensions(), ", "))
}
Expand All @@ -103,22 +104,6 @@ func (e Extractor) Extract(s string) (SourceExtraction, error) {
return SourceExtraction{Info: info, Parts: parts}, nil
}

// filenamesWithExtensions returns the names of the non-directory entries whose name ends with
// any of the given extensions, keeping the order of entries
func filenamesWithExtensions(entries []os.DirEntry, extensions []string) []string {
	// matchesAny reports whether name ends with at least one of the extensions
	matchesAny := func(name string) bool {
		for i := range extensions {
			if strings.HasSuffix(name, extensions[i]) {
				return true
			}
		}
		return false
	}

	names := make([]string, 0, len(entries))
	for _, entry := range entries {
		if entry.IsDir() || !matchesAny(entry.Name()) {
			continue
		}
		names = append(names, entry.Name())
	}
	return names
}

// Map maps an extractor name to its Extractor
type Map map[string]Extractor

Expand Down
40 changes: 25 additions & 15 deletions pkg/extractor/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,30 +53,40 @@ func TestExtractor_Extract(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
require := require.New(t)

source, err := NewExtractor(cacheDir, extractortest.NewFactory(testName)).Extract(tc.s)
extractor := NewExtractor(cacheDir, extractortest.NewFactory(testName))
source, err := extractor.Extract(tc.s)
if tc.err != nil {
require.Equal(tc.err, err)
return
}
require.NoError(err)
fixture.CompareReadOrUpdate(t, path.Join(testName, tc.name+"_info.json"), fixture.JSON(t, source.Info))

partMap := map[string]string{}
for _, part := range source.Parts {
require.Nil(part.AudioFile)

file := part.ImageFile
info, err := file.Stat()
require.NoError(err)
bytes, err := io.ReadAll(file)
require.NoError(err)
partMap[info.Name()] = string(bytes)
}
fixture.CompareReadOrUpdate(t, path.Join(testName, tc.name+"_parts.json"), fixture.JSON(t, partMap))
testSource(t, testName, tc.name, source)

secondSource, err := extractor.Extract(tc.s)
require.NoError(err)
testSource(t, testName, tc.name, secondSource)
})
}
}

// testSource compares an extraction against the fixtures of the test case:
// src.Info is compared with <name>_info.json and a map of image file name to image file contents
// is compared with <name>_parts.json. Every part is required to have no audio file.
func testSource(t *testing.T, testName, name string, src SourceExtraction) {
	// mark as a helper so failures are reported at the calling line of the test
	t.Helper()

	require := require.New(t)
	fixture.CompareReadOrUpdate(t, path.Join(testName, name+"_info.json"), fixture.JSON(t, src.Info))

	partMap := make(map[string]string, len(src.Parts))
	for _, part := range src.Parts {
		require.Nil(part.AudioFile)

		file := part.ImageFile
		info, err := file.Stat()
		require.NoError(err)
		content, err := io.ReadAll(file)
		require.NoError(err)
		partMap[info.Name()] = string(content)
	}
	fixture.CompareReadOrUpdate(t, path.Join(testName, name+"_parts.json"), fixture.JSON(t, partMap))
}

func TestVerify(t *testing.T) {
testName := "TestVerify"
cacheDir := path.Join(os.TempDir(), test.GenerateName(testName))
Expand Down
11 changes: 9 additions & 2 deletions pkg/extractor/extractortest/extractortest.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
package extractortest

import (
"fmt"
"os"
"path/filepath"

Expand Down Expand Up @@ -50,7 +51,14 @@ func (t Source) ExtractToDir(cacheDir string) error {
if t.s == SkipExtractString {
return nil
}
err := filepath.Walk(t.fixturePath, func(path string, info os.FileInfo, err error) error {
items, err := os.ReadDir(cacheDir)
if err != nil {
return err
}
if len(items) != 0 {
return fmt.Errorf("extracting to non-empty cacheDir")
}
return filepath.Walk(t.fixturePath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
Expand All @@ -59,7 +67,6 @@ func (t Source) ExtractToDir(cacheDir string) error {
}
return ioutil.CopyFile(filepath.Join(cacheDir, info.Name()), path, ioutil.OwnerGroupR)
})
return err
}

// Info returns the info from the extraction
Expand Down
46 changes: 42 additions & 4 deletions pkg/extractor/instagram/instagram.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@ import (
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"

"github.com/s12chung/text2anki/db/pkg/db"
"github.com/s12chung/text2anki/pkg/extractor"
"github.com/s12chung/text2anki/pkg/util/archive/xz"
"github.com/s12chung/text2anki/pkg/util/ioutil"
)

// extensions are the file name suffixes of the files the instagram extractor returns
var extensions = []string{".jpg"}

// GetLoginFromEnv returns the value of the INSTAGRAM_LOGIN environment variable
func GetLoginFromEnv() string {
	const loginEnvKey = "INSTAGRAM_LOGIN"
	return os.Getenv(loginEnvKey)
}

Expand All @@ -28,7 +32,7 @@ type Factory struct{ login string }
// NewSource returns a new Post for the url that carries the login of the Factory
func (f Factory) NewSource(url string) extractor.Source {
	post := &Post{login: f.login, url: url}
	return post
}

// Extensions returns the extensions the extractor returns
func (f Factory) Extensions() []string { return []string{".jpg"} }
func (f Factory) Extensions() []string { return append([]string{}, extensions...) }

// Post represents an instagram post
type Post struct {
Expand Down Expand Up @@ -67,14 +71,48 @@ func (s *Post) ID() string {
return s.id
}

// extractToDirArgs returns the instaloader command line (program name first) for the given login and
// post id. It is a variable, not a function, so that it can be swapped out for a stub.
var extractToDirArgs = func(login, id string) []string {
	args := make([]string, 0, 7)
	args = append(args, "instaloader", "--login", login)
	args = append(args, "--dirname-pattern", ".")
	args = append(args, "--", "-"+id)
	return args
}

// ExtractToDir extracts the post to the directory
func (s *Post) ExtractToDir(cacheDir string) error {
if ok := s.Verify(); !ok {
return fmt.Errorf("url is not vertified for instagram: %v", s.url)
return fmt.Errorf("url is not verified for instagram: %v", s.url)
}
cmd := exec.Command("instaloader", "--login", s.login, "--dirname-pattern", ".", "--", "-"+s.ID()) //nolint:gosec //this is how it works
args := extractToDirArgs(s.login, s.ID())
cmd := exec.Command(args[0], args[1:]...) //nolint:gosec //this is how it works
cmd.Dir = cacheDir
return cmd.Run()
if err := cmd.Run(); err != nil {
return err
}
return numberPadFilenames(cacheDir)
}

// numberPadFilenames renames every file in cacheDir that has one of the package's extensions so the
// number after its last underscore is zero-padded to at least 3 digits:
// <prefix>_<n><ext> becomes <prefix>_<nnn><ext>, e.g. "a_b_7.jpg" becomes "a_b_007.jpg".
//
// It returns an error when the directory can't be read, when a matching file name has no underscore,
// when the text between the last underscore and the next dot is not an integer, or when a rename fails.
func numberPadFilenames(cacheDir string) error {
	entries, err := os.ReadDir(cacheDir)
	if err != nil {
		return err
	}

	for _, filename := range ioutil.FilenamesWithExtensions(entries, extensions) {
		sep := strings.LastIndex(filename, "_")
		if sep < 0 {
			return fmt.Errorf("file found with no underscore: %q", filename)
		}

		// the number runs from the last underscore up to the first dot after it
		numberPart, _, _ := strings.Cut(filename[sep+1:], ".")
		number, err := strconv.Atoi(numberPart)
		if err != nil {
			return fmt.Errorf("parsing number suffix of %q: %w", filename, err)
		}

		// filename[:sep+1] keeps the prefix including the underscore itself
		newFilename := filename[:sep+1] + fmt.Sprintf("%03d", number) + filepath.Ext(filename)
		if newFilename == filename {
			continue // already padded, nothing to rename
		}
		if err := os.Rename(filepath.Join(cacheDir, filename), filepath.Join(cacheDir, newFilename)); err != nil {
			return err
		}
	}
	return nil
}

const infoGlob = "*.xz"
Expand Down
45 changes: 43 additions & 2 deletions pkg/extractor/instagram/instagram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,18 @@ package instagram

import (
"fmt"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"testing"

"github.com/stretchr/testify/require"

"github.com/s12chung/text2anki/pkg/extractor"
"github.com/s12chung/text2anki/pkg/util/ioutil"
"github.com/s12chung/text2anki/pkg/util/test"
"github.com/s12chung/text2anki/pkg/util/test/fixture"
)

Expand Down Expand Up @@ -60,19 +65,55 @@ func TestSource_ID(t *testing.T) {
}
}

// extractToDirSuffixes are the unpadded numbers at the end of the stubbed file names
var extractToDirSuffixes = []int{0, 1, 2, 9, 10, 11, 99, 100, 101, 110}

// extractToDirPrefix is the part of the stubbed file names that comes before the number
const extractToDirPrefix = "2023-11-21_10-42-44_UTC_"

// init replaces extractToDirArgs with a stub that returns a touch command,
// which has one file name argument per suffix in extractToDirSuffixes
func init() {
	stubArgs := []string{"touch"}
	for _, number := range extractToDirSuffixes {
		stubArgs = append(stubArgs, extractToDirPrefix+strconv.Itoa(number)+extensions[0])
	}

	extractToDirArgs = func(_, _ string) []string { return stubArgs }
}

func TestPost_ExtractToDir(t *testing.T) {
cacheDir := path.Join(os.TempDir(), test.GenerateName("Instagram"))
require.NoError(t, os.MkdirAll(cacheDir, ioutil.OwnerRWXGroupRX))

testCases := []struct {
name string
url string
err error
}{
{name: "broken", url: "https://waka.com", err: fmt.Errorf("url is not vertified for instagram: https://waka.com")},
{name: "broken", url: "https://waka.com", err: fmt.Errorf("url is not verified for instagram: https://waka.com")},
{name: "fake", url: testURL},
}
for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
require := require.New(t)
require.Equal(tc.err, NewPost(tc.url).ExtractToDir(""))
err := NewPost(tc.url).ExtractToDir(cacheDir)
if tc.err != nil {
require.Equal(tc.err, err)
return
}
require.NoError(err)

entries, err := os.ReadDir(cacheDir)
require.NoError(err)
entryNames := make([]string, len(entries))
for i, entry := range entries {
entryNames[i] = entry.Name()
}

filenames := make([]string, len(extractToDirSuffixes))
for i, suffix := range extractToDirSuffixes {
filenames[i] = extractToDirPrefix + fmt.Sprintf("%03d", suffix) + extensions[0]
}
require.Equal(filenames, entryNames)
})
}
}
Expand Down
18 changes: 18 additions & 0 deletions pkg/util/ioutil/ioutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"io"
"os"
"path/filepath"
"strings"
)

// OwnerGroupR is the file mode for Owners and Group read
Expand Down Expand Up @@ -53,3 +54,20 @@ func CopyFile(dst, src string, perm os.FileMode) error {
}
return os.Rename(tmp.Name(), dst)
}

// FilenamesWithExtensions returns the names of the entries that are not directories and whose name
// ends with at least one of the extensions, in the order of entries (sub-directories are not descended into)
func FilenamesWithExtensions(entries []os.DirEntry, extensions []string) []string {
	matched := make([]string, 0, len(entries))

nextEntry:
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		for _, suffix := range extensions {
			if !strings.HasSuffix(entry.Name(), suffix) {
				continue
			}
			// one matching extension is enough, move on so the name is only added once
			matched = append(matched, entry.Name())
			continue nextEntry
		}
	}
	return matched
}
Loading