Skip to content

Commit

Permalink
refactored exif processing to allow extensible parsers (#54)
Browse files Browse the repository at this point in the history
  • Loading branch information
bzimmer authored Nov 5, 2021
1 parent 679ba7a commit 17c4133
Show file tree
Hide file tree
Showing 14 changed files with 383 additions and 130 deletions.
7 changes: 6 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
{
"go.buildTags": "integration",
"gopls": {
"buildFlags": [
"-tags",
"integration,exiftool"
],
}
}
10 changes: 9 additions & 1 deletion cmd/ma/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ func flags() []cli.Flag {
Value: false,
Required: false,
},
&cli.BoolFlag{
Name: "monochrome",
Required: false,
Usage: "disable colored output",
Value: false,
},
&cli.BoolFlag{
Name: "debug",
Required: false,
Expand All @@ -74,7 +80,7 @@ func initLogging(c *cli.Context) {
log.Logger = log.Output(
zerolog.ConsoleWriter{
Out: c.App.ErrWriter,
NoColor: false,
NoColor: c.Bool("monochrome"),
TimeFormat: time.RFC3339,
},
)
Expand Down Expand Up @@ -144,6 +150,7 @@ func main() {
Metrics: metric,
Encoder: enc,
Fs: afero.NewOsFs(),
Exif: ma.NewGoExif(),
},
}

Expand All @@ -152,6 +159,7 @@ func main() {
After: ma.Stats,
Commands: []*cli.Command{
ma.CommandCopy(),
ma.CommandExif(),
ma.CommandExport(),
ma.CommandFind(),
ma.CommandList(),
Expand Down
173 changes: 55 additions & 118 deletions cp.go
Original file line number Diff line number Diff line change
@@ -1,189 +1,135 @@
package ma

import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
"path/filepath"
"sort"
"strings"
"time"

"github.com/armon/go-metrics"
"github.com/rs/zerolog/log"
"github.com/rwcarlsen/goexif/exif"
"github.com/spf13/afero"
"github.com/urfave/cli/v2"
"golang.org/x/sync/errgroup"
)

const (
defaultBufferSize = 1024 * 1024
defaultDateFormat = "2006/2006-01/02"
)

// defaultImages returns the image file extensions (lowercase, including the
// leading dot) that are consulted when choosing a timestamp for a fileset.
// The slice order is the priority order: the caller ranges over it and uses
// the first extension for which a time was recorded.
func defaultImages() []string {
return []string{".raf", ".nef", ".dng", ".jpg", ".jpeg"}
}
const defaultDateFormat = "2006/2006-01/02"

func split(fullname string) (dirname, basename string) {
func split(fullname string) identifier {
dirname, filename := filepath.Split(fullname)
n := strings.LastIndexFunc(filename, func(s rune) bool {
return s == '.'
})
var basename string
switch n {
case -1:
basename = filename
default:
basename = filename[0:n]
}
dirname = filepath.Clean(dirname)
return
}

// dateTimeExif reads the Exif date/time of a single file.
type dateTimeExif struct {
// fs is the filesystem the file is opened from.
fs afero.Fs
// src is the directory containing the file; it is joined with info.Name()
// to build the path that is opened.
src string
// ext is the file's extension (callers pass it lowercased, with the dot);
// it selects how many bytes are read before decoding.
ext string
// info describes the file and supplies its name and size.
info fs.FileInfo
}

func (b *dateTimeExif) bufferSize() int64 {
switch b.ext {
case ".orf", ".dng", ".nef":
return b.info.Size()
default:
return defaultBufferSize
}
return identifier{dirname: filepath.Clean(dirname), basename: basename}
}

func (b *dateTimeExif) dateTime() (time.Time, error) {
fp, err := b.fs.Open(filepath.Join(b.src, b.info.Name()))
if err != nil {
return time.Time{}, err
}
defer fp.Close()
data := make([]byte, b.bufferSize())
_, err = fp.Read(data)
if err != nil {
return time.Time{}, err
}
x, err := exif.Decode(bytes.NewBuffer(data))
if err != nil {
return time.Time{}, err
}
tm, err := x.DateTime()
if err != nil {
return time.Time{}, err
}
return tm, err
// identifier is the grouping key produced by split: files in the same
// directory that share a basename yield equal identifiers. It contains only
// strings, so it is comparable and is used directly as a map key.
type identifier struct {
// dirname is the directory portion of the path after filepath.Clean.
dirname string
// basename is the file name up to (not including) its last '.', or the
// whole file name when it contains no '.'.
basename string
}

type fileSet struct {
files []fs.FileInfo
// fileset groups the files discovered during a walk that share one
// identifier (same directory and same basename).
type fileset struct {
// identifier is the directory/basename key; its dirname is used to build
// the source path of every file in the set.
identifier identifier
// files holds the FileInfo of each member of the set.
files []fs.FileInfo
}

// add appends info to the files tracked by the fileSet.
func (f *fileSet) add(info fs.FileInfo) {
f.files = append(f.files, info)
}

func (f *fileSet) dateTime(afs afero.Fs, dirname string) (time.Time, error) {
// for every file in the fileset attempt to create a time.Time
times := make(map[string]time.Time)
// dateTime attempts to create a time.Time for every file in the fileset
func (f *fileset) dateTime(afs afero.Fs, ex Exif) (time.Time, error) {
var infos []fs.FileInfo
for i := range f.files {
info := f.files[i]
ext := strings.ToLower(filepath.Ext(info.Name()))
switch ext {
case ".jpg", ".jpeg", ".raf", ".dng", ".nef":
dt := &dateTimeExif{fs: afs, src: dirname, ext: ext, info: info}
t, err := dt.dateTime()
if err != nil {
return time.Time{}, err
}
times[ext] = t
case ".mp4", ".mov", ".avi":
// @todo(movies)
case ".orf":
// @todo(orf)
case "", ".xmp":
// not trustworthy for valid dates
default:
infos = append(infos, info)
}
}

// in priority order, find the first non-zero time.Time
for _, ext := range defaultImages() {
t, ok := times[ext]
if ok {
return t, nil
if len(infos) == 0 {
return time.Time{}, nil
}
var times []time.Time
mds := ex.Extract(afs, f.identifier.dirname, infos...)
for i := range mds {
if mds[i].Err != nil {
return time.Time{}, mds[i].Err
}
times = append(times, mds[i].DateTime)
}

// found no time
return time.Time{}, nil
}

type entangle struct {
source string
fileSet *fileSet
sort.SliceStable(times, func(i, j int) bool {
return times[i].Before(times[j])
})
// @todo(bzimmer) ensure dates are consistent (within a ~second or so)
return times[0], nil
}

// entangler carries the dependencies and options used by the cp command to
// walk the sources, group files into filesets, and copy each fileset into a
// date-derived directory beneath the destination.
type entangler struct {
// fs is the filesystem that is walked and read from.
fs afero.Fs
// exif extracts metadata (including the date/time) for the files of a fileset.
exif Exif
// metrics receives the "cp" counters incremented while walking and copying.
metrics *metrics.Metrics
// concurrency is the number of goroutines started to consume filesets.
concurrency int
// dryrun is populated from the "dryrun" flag.
// NOTE(review): presumably suppresses the actual copy — confirm in copyFile.
dryrun bool
// dateFormat is the time layout passed to Time.Format to build the
// destination subdirectory.
dateFormat string
}

func (c *entangler) cp(ctx context.Context, sources []string, destination string) error {
q := make(chan *entangle)
q := make(chan *fileset)
grp, ctx := errgroup.WithContext(ctx)
grp.Go(func() error {
defer close(q)
sets := make(map[string]map[string]*fileSet)
sets := make(map[identifier][]fs.FileInfo)
for i := range sources {
select {
case <-ctx.Done():
return ctx.Err()
default:
if err := afero.Walk(c.fs, sources[i], c.fileSets(sets)); err != nil {
if err := afero.Walk(c.fs, sources[i], c.filesets(sets)); err != nil {
return err
}
}
}
for dirname, filesets := range sets {
for _, fileset := range filesets {
select {
case <-ctx.Done():
return ctx.Err()
case q <- &entangle{source: dirname, fileSet: fileset}:
c.metrics.IncrCounter([]string{"cp", "filesets"}, 1)
}
for id, files := range sets {
select {
case <-ctx.Done():
return ctx.Err()
case q <- &fileset{identifier: id, files: files}:
c.metrics.IncrCounter([]string{"cp", "filesets"}, 1)
}
}
return nil
})
for i := 0; i < c.concurrency; i++ {
grp.Go(c.copyFileSet(q, destination))
grp.Go(c.copyFileset(q, destination))
}
return grp.Wait()
}

func (c *entangler) copyFileSet(q <-chan *entangle, destination string) func() error {
func (c *entangler) copyFileset(q <-chan *fileset, destination string) func() error {
return func() error {
for ent := range q {
for x := range q {
c.metrics.IncrCounter([]string{"cp", "fileset", "attempt"}, 1)
dt, err := ent.fileSet.dateTime(c.fs, ent.source)
dt, err := x.dateTime(c.fs, c.exif)
if err != nil {
c.metrics.IncrCounter([]string{"cp", "fileset", "failed", "exif"}, 1)
return err
}
if dt.IsZero() {
c.metrics.IncrCounter([]string{"cp", "fileset", "skip", "unsupported"}, 1)
for i := range ent.fileSet.files {
filename := ent.fileSet.files[i].Name()
for i := range x.files {
filename := filepath.Join(x.identifier.dirname, x.files[i].Name())
ext := filepath.Ext(filename)
ext = strings.TrimPrefix(ext, ".")
if ext == "" {
Expand All @@ -195,9 +141,9 @@ func (c *entangler) copyFileSet(q <-chan *entangle, destination string) func() e
continue
}
df := dt.Format(c.dateFormat)
for i := range ent.fileSet.files {
src := filepath.Join(ent.source, ent.fileSet.files[i].Name())
dst := filepath.Join(destination, df, ent.fileSet.files[i].Name())
for i := range x.files {
src := filepath.Join(x.identifier.dirname, x.files[i].Name())
dst := filepath.Join(destination, df, x.files[i].Name())
if err := c.copyFile(src, dst); err != nil {
return err
}
Expand Down Expand Up @@ -265,8 +211,8 @@ func (c *entangler) copyFile(source, destination string) error {
return nil
}

// fileSets creates fileSets from a directory traversal
func (c *entangler) fileSets(sets map[string]map[string]*fileSet) filepath.WalkFunc {
// filesets creates filesets from a directory traversal
func (c *entangler) filesets(sets map[identifier][]fs.FileInfo) filepath.WalkFunc {
return func(path string, info fs.FileInfo, err error) error {
if err != nil {
if errors.Is(err, fs.ErrPermission) {
Expand All @@ -286,18 +232,8 @@ func (c *entangler) fileSets(sets map[string]map[string]*fileSet) filepath.WalkF
}
c.metrics.IncrCounter([]string{"cp", "visited", "files"}, 1)

dirname, basename := split(path)
dirs, ok := sets[dirname]
if !ok {
dirs = make(map[string]*fileSet)
sets[dirname] = dirs
}
fileset, ok := dirs[basename]
if !ok {
fileset = new(fileSet)
dirs[basename] = fileset
}
fileset.add(info)
id := split(path)
sets[id] = append(sets[id], info)

return nil
}
Expand All @@ -314,6 +250,7 @@ func cp(c *cli.Context) error {
concurrency: c.Int("concurrency"),
dryrun: c.Bool("dryrun"),
dateFormat: c.String("format"),
exif: runtime(c).Exif,
}
args := c.Args().Slice()
destination, err := filepath.Abs(args[len(args)-1])
Expand All @@ -328,7 +265,7 @@ func CommandCopy() *cli.Command {
Name: "cp",
HelpName: "cp",
Usage: "copy files to a the directory structure of `--format`",
Description: "copy files from a source(s) to a destination using the Exif format to create the directory structure",
Description: "copy files from a source(s) to a destination using the image date to layout the directory structure",
ArgsUsage: "<file-or-directory> [, <file-or-directory>] <file-or-directory>",
Flags: []cli.Flag{
&cli.BoolFlag{
Expand Down
26 changes: 26 additions & 0 deletions cp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,32 @@ func TestCopy(t *testing.T) { //nolint
return nil
},
},
{
name: "two valid files",
args: []string{"ma", "cp", "/foo/bar", "/foo/baz"},
counters: map[string]int{
"ma.cp.visited.directories": 1,
"ma.cp.visited.files": 2,
},
before: func(c *cli.Context) error {
fp := createTestFile(t, runtime(c).Fs)
a.NoError(fp.Close())
fp, err := runtime(c).Fs.Create("/foo/bar/Nikon_D70_0.jpg")
a.NoError(err)
a.NoError(copyFile(fp, "testdata/Nikon_D70.jpg"))
a.NoError(fp.Close())
return nil
},
after: func(c *cli.Context) error {
stat, err := runtime(c).Fs.Stat("/foo/baz/2008/2008-03/15/Nikon_D70.jpg")
a.NoError(err)
a.NotNil(stat)
stat, err = runtime(c).Fs.Stat("/foo/baz/2008/2008-03/15/Nikon_D70_0.jpg")
a.NoError(err)
a.NotNil(stat)
return nil
},
},
{
name: "image + xmp dry-run",
args: []string{"ma", "cp", "-n", "/foo/bar", "/foo/baz"},
Expand Down
3 changes: 2 additions & 1 deletion docs/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ All your media archiving needs!
|```smugmug-access-token```||smugmug access token|
|```smugmug-token-secret```||smugmug token secret|
|```json```|```j```|encode all results as JSON and print to stdout|
|```monochrome```||disable colored output|
|```debug```||enable debugging of http requests|
|```help```|```h```|show help|

Expand Down Expand Up @@ -61,7 +62,7 @@ $ ma commands [flags]

**Description**

copy files from a source(s) to a destination using the Exif format to create the directory structure
copy files from a source(s) to a destination using the image date to layout the directory structure


**Syntax**
Expand Down
Loading

0 comments on commit 17c4133

Please sign in to comment.