-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
141 lines (121 loc) · 3.66 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package main
import (
	"flag"
	"fmt"
	"image"
	_ "image/jpeg"
	_ "image/png"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/corona10/goimagehash"
)
// HashResult pairs an image file path with the hash computed for that file.
// calculateHash sends one HashResult per image it manages to hash.
type HashResult struct {
// FilePath is the path of the image the hash was computed from.
FilePath string
// Hash is the hash of that image; mvDups groups files by Hash.ToString().
Hash *goimagehash.ImageHash
}
// calculateHash is a worker loop: it receives image paths from filePaths until
// that channel is closed, computes a difference hash for each image, and sends
// the result on hashCh.
//
// A file that cannot be opened, decoded, or hashed is reported on stdout and
// skipped. No HashResult is sent for such a file, so consumers must not assume
// one result per path. The worker keeps processing the remaining paths rather
// than exiting on the first failure.
func calculateHash(filePaths chan string, hashCh chan<- HashResult) {
	for filePath := range filePaths {
		hash, err := hashImageFile(filePath)
		if err != nil {
			// err already names the failed step and the file, e.g.
			// "opening file <path>: <cause>".
			fmt.Printf("[!] Error %v\n", err)
			continue
		}
		// Send the perceptual hash result to the collector.
		hashCh <- HashResult{FilePath: filePath, Hash: hash}
	}
}

// hashImageFile opens and decodes the image at filePath and returns its
// difference hash. Keeping this in its own function lets the deferred Close
// run as soon as one file is done, instead of piling up until the worker
// loop ends.
func hashImageFile(filePath string) (*goimagehash.ImageHash, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return nil, fmt.Errorf("opening file %s: %w", filePath, err)
	}
	defer file.Close()

	img, _, err := image.Decode(file)
	if err != nil {
		return nil, fmt.Errorf("decoding image %s: %w", filePath, err)
	}

	hash, err := goimagehash.DifferenceHash(img)
	if err != nil {
		return nil, fmt.Errorf("calculating hash for %s: %w", filePath, err)
	}
	return hash, nil
}
func mvDups(dirPath string, maxWorkers int, quiet bool) error {
// Buffered channel - can hold the value of maxWorkers before the sender will block
filePaths := make(chan string, maxWorkers)
hashCh := make(chan HashResult)
// Create an empty map where keys are hash values (string) and values are slices of file paths ([]string)
imgHashes := make(map[string][]string)
imgPaths, err := getImgPaths(dirPath)
if err != nil {
return err
}
// Create worker goroutines as a resource pool
for i := 0; i < cap(filePaths); i++ {
go calculateHash(filePaths, hashCh)
}
// Send to the workers in a separate goroutine
// The result-gathering loop needs to start before more than maxWorkers items of work can continue
go func() {
for _, path := range imgPaths {
filePaths <- path
}
close(filePaths) // Close filePaths channel after sending all paths
}()
// Result-gathering loop that receives on the results channel until calculateHash goroutines are done
for range imgPaths {
result := <-hashCh
imgHashes[result.Hash.ToString()] = append(imgHashes[result.Hash.ToString()], result.FilePath)
}
close(hashCh)
// Process each group of hashed images
dupsDir := filepath.Join(dirPath, "hashed")
if err := os.Mkdir(dupsDir, os.ModePerm); err != nil && !os.IsExist(err) {
return err
}
for hash, paths := range imgHashes {
if len(paths) > 0 {
sort.Strings(paths)
// Use the hash of the first file as the folder name
hashDir := filepath.Join(dupsDir, hash[2:])
if err := os.Mkdir(hashDir, os.ModePerm); err != nil && !os.IsExist(err) {
return err
}
// Move similar to the hash-named folder
for _, p := range paths {
newPath := filepath.Join(hashDir, filepath.Base(p))
if !quiet {
fmt.Printf("[-] Moving: %s to %s\n", p, newPath)
}
if err := os.Rename(p, newPath); err != nil {
return err
}
}
}
}
return nil
}
// getImgPaths walks dirPath recursively and returns the paths of all
// non-directory entries whose extension is .jpg, .jpeg or .png. The extension
// is compared case-insensitively, so files such as "IMG_0001.JPG" are included.
//
// Paths are returned in the order filepath.Walk visits them. If the walk
// reports an error, that error is returned and the path list is nil.
func getImgPaths(dirPath string) ([]string, error) {
	var imgPaths []string
	err := filepath.Walk(dirPath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		switch strings.ToLower(filepath.Ext(info.Name())) {
		case ".jpg", ".jpeg", ".png":
			imgPaths = append(imgPaths, path)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return imgPaths, nil
}
// main parses the command-line flags and runs mvDups on the chosen folder.
//
// Flags:
//
//	-dir      images folder path (required)
//	-quiet    do not print each moved file
//	-workers  number of concurrent hashing workers (default 100, at least 1)
//
// Usage problems and mvDups failures are written to stderr and make the
// process exit with status 1, so scripts can detect that the run failed.
func main() {
	dir := flag.String("dir", "", "Images folder path")
	quiet := flag.Bool("quiet", false, "If true, won't print the moved files (default false)")
	maxWorkers := flag.Int("workers", 100, "Number of workers to run concurrently")
	flag.Parse()

	if *dir == "" {
		fmt.Fprintf(os.Stderr, "Usage: %s -dir <folder-path>\n", os.Args[0])
		os.Exit(1)
	}
	// Without at least one worker no image would ever be hashed.
	if *maxWorkers < 1 {
		fmt.Fprintf(os.Stderr, "[!] Error: -workers must be at least 1 (got %d)\n", *maxWorkers)
		os.Exit(1)
	}

	if err := mvDups(*dir, *maxWorkers, *quiet); err != nil {
		fmt.Fprintf(os.Stderr, "[!] Error: %v\n", err)
		os.Exit(1)
	}
}