查找目录中的重复文件

这是我的第一个 Go 程序。我正在学习这门语言,但要理解所有概念还有些困难,所以为了练习,我写了一段代码来检测内容相同的文件。这是一个简单的程序,可以递归地检查目录中的重复文件。

但:

如何检测目录中的重复文件?

问题不在于如何递归遍历目录,而在于如何比较文件。


阿波罗的战车
浏览 114回答 2
2回答

繁花如伊

您可以获取每个文件主体的哈希值,然后比较字典/映射中的哈希值。package mainimport (    "crypto/md5"    "fmt"    "io"    "io/ioutil"    "log"    "os")func main() {    contentHashes := make(map[string]string)    if err := readDir("./", contentHashes); err != nil {        log.Fatal(err)    }}func readDir(dirName string, contentHashes map[string]string) (err error) {    filesInfos, err := ioutil.ReadDir(dirName)    if err != nil {        return    }    for _, fi := range filesInfos {        if fi.IsDir() {            err := readDir(dirName+fi.Name()+"/", contentHashes)            if err != nil {                return err            }        } else {            // The important bits for this question            location := dirName + fi.Name()            // open the file            f, err := os.Open(location)            if err != nil {                return err            }            h := md5.New()            // copy the file body into the hash function            if _, err := io.Copy(h, f); err != nil {                return err            }            // Check if a file body with the same hash already exists            key := fmt.Sprintf("%x", h.Sum(nil))            if val, exists := contentHashes[key]; exists {                fmt.Println("Duplicate found", val, location)            } else {                contentHashes[key] = location            }        }    }    return}

肥皂起泡泡

使用 sha256 比较文件例子:package mainimport (&nbsp; &nbsp; "crypto/sha256"&nbsp; &nbsp; "encoding/hex"&nbsp; &nbsp; "fmt"&nbsp; &nbsp; "os"&nbsp; &nbsp; "path/filepath"&nbsp; &nbsp; "sync"&nbsp; &nbsp; "flag"&nbsp; &nbsp; "runtime"&nbsp; &nbsp; "io")var dir stringvar workers inttype Result struct {&nbsp; &nbsp; file&nbsp; &nbsp;string&nbsp; &nbsp; sha256 [32]byte}func worker(input chan string, results chan<- *Result, wg *sync.WaitGroup) {&nbsp; &nbsp; for file := range input {&nbsp; &nbsp; &nbsp; &nbsp; var h = sha256.New()&nbsp; &nbsp; &nbsp; &nbsp; var sum [32]byte&nbsp; &nbsp; &nbsp; &nbsp; f, err := os.Open(file)&nbsp; &nbsp; &nbsp; &nbsp; if err != nil {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; fmt.Fprintln(os.Stderr, err)&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; continue&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; if _, err = io.Copy(h, f); err != nil {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; fmt.Fprintln(os.Stderr, err)&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; f.Close()&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; continue&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; f.Close()&nbsp; &nbsp; &nbsp; &nbsp; copy(sum[:], h.Sum(nil))&nbsp; &nbsp; &nbsp; &nbsp; results <- &Result{&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; file:&nbsp; &nbsp;file,&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; sha256: sum,&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; }&nbsp; &nbsp; wg.Done()}func search(input chan string) {&nbsp; &nbsp; filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {&nbsp; &nbsp; &nbsp; &nbsp; if err != nil {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; fmt.Fprintln(os.Stderr, err)&nbsp; &nbsp; &nbsp; &nbsp; } else if info.Mode().IsRegular() {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; input <- path&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; return nil&nbsp; &nbsp; })&nbsp; &nbsp; close(input)}func main() {&nbsp; &nbsp; flag.StringVar(&dir, "dir", ".", "directory to search")&nbsp; &nbsp; flag.IntVar(&workers, "workers", 
runtime.NumCPU(), "number of workers")&nbsp; &nbsp; flag.Parse()&nbsp; &nbsp; fmt.Printf("Searching in %s using %d workers...\n", dir, workers)&nbsp; &nbsp; input := make(chan string)&nbsp; &nbsp; results := make(chan *Result)&nbsp; &nbsp; wg := sync.WaitGroup{}&nbsp; &nbsp; wg.Add(workers)&nbsp; &nbsp; for i := 0; i < workers; i++ {&nbsp; &nbsp; &nbsp; &nbsp; go worker(input, results, &wg)&nbsp; &nbsp; }&nbsp; &nbsp; go search(input)&nbsp; &nbsp; go func() {&nbsp; &nbsp; &nbsp; &nbsp; wg.Wait()&nbsp; &nbsp; &nbsp; &nbsp; close(results)&nbsp; &nbsp; }()&nbsp; &nbsp; counter := make(map[[32]byte][]string)&nbsp; &nbsp; for result := range results {&nbsp; &nbsp; &nbsp; &nbsp; counter[result.sha256] = append(counter[result.sha256], result.file)&nbsp; &nbsp; }&nbsp; &nbsp; for sha, files := range counter {&nbsp; &nbsp; &nbsp; &nbsp; if len(files) > 1 {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; fmt.Printf("Found %d duplicates for %s: \n", len(files), hex.EncodeToString(sha[:]))&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; for _, f := range files {&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; fmt.Println("-> ", f)&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; &nbsp; &nbsp; }&nbsp; &nbsp; }}
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Go