annotate doodle/utils/prog/duplicates.d @ 114:b87e2e0a046a

Cleanup of duplicates.d
author David Bryant <bagnose@gmail.com>
date Fri, 15 Apr 2011 11:07:47 +0930
parents 9cc6c428fdbe
children d7330cc52622
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
112
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
1 import std.stdio;
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
2 import std.string;
113
9cc6c428fdbe Rewrote duplicates.d based on dirEntries. Removed all idup/dup calls
David Bryant <bagnose@gmail.com>
parents: 112
diff changeset
3 import std.exception;
9cc6c428fdbe Rewrote duplicates.d based on dirEntries. Removed all idup/dup calls
David Bryant <bagnose@gmail.com>
parents: 112
diff changeset
4 import std.algorithm;
112
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
5 import std.file;
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
6 import std.c.stdio;
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
7 import std.c.string;
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
8 import std.cstream;
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
9 import core.sys.posix.dirent;
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
10 import std.md5;
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
11
114
b87e2e0a046a Cleanup of duplicates.d
David Bryant <bagnose@gmail.com>
parents: 113
diff changeset
12 // This program recursively processes files in a list
b87e2e0a046a Cleanup of duplicates.d
David Bryant <bagnose@gmail.com>
parents: 113
diff changeset
13 // of directories, computing an MD5 digest on each file
b87e2e0a046a Cleanup of duplicates.d
David Bryant <bagnose@gmail.com>
parents: 113
diff changeset
14 // and then informing the user of files with duplicate content.
b87e2e0a046a Cleanup of duplicates.d
David Bryant <bagnose@gmail.com>
parents: 113
diff changeset
15 // Only duplicate files over a certain size are reported.
b87e2e0a046a Cleanup of duplicates.d
David Bryant <bagnose@gmail.com>
parents: 113
diff changeset
16
112
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
// Finds files with identical content below a list of directories.
//
// All of the work happens in the constructor: it walks the directories
// twice (once to size the job for progress reporting, once to compute an
// MD5 digest per file) and then prints every group of files that share a
// digest, biggest files first.
class DuplicateFinder {
    // dirs: the directories to scan recursively. Symlinks are not followed
    // (third argument of dirEntries is false).
    //
    // Entries that cannot be examined or read are skipped; a directory
    // whose traversal fails is abandoned, but the remaining directories
    // are still processed.
    this(in string[] dirs) {
        // First pass to gather the number of files and bytes
        // so that we are able to convey progress to the user

        writeln("Accumulating total bytes / files");

        ulong total_files = count_files(dirs);

        writefln("Files %s, bytes %s", total_files, _total_bytes);

        // Go through the files again, but this time
        // compute the MD5 digests and build our data structures

        writeln("Accumulating MD5 digests");

        digest_files(dirs);

        // Terminate the "\rProgress ..." line written by bytes_chewed
        writefln("");

        // Sort our duplicate digests by size so that we print
        // the biggest duplicate file offenders first

        writeln("Sorting duplicate digests by size");

        ubyte[16][] keys = _duplicate_digests.keys;
        bool compare_by_size(const ref ubyte[16] a, const ref ubyte[16] b) { return _file_info_map[a].size > _file_info_map[b].size; }
        sort!(compare_by_size)(keys);

        // Print the results out to the user, in descending order
        // of file size

        writeln("Printing results");

        foreach (digest; keys) {
            auto file_info = _file_info_map[digest];
            writefln("Size %s, Count %s", file_info.size, file_info.names.length);
            foreach (name; file_info.names) {
                writefln("\t%s", name);
            }
        }

        writeln("Done");
    }

    private {
        // Everything we know about one digest: the size of the first file
        // that produced it and the names of all files that produced it.
        struct FileInfo {
            this(in ulong size_, string first_name) {
                size = size_;
                names ~= first_name;
            }

            ulong size;
            string[] names;
        }

        // Duplicates smaller than this many bytes are not reported.
        // Zero reports everything; use e.g. 1_000 to ignore small files.
        static const ulong SIZE_THRESHOLD = 0;

        bool[ubyte[16]] _duplicate_digests;     // set of all duplicate digests
        FileInfo[ubyte[16]] _file_info_map;     // map of digest to file info

        ulong _total_bytes;             // bytes found by the first pass
        ulong _current_byte;            // bytes digested so far by the second pass
        double _last_progress = -1.0;   // fraction last printed; -1 forces the first print

        // First pass: adds the size of every regular file below dirs to
        // _total_bytes and returns the number of such files.
        ulong count_files(in string[] dirs) {
            ulong total_files = 0;

            foreach (string dir; dirs) {
                // The try is per directory so that one unreadable directory
                // does not stop the remaining ones from being counted.
                try {
                    foreach (string name; dirEntries(dir, SpanMode.depth, false)) {
                        try {
                            if (isFile(name)) {
                                _total_bytes += getSize(name);
                                ++total_files;
                            }
                        }
                        catch (Exception ex) {
                            writefln("Skipping %s", name);
                            // TODO accumulate errors and print after traversal
                        }
                    }
                }
                catch (FileException ex) {
                    writefln("dirEntries bailed out. Continuing anyway");
                }
            }

            return total_files;
        }

        // Second pass: digests every regular file below dirs. Applies the
        // same tolerance as count_files, so anything the first pass
        // survived cannot abort the program here.
        void digest_files(in string[] dirs) {
            foreach (string dir; dirs) {
                try {
                    foreach (string name; dirEntries(dir, SpanMode.depth, false)) {
                        try {
                            if (isFile(name)) {
                                compute_md5(name);
                            }
                        }
                        catch (Exception ex) {
                            // Deliberately quiet: a message here would be
                            // interleaved with the "\r" progress line.
                            // TODO accumulate errors and print after traversal is complete
                        }
                    }
                }
                catch (FileException ex) {
                    writefln("dirEntries bailed out. Continuing anyway");
                }
            }
        }

        // Records that another `bytes` bytes have been digested and
        // refreshes the progress line once it has moved by more than 0.05%.
        void bytes_chewed(ulong bytes) {
            _current_byte += bytes;

            // Nothing to measure against (the first pass found no bytes or
            // bailed out early); avoid dividing by zero.
            if (_total_bytes == 0) {
                return;
            }

            double progress = cast(double)_current_byte / cast(double)_total_bytes;
            if (progress - _last_progress > 0.0005) {
                writef("\rProgress %.1f%%", 100.0 * progress);
                // Qualified: std.c.stdio also declares a stdout
                std.stdio.stdout.flush();
                _last_progress = progress;
            }
        }

        // Digests one file and files it under its digest in
        // _file_info_map. A digest that has been seen before is marked in
        // _duplicate_digests, subject to SIZE_THRESHOLD.
        //
        // Errors from opening or reading the file propagate to the caller.
        void compute_md5(in string filename) {
            // Binary mode: the digest must cover the bytes exactly as stored
            auto file = File(filename, "rb");
            scope(exit) file.close();

            ubyte[16] digest;
            // Counted from the bytes actually digested, so the recorded
            // size always matches the hashed content and no second stat
            // of the file is needed.
            ulong size = 0;

            MD5_CTX context;
            context.start();
            foreach (ubyte[] buffer; chunks(file, 4096 * 1024)) {
                bytes_chewed(buffer.length);
                context.update(buffer);
                size += buffer.length;
            }
            context.finish(digest);

            if (FileInfo * file_info = (digest in _file_info_map)) {
                // This is a duplicate digest, append the subsequent name
                file_info.names ~= filename;

                // Record the duplicate as an offender if its size is at least the threshold
                if (file_info.size >= SIZE_THRESHOLD) {
                    _duplicate_digests[digest] = true;
                }
            }
            else {
                // We have not seen this digest before
                _file_info_map[digest] = FileInfo(size, filename);
            }
        }
    }
}
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
163
b569d7d5064f Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
// Entry point. Every command line argument after the program name is a
// directory to scan for duplicate files.
// Returns 0 after a scan, 1 if no directory was given.
int main(string[] args) {
    if (args.length < 2) {
        // Qualified: std.c.stdio also declares a stderr
        std.stdio.stderr.writefln("Usage: %s <dir> [<dir> ...]",
                                  args.length > 0 ? args[0] : "duplicates");
        return 1;
    }

    // The constructor performs the scan and prints the report
    new DuplicateFinder(args[1..$]);

    return 0;
}