annotate doodle/utils/prog/dupes.d @ 123:0d427170a805

Move to 64-bit
author David Bryant <bagnose@gmail.com>
date Wed, 04 May 2011 22:19:44 +0930
parents f1cf62339ed5
children 89016abde9fe
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
117
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
1 import std.stdio;
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
2 import std.string;
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
3 import std.exception;
121
f1cf62339ed5 More tweaking
David Bryant <bagnose@gmail.com>
parents: 120
diff changeset
4 import std.algorithm;
117
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
5 import std.file;
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
6 import std.md5;
120
c275f26399c6 Tinkerings
David Bryant <bagnose@gmail.com>
parents: 119
diff changeset
7 import std.getopt;
c275f26399c6 Tinkerings
David Bryant <bagnose@gmail.com>
parents: 119
diff changeset
8 import std.conv;
121
f1cf62339ed5 More tweaking
David Bryant <bagnose@gmail.com>
parents: 120
diff changeset
9 import std.ctype;
120
c275f26399c6 Tinkerings
David Bryant <bagnose@gmail.com>
parents: 119
diff changeset
10 import std.c.stdlib;
117
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
11
121
f1cf62339ed5 More tweaking
David Bryant <bagnose@gmail.com>
parents: 120
diff changeset
// Convert a human-readable size string to a byte count, eg:
//   "50"  -> 50
//   "80B" -> 80
//   "10K" -> 10240
//   "1M"  -> 1048576
//
// The optional trailing unit must be one of B, K, M, G, T (powers of 1024).
//
// Throws ConvException if the string is empty, the unit character is not
// recognised, the numeric part cannot be parsed, or the resulting size
// does not fit in a ulong.
ulong string_to_size(string s) {
    immutable map = [ 'B':1UL, 'K':1UL<<10, 'M':1UL<<20, 'G':1UL<<30, 'T':1UL<<40 ];

    if (s.length == 0) {
        throw new ConvException("Empty string");
    }

    ulong multiplier = 1;

    if (isalpha(s[$-1])) {
        // A trailing letter is interpreted as the unit suffix.
        immutable ulong * m = (s[$-1] in map);

        if (!m) {
            throw new ConvException(format("Bad size unit character: %s", s[$-1]));
        }

        multiplier = *m;

        // Strip the unit so that only the numeric part remains.
        s = s[0..$-1];
    }

    // to!ulong throws ConvException for an empty or non-numeric remainder.
    immutable ulong value = to!ulong(s);

    // Unsigned multiplication wraps silently, so reject values that would
    // overflow rather than returning a bogus (small) size.
    if (value > ulong.max / multiplier) {
        throw new ConvException(format("Size too large: %s", s));
    }

    return multiplier * value;
}
f1cf62339ed5 More tweaking
David Bryant <bagnose@gmail.com>
parents: 120
diff changeset
44
f1cf62339ed5 More tweaking
David Bryant <bagnose@gmail.com>
parents: 120
diff changeset
// Render a byte count for display.
//
// The value is always expressed in whole kibibytes (integer division by
// 1024) followed by a 'K' suffix, so anything below 1024 bytes is shown
// as "0K".
//
// TODO: choose the unit (B, K, M, G, T) according to the magnitude of the
// size instead of always using K.
string size_to_string(in ulong size) {
    immutable ulong kibibytes = size / 1024;
    return format("%sK", kibibytes);
}
f1cf62339ed5 More tweaking
David Bryant <bagnose@gmail.com>
parents: 120
diff changeset
59
120
c275f26399c6 Tinkerings
David Bryant <bagnose@gmail.com>
parents: 119
diff changeset
// Recursively scan the given directories and report groups of files that
// appear to be duplicates of one another.
//
// dirs        - directories to traverse (depth-first, symlinks are skipped)
// file_size   - files smaller than this many bytes are ignored
// digest_size - number of leading bytes of each file fed to the MD5 digest
//               (rounded up to a whole read chunk)
// verbose     - when true, also report files that could not be read while
//               computing digests
//
// Files are first grouped by size; only files sharing a size are hashed.
// Files of equal size with equal digests are listed together. Results are
// written to stdout, largest sizes first.
void find_duplicates(in string[] dirs,
                     in ulong    file_size,
                     in ulong    digest_size,
                     bool        verbose) {
    // Compute the MD5 digest of the leading part of a file.
    // Reading stops once at least max_bytes have been consumed.
    // Throws ErrnoException if the file cannot be opened.
    static ubyte[16] compute_md5(in string filename, in ulong max_bytes) {
        ubyte[16] digest;

        auto file = File(filename, "r");
        scope(exit) file.close;

        MD5_CTX context;
        context.start();

        // A zero chunk size is not valid for chunks(), so when no bytes
        // are requested we skip reading and digest the empty input.
        if (max_bytes > 0) {
            size_t chunk_size = min(max_bytes, 4096 * 1024);
            ulong byte_count = 0;
            foreach (ubyte[] buffer; chunks(file, chunk_size)) {
                context.update(buffer);
                byte_count += buffer.length;
                if (byte_count >= max_bytes) {
                    break;
                }
            }
        }

        context.finish(digest);

        return digest;
    }

    struct FileInfo {
        string name;
        ulong  size;
    }

    FileInfo[] file_array;

    writefln("Accumulating file list");

    foreach (string dir; dirs) {
        // Check exists() first: isDir() throws FileException for a path
        // that does not exist, which would abort the whole run.
        if (exists(dir) && isDir(dir)) {
            string last_entry;
            try {
                foreach (string filename; dirEntries(dir, SpanMode.depth, false)) {
                    last_entry = filename;
                    try {
                        if (!isSymLink(filename) && isFile(filename)) {
                            ulong size = getSize(filename);
                            if (size >= file_size) {
                                file_array ~= FileInfo(filename, size);
                            }
                        }
                    }
                    catch (Exception ex) {
                        writefln("Skipping %s", filename);
                        //writefln("Exception %s", ex);
                        // TODO accumulate errors and print after traversal
                    }
                }
            }
            catch (FileException ex) {
                // The traversal itself failed; keep whatever was collected.
                writefln("Error, dirEntries bailed out after: %s. Continuing anyway", last_entry);
            }
        }
        else {
            writefln("Not a dir: %s", dir);
        }
    }

    writefln("Processing %s files", file_array.length);

    // Map each file size to the indices (into file_array) of files of that size.
    uint[][ulong] size_to_file_indices;
    // Set of sizes shared by two or more files.
    bool[ulong] duplicate_sizes;

    foreach (uint index, file; file_array) {
        //writefln("%s %s %s", index, file.name, file.size);

        if (uint[] * indices = (file.size in size_to_file_indices)) {
            if (indices.length == 1) {
                // Second time we've seen a file of this size,
                // record it in the duplicate_sizes array
                duplicate_sizes[file.size] = true;
            }

            (*indices) ~= index;
        }
        else {
            size_to_file_indices[file.size] = [ index ];
        }
    }

    writefln("Number of files of duplicate size %s", duplicate_sizes.length);

    ulong total_waste = 0;

    // Largest sizes first.
    foreach_reverse (size; duplicate_sizes.keys.sort) {
        uint[] indices = size_to_file_indices[size];
        //writefln("For size %s there are %s files", size, indices.length);

        // Group the same-sized files by digest.
        uint[][ubyte[16]] digest_to_indices;

        foreach (index; indices) {
            const FileInfo file_info = file_array[index];

            try {
                ubyte[16] digest = compute_md5(file_info.name, digest_size);

                if (uint[] * duplicate_indices = (digest in digest_to_indices)) {
                    // Same size and same digest as an earlier file.
                    (*duplicate_indices) ~= index;
                }
                else {
                    digest_to_indices[digest] ~= index;
                }
            }
            catch (ErrnoException ex) {
                // Unreadable files are left out of the comparison.
                if (verbose) {
                    writefln("Skipping: %s", file_info.name);
                }
            }

            //writefln("\t%s", file_info.name);
        }

        foreach (indices2; digest_to_indices) {
            if (indices2.length > 1) {
                // List the duplicates: only the files that share this
                // digest, not every file of this size.
                foreach (i, index; indices2) {
                    FileInfo file_info = file_array[index];
                    if (i == 0) {
                        writefln("%s", size_to_string(file_info.size));
                        // NOTE(review): one file size is counted per group,
                        // regardless of how many copies the group contains.
                        total_waste += file_info.size;
                    }
                    writefln(" %s", file_info.name);
                }
                writefln("");
            }
        }
    }

    writefln("Done, total waste: %s", size_to_string(total_waste));
}
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
199
c566cdbccaeb Added dupes, the rewrite.
David Bryant <bagnose@gmail.com>
parents:
diff changeset
// Program entry point.
//
// Parses the command-line options, converts the size options to byte
// counts and runs the duplicate search over the remaining arguments,
// which are treated as directories.
//
// Exit status: 0 on success, 1 after printing the usage text (--help),
// 2 if the options could not be parsed or converted.
int main(string[] args) {
    ulong file_size;
    ulong digest_size;
    bool  verbose;

    try {
        // getopt callback for --help: print the usage text and terminate.
        void help(in string) {
            writefln("Usage: dupes [OPTION]... DIR...\n"
                     "Recursively locate duplicate files in a list of directories\n"
                     "\n"
                     "Options\n"
                     " -d, --digest-size=SIZE size of digest used for comparison\n"
                     " -f, --file-size=SIZE minimum size of files searched for duplication\n"
                     " -v, --verbose be verbose\n"
                     " --help display this help and exit\n"
                     "\n"
                     "SIZE is an integer, optionally followed by K, M, G, T");
            exit(1);
        }

        // Defaults used when the options are not given.
        string file_size_string   = "100K";
        string digest_size_string = "100K";

        // getopt removes the options it recognises from args, leaving the
        // program name followed by the directory arguments.
        getopt(args,
               "file-size|f",   &file_size_string,
               "digest-size|d", &digest_size_string,
               "verbose|v",     &verbose,
               "help",          &help);

        file_size   = string_to_size(file_size_string);
        digest_size = string_to_size(digest_size_string);
    }
    catch (ConvException ex) {
        writefln("Conversion error: %s", ex);
        exit(2);
    }
    catch (Exception ex) {
        // Any other failure while parsing the command line (for example an
        // option getopt rejects) is reported instead of escaping main.
        writefln("Error: %s", ex.msg);
        exit(2);
    }

    if (verbose) {
        writefln("file-size=%s, digest-size=%s", size_to_string(file_size), size_to_string(digest_size));
    }

    find_duplicates(args[1..$], file_size, digest_size, verbose);

    return 0;
}