Mercurial > projects > doodle
annotate doodle/utils/prog/duplicates.d @ 113:9cc6c428fdbe
Rewrote duplicates.d based on dirEntries. Removed all idup/dup calls
no longer needed. Still blows the hood on memory usage.
author | David Bryant <bagnose@gmail.com> |
---|---|
date | Thu, 14 Apr 2011 19:10:46 +0930 |
parents | b569d7d5064f |
children | b87e2e0a046a |
rev | line source |
---|---|
112
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
1 import std.stdio; |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
2 import std.string; |
113
9cc6c428fdbe
Rewrote duplicates.d based on dirEntries. Removed all idup/dup calls
David Bryant <bagnose@gmail.com>
parents:
112
diff
changeset
|
3 import std.exception; |
9cc6c428fdbe
Rewrote duplicates.d based on dirEntries. Removed all idup/dup calls
David Bryant <bagnose@gmail.com>
parents:
112
diff
changeset
|
4 import std.algorithm; |
112
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
5 import std.file; |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
6 import std.c.stdio; |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
7 import std.c.string; |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
8 import std.cstream; |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
9 import core.sys.posix.dirent; |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
10 import std.md5; |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
11 |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
/**
 * Finds files with identical content underneath a directory.
 *
 * All of the work is done by the constructor:
 *   1. walk the tree once to count regular files and total bytes
 *      (only used to drive the progress display),
 *   2. walk it again, computing an MD5 digest for every regular file,
 *   3. print every digest that was seen more than once, largest files first.
 *
 * Files or directories that cannot be examined are skipped in both passes
 * rather than aborting the run.
 */
class DuplicateFinder {
    /**
     * Scans `dir` recursively (symlinks are not followed) and writes the
     * duplicate report to stdout.
     *
     * Params:
     *   dir = root directory of the scan
     */
    this(in string dir) {
        // First pass to gather the number of files and bytes

        writeln("Accumulating total bytes / files");

        uint total_files = 0;

        try {
            foreach (string name; dirEntries(dir, SpanMode.depth, false)) {
                try {
                    if (isFile(name)) {
                        _total_bytes += getSize(name);
                        ++total_files;
                    }
                }
                catch (Exception ex) {
                    writefln("Skipping %s", name);
                }
            }
        }
        catch (FileException ex) {
            // The traversal itself failed (e.g. unreadable directory);
            // carry on with whatever was gathered so far.
            writefln("dirEntries bailed out. Continuing anyway");
        }

        writefln("Files %s, bytes %s", total_files, _total_bytes);
        writeln("Accumulating MD5 sums");

        // Second pass: hash every regular file. This pass gets the same
        // protection as the first one, so anything the first pass tolerated
        // (dangling symlink, unreadable directory, file removed in between)
        // cannot abort the run here.
        try {
            foreach (string name; dirEntries(dir, SpanMode.depth, false)) {
                try {
                    if (isFile(name)) {
                        compute_md5(name);
                    }
                }
                catch (Exception ex) {
                    // Deliberately quiet: printing here would garble the
                    // in-place progress line.
                    // TODO accumulate errors and print after traversal is complete
                }
            }
        }
        catch (FileException ex) {
            writefln("dirEntries bailed out. Continuing anyway");
        }

        // Terminate the "\rProgress ..." line.
        writeln();

        writeln("Sorting keys");

        ubyte[16][] keys = _duplicate_digests.keys;
        // Largest files first.
        bool compare_by_size(const ref ubyte[16] a, const ref ubyte[16] b) {
            return _file_info_map[a].size > _file_info_map[b].size;
        }
        sort!(compare_by_size)(keys);

        writeln("Printing results");

        foreach (digest; keys) {
            auto file_info = _file_info_map[digest];
            writefln("Size %s, Count %s", file_info.size, file_info.names.length);
            foreach (name; file_info.names) {
                writefln("\t%s", name);
            }
        }

        writeln("Done");
    }

    private {
        /// Size and paths of all files that share one digest.
        struct FileInfo {
            this(in ulong size_, string first_name) {
                size = size_;
                names ~= first_name;
            }

            ulong    size;      // size in bytes of the first file seen with this digest
            string[] names;     // every path seen with this digest
        }

        // Duplicates smaller than this many bytes are not reported.
        static const ulong SIZE_THRESHOLD = 0;

        // Read granularity used while hashing (4 MiB).
        static const size_t CHUNK_SIZE = 4096 * 1024;

        bool[ubyte[16]]     _duplicate_digests;     // set of all duplicate digests
        FileInfo[ubyte[16]] _file_info_map;         // map of digest to file info

        ulong  _total_bytes;            // byte total from the first pass
        ulong  _current_byte;           // bytes hashed so far
        double _last_progress = -1.0;   // fraction shown at the last progress print

        // One read buffer shared by every compute_md5() call, so hashing
        // many files does not allocate a fresh 4 MiB block per file.
        ubyte[] _buffer;

        /**
         * Records that `bytes` more bytes were hashed and refreshes the
         * progress line when it has moved by more than 0.05%.
         */
        void bytes_chewed(ulong bytes) {
            _current_byte += bytes;

            // Nothing meaningful to show when the first pass counted no
            // bytes; also avoids a division by zero (NaN/inf progress).
            if (_total_bytes == 0) {
                return;
            }

            double progress = cast(double)_current_byte / cast(double)_total_bytes;
            if (progress - _last_progress > 0.0005) {
                writef("\rProgress %3.1f%%", 100.0 * progress);
                std.stdio.stdout.flush();
                _last_progress = progress;
            }
        }

        /**
         * Hashes one file and records it under its digest, flagging the
         * digest as a duplicate when it has been seen before.
         *
         * Params:
         *   filename = path of the regular file to hash
         *
         * Throws: ErrnoException if the file cannot be opened or read.
         */
        void compute_md5(in string filename) {
            // Binary mode: the digest must cover the raw bytes.
            auto file = File(filename, "rb");
            scope(exit) file.close();

            if (_buffer.length == 0) {
                _buffer = new ubyte[CHUNK_SIZE];
            }

            ubyte[16] digest;
            ulong bytes_read = 0;

            MD5_CTX context;
            context.start();
            for (;;) {
                // rawRead returns the filled prefix of the buffer, which is
                // empty at end of file.
                ubyte[] chunk = file.rawRead(_buffer);
                if (chunk.length == 0) {
                    break;
                }
                bytes_read += chunk.length;
                bytes_chewed(chunk.length);
                context.update(chunk);
            }
            context.finish(digest);

            if (FileInfo * file_info = (digest in _file_info_map)) {
                // duplicate
                file_info.names ~= filename;
                assert(file_info.names.length > 1);

                if (file_info.size >= SIZE_THRESHOLD) {
                    _duplicate_digests[digest] = true;
                }
            }
            else {
                // unseen: the size is the number of bytes actually hashed,
                // so no further stat call (which could itself fail) is needed.
                _file_info_map[digest] = FileInfo(bytes_read, filename);
            }
        }
    }
}
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
146 |
b569d7d5064f
Added some utilities that are a work in progress.
David Bryant <bagnose@gmail.com>
parents:
diff
changeset
|
/**
 * Program entry point: runs one duplicate scan per command-line argument.
 *
 * Params:
 *   args = program name followed by the directories to scan
 *
 * Returns: always 0.
 */
int main(string[] args) {
    // args[0] is the program name; everything after it is a directory.
    auto directories = args[1..$];

    foreach (directory; directories) {
        // The constructor performs the scan and prints the report.
        new DuplicateFinder(directory);
    }

    return 0;
}