# HG changeset patch # User David Bryant # Date 1303375333 -34200 # Node ID 8343c1dafac6b1928fc62447b5eeb81b00651688 # Parent 94233d54e16a392e3f141321b3b71f97c2ea65de Make it compile under latest GtkD. Rewrote dupes.d as a single function diff -r 94233d54e16a -r 8343c1dafac6 doodle/gtk/palette.d --- a/doodle/gtk/palette.d Wed Apr 20 22:37:16 2011 +0930 +++ b/doodle/gtk/palette.d Thu Apr 21 18:12:13 2011 +0930 @@ -51,7 +51,7 @@ button.setTooltipText(item.tooltipText); _buttons[item.t] = button; - button.objectGSetDataFull(_indexStr, cast(gpointer)item.t); + button.setDataFull(_indexStr, cast(gpointer)item.t, null); button.addOnClicked(&onClicked); insert(button); diff -r 94233d54e16a -r 8343c1dafac6 doodle/utils/prog/dupes.d --- a/doodle/utils/prog/dupes.d Wed Apr 20 22:37:16 2011 +0930 +++ b/doodle/utils/prog/dupes.d Thu Apr 21 18:12:13 2011 +0930 @@ -4,117 +4,7 @@ import std.file; import std.md5; -class DuplicateFinder { - this(in string[] dirs) { - FileInfo[] _file_array; - - writefln("Accumulating files"); - - string last_name; - - foreach (string dir; dirs) { - try { - foreach (string name; dirEntries(dir, SpanMode.depth, false)) { - last_name = name; - try { - if (isFile(name)) { - ulong size = getSize(name); - if (size >= SIZE_THRESHOLD) { - _file_array ~= FileInfo(name, size); - } - } - } - catch (Exception ex) { - writefln("Skipping %s", name); - //writefln("Exception %s", ex); - // TODO accumulate errors and print after traversal - } - } - } - catch (FileException ex) { - // ignore - writefln("dirEntries bailed out (%s). Continuing anyway", last_name); - } - } - - writefln("Processing %s files", _file_array.length); - - uint[][ulong] size_to_file_indices; - bool[ulong] duplicate_sizes; - - foreach (index, file; _file_array) { - //writefln("%s %s %s", index, file.name, file.size); - - if (uint[] * indices = (file.size in size_to_file_indices)) { - if (indices.length == 1) { - // Second time we've seen a file of this size, - // record it in the duplicate_sizes array - duplicate_sizes[file.size] = true; - } - - (*indices) ~= index; - } - else { - size_to_file_indices[file.size] = [ index ]; - } - } - - writefln("Number of files of duplicate size %s", duplicate_sizes.length); - - foreach (size; duplicate_sizes.keys) { - uint[] indices = size_to_file_indices[size]; - //writefln("For size %s there are %s files", size, indices.length); - - uint[][ubyte[16]] digest_to_indices; - - foreach (index; indices) { - FileInfo file_info = _file_array[index]; - - try { - ubyte[16] digest = compute_md5(file_info.name, MD5_AMOUNT); - - if (uint[] * duplicate_indices = (digest in digest_to_indices)) { - // A true duplicate - // index and index2 are the same - - (*duplicate_indices) ~= index; - } - else { - digest_to_indices[digest] ~= index; - } - } - catch (ErrnoException ex) { - //writefln("Skipping: %s", file_info.name); - } - - //writefln("\t%s", file_info.name); - } - - foreach (indices2; digest_to_indices) { - if (indices2.length > 1) { - // List the duplicates - foreach (index; indices) { - FileInfo file_info = _file_array[index]; - writefln("%s %s", file_info.size, file_info.name); - } - writefln(""); - } - } - } - - writefln("Done\n"); - } - - struct FileInfo { - this(in string name_, in ulong size_) { - name = name_; - size = size_; - } - - string name; - ulong size; - } - +void find_duplicates(in string[] dirs) { immutable ulong KILO = 1 << 10; immutable ulong MEGA = 1 << 20; @@ -142,10 +32,113 @@ return digest; } + + struct FileInfo { + string name; + ulong size; + } + + FileInfo[] file_array; + + writefln("Accumulating file list"); + + string last_name; + + foreach (string dir; dirs) { + try { + foreach (string name; dirEntries(dir, SpanMode.depth, false)) { + last_name = name; + try { + if (!isSymLink(name) && isFile(name)) { + ulong size = getSize(name); + if (size >= SIZE_THRESHOLD) { + file_array ~= FileInfo(name, size); + } + } + } + catch (Exception ex) { + writefln("Skipping %s", name); + //writefln("Exception %s", ex); + // TODO accumulate errors and print after traversal + } + } + } + catch (FileException ex) { + // ignore + writefln("dirEntries bailed out (%s). Continuing anyway", last_name); + } + } + + writefln("Processing %s files", file_array.length); + + uint[][ulong] size_to_file_indices; + bool[ulong] duplicate_sizes; + + foreach (index, file; file_array) { + //writefln("%s %s %s", index, file.name, file.size); + + if (uint[] * indices = (file.size in size_to_file_indices)) { + if (indices.length == 1) { + // Second time we've seen a file of this size, + // record it in the duplicate_sizes array + duplicate_sizes[file.size] = true; + } + + (*indices) ~= index; + } + else { + size_to_file_indices[file.size] = [ index ]; + } + } + + writefln("Number of files of duplicate size %s", duplicate_sizes.length); + + foreach (size; duplicate_sizes.keys) { + uint[] indices = size_to_file_indices[size]; + //writefln("For size %s there are %s files", size, indices.length); + + uint[][ubyte[16]] digest_to_indices; + + foreach (index; indices) { + const FileInfo file_info = file_array[index]; + + try { + ubyte[16] digest = compute_md5(file_info.name, MD5_AMOUNT); + + if (uint[] * duplicate_indices = (digest in digest_to_indices)) { + // A true duplicate + // index and index2 are the same + + (*duplicate_indices) ~= index; + } + else { + digest_to_indices[digest] ~= index; + } + } + catch (ErrnoException ex) { + //writefln("Skipping: %s", file_info.name); + } + + //writefln("\t%s", file_info.name); + } + + foreach (indices2; digest_to_indices) { + if (indices2.length > 1) { + // List the duplicates + foreach (index; indices) { + FileInfo file_info = file_array[index]; + writefln("%s %s", file_info.size, file_info.name); + } + writefln(""); + } + } + } + + writefln("Done"); } int main(string[] args) { - new DuplicateFinder(args[1..$]); + find_duplicates(args[1..$]); return 0; } diff -r 94233d54e16a -r 8343c1dafac6 doodle/utils/prog/hash_test.d --- a/doodle/utils/prog/hash_test.d Wed Apr 20 22:37:16 2011 +0930 +++ b/doodle/utils/prog/hash_test.d Thu Apr 21 18:12:13 2011 +0930 @@ -24,8 +24,8 @@ */ aa[digest] = true; - if (count % 1000 == 0) { - writefln("%s %s", count, digest); + if (count % 10000 == 0) { + writefln("%s %3s", count, digest); } //writefln("%s %s", count, digest);