# HG changeset patch # User David Bryant # Date 1303304836 -34200 # Node ID 94233d54e16a392e3f141321b3b71f97c2ea65de # Parent c566cdbccaeb3ad2b392611470710bcc69253ad9 Cleanup of dupes diff -r c566cdbccaeb -r 94233d54e16a doodle/utils/prog/dupes.d --- a/doodle/utils/prog/dupes.d Tue Apr 19 23:11:03 2011 +0930 +++ b/doodle/utils/prog/dupes.d Wed Apr 20 22:37:16 2011 +0930 @@ -1,18 +1,13 @@ import std.stdio; import std.string; import std.exception; -import std.random; -import std.algorithm; import std.file; -import std.c.stdio; -import std.c.string; -import std.cstream; -import core.sys.posix.dirent; import std.md5; - class DuplicateFinder { this(in string[] dirs) { + FileInfo[] _file_array; + writefln("Accumulating files"); string last_name; @@ -76,7 +71,7 @@ FileInfo file_info = _file_array[index]; try { - ubyte[16] digest = compute_md5(file_info.name); + ubyte[16] digest = compute_md5(file_info.name, MD5_AMOUNT); if (uint[] * duplicate_indices = (digest in digest_to_indices)) { // A true duplicate @@ -110,7 +105,23 @@ writefln("Done\n"); } - ubyte[16] compute_md5(in string name) { + struct FileInfo { + this(in string name_, in ulong size_) { + name = name_; + size = size_; + } + + string name; + ulong size; + } + + immutable ulong KILO = 1 << 10; + immutable ulong MEGA = 1 << 20; + + immutable ulong SIZE_THRESHOLD = 100 * KILO; + immutable ulong MD5_AMOUNT = 10 * KILO; + + static ubyte[16] compute_md5(in string name, in ulong max_bytes) { ubyte[16] digest; auto file = File(name, "r"); @@ -118,42 +129,19 @@ MD5_CTX context; context.start(); - { // Block 1: - // Compute the actual digest - ulong amount = 0; - foreach (ubyte[] buffer; chunks(file, 1024)) { - context.update(buffer); - //bytes_chewed(buffer.length); - amount += buffer.length; - if (amount >= MD5_AMOUNT) { - break; - } + ulong byte_count = 0; + foreach (ubyte[] buffer; chunks(file, 1024)) { + context.update(buffer); + byte_count += buffer.length; + if (byte_count >= max_bytes) { + break; } } + context.finish(digest); return digest; } - - private { - immutable ulong KILO = 1 << 10; - immutable ulong MEGA = 1 << 20; - - immutable ulong SIZE_THRESHOLD = 100 * KILO; - immutable ulong MD5_AMOUNT = 10 * KILO; - - struct FileInfo { - this(in string name_, in ulong size_) { - name = name_; - size = size_; - } - - string name; - ulong size; - }; - - FileInfo[] _file_array; - } } int main(string[] args) {