changeset 118:94233d54e16a

Cleanup of dupes
author David Bryant <bagnose@gmail.com>
date Wed, 20 Apr 2011 22:37:16 +0930
parents c566cdbccaeb
children 8343c1dafac6
files doodle/utils/prog/dupes.d
diffstat 1 files changed, 27 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/doodle/utils/prog/dupes.d	Tue Apr 19 23:11:03 2011 +0930
+++ b/doodle/utils/prog/dupes.d	Wed Apr 20 22:37:16 2011 +0930
@@ -1,18 +1,13 @@
 import std.stdio;
 import std.string;
 import std.exception;
-import std.random;
-import std.algorithm;
 import std.file;
-import std.c.stdio;
-import std.c.string;
-import std.cstream;
-import core.sys.posix.dirent;
 import std.md5;
 
-
 class DuplicateFinder {
     this(in string[] dirs) {
+        FileInfo[] _file_array;
+
         writefln("Accumulating files");
 
         string last_name;
@@ -76,7 +71,7 @@
                 FileInfo file_info = _file_array[index];
 
                 try {
-                    ubyte[16] digest = compute_md5(file_info.name);
+                    ubyte[16] digest = compute_md5(file_info.name, MD5_AMOUNT);
 
                     if (uint[] * duplicate_indices = (digest in digest_to_indices)) {
                         // A true duplicate
@@ -110,7 +105,23 @@
         writefln("Done\n");
     }
 
-    ubyte[16] compute_md5(in string name) {
+    struct FileInfo {
+        this(in string name_, in ulong size_) {
+            name = name_;
+            size = size_;
+        }
+
+        string name;
+        ulong  size;
+    }
+
+    immutable ulong KILO = 1 << 10;
+    immutable ulong MEGA = 1 << 20;
+
+    immutable ulong SIZE_THRESHOLD = 100 * KILO;
+    immutable ulong MD5_AMOUNT     = 10 * KILO;
+
+    static ubyte[16] compute_md5(in string name, in ulong max_bytes) {
         ubyte[16] digest;
 
         auto file = File(name, "r");
@@ -118,42 +129,19 @@
 
         MD5_CTX context;
         context.start();
-        { // Block 1:
-            // Compute the actual digest
-            ulong amount = 0;
-            foreach (ubyte[] buffer; chunks(file, 1024)) {
-                context.update(buffer);
-                //bytes_chewed(buffer.length);
-                amount += buffer.length;
-                if (amount >= MD5_AMOUNT) {
-                    break;
-                }
+        ulong byte_count = 0;
+        foreach (ubyte[] buffer; chunks(file, 1024)) {
+            context.update(buffer);
+            byte_count += buffer.length;
+            if (byte_count >= max_bytes) {
+                break;
             }
         }
+
         context.finish(digest);
 
         return digest;
     }
-
-    private {
-        immutable ulong KILO = 1 << 10;
-        immutable ulong MEGA = 1 << 20;
-
-        immutable ulong SIZE_THRESHOLD = 100 * KILO;
-        immutable ulong MD5_AMOUNT     = 10 * KILO;
-
-        struct FileInfo {
-            this(in string name_, in ulong size_) {
-                name = name_;
-                size = size_;
-            }
-
-            string name;
-            ulong  size;
-        };
-
-        FileInfo[] _file_array;
-    }
 }
 
 int main(string[] args) {