comparison doodle/utils/prog/duplicates.d @ 115:d7330cc52622

Added instructions to duplicates.d on the smallest changes required to trigger/untrigger the memory blowout. Interestingly the blowout only occurs when compiled with -m32, not -m64.
author David Bryant <bagnose@gmail.com>
date Sat, 16 Apr 2011 19:48:33 +0930
parents b87e2e0a046a
children 31c27f4f3bbc
comparison
equal deleted inserted replaced
114:b87e2e0a046a 115:d7330cc52622
1 import std.stdio; 1 import std.stdio;
2 import std.string; 2 import std.string;
3 import std.exception; 3 import std.exception;
4 import std.random;
4 import std.algorithm; 5 import std.algorithm;
5 import std.file; 6 import std.file;
6 import std.c.stdio; 7 import std.c.stdio;
7 import std.c.string; 8 import std.c.string;
8 import std.cstream; 9 import std.cstream;
52 53
53 writeln("Accumulating MD5 digests"); 54 writeln("Accumulating MD5 digests");
54 55
55 foreach (string dir; dirs) { 56 foreach (string dir; dirs) {
56 foreach (string name; dirEntries(dir, SpanMode.depth, false)) { 57 foreach (string name; dirEntries(dir, SpanMode.depth, false)) {
57 if (isFile(name)) { 58 try {
58 try { 59 if (isFile(name)) {
59 //writefln("MD5'ing %s", name); 60 //writefln("MD5'ing %s", name);
60 compute_md5(name); 61 compute_md5(name, getSize(name));
61 } 62 }
62 catch (ErrnoException ex) { 63 }
63 //writefln("Skipping file: %s, %s", name, ex); 64 catch (FileException ex) {
64 //writefln("(errno) Skipping file: %s", name); 65 writefln("Skipping %s", name);
65 // TODO accumulate errors and print after traversal is complete 66 }
66 } 67 catch (ErrnoException ex) {
68 //writefln("Skipping file: %s, %s", name, ex);
69 //writefln("(errno) Skipping file: %s", name);
70 // TODO accumulate errors and print after traversal is complete
67 } 71 }
68 } 72 }
69 } 73 }
70 74
71 writefln(""); 75 writefln("");
81 85
82 // Print the results out to the user, in descending order 86 // Print the results out to the user, in descending order
83 // of file size 87 // of file size
84 88
85 writeln("Printing results"); 89 writeln("Printing results");
90
91 writefln("Number of duplicate files: %s", _duplicate_digests.length);
86 92
87 foreach (digest; keys) { 93 foreach (digest; keys) {
88 auto file_info = _file_info_map[digest]; 94 auto file_info = _file_info_map[digest];
89 /* 95 /*
90 writefln("Size %s, Count %s, Digest %s", 96 writefln("Size %s, Count %s, Digest %s",
118 124
119 ulong _total_bytes; 125 ulong _total_bytes;
120 ulong _current_byte; 126 ulong _current_byte;
121 double _last_progress = -1.0; 127 double _last_progress = -1.0;
122 128
123 void bytes_chewed(ulong bytes) { 129 void compute_md5(in string filename, in ulong size) {
124 _current_byte += bytes; 130 void bytes_chewed(ulong bytes) {
125 double progress = cast(double)_current_byte / cast(double)_total_bytes; 131 _current_byte += bytes;
126 if (progress - _last_progress > 0.0005) { 132 double progress = cast(double)_current_byte / cast(double)_total_bytes;
127 writef("\rProgress %.1f%%", 100.0 * progress); 133 if (progress - _last_progress > 0.0005) {
128 std.stdio.stdout.flush(); 134 writef("\rProgress %.1f%%", 100.0 * progress);
129 _last_progress = progress; 135 std.stdio.stdout.flush();
130 } 136 _last_progress = progress;
131 } 137 }
132 138 }
133 void compute_md5(in string filename) {
134 auto file = File(filename, "r");
135 scope(exit) file.close;
136 139
137 ubyte[16] digest; 140 ubyte[16] digest;
138 141
139 MD5_CTX context; 142 // If Block 1 and Block 2 are both uncommented then there is a memory explosion.
140 context.start(); 143 // However, if either one is commented out then there isn't...
141 foreach (ubyte[] buffer; chunks(file, 4096 * 1024)) { 144
142 bytes_chewed(buffer.length); 145 {
143 context.update(buffer); 146 auto file = File(filename, "r");
144 } 147 scope(exit) file.close;
145 context.finish(digest); 148
146 149 MD5_CTX context;
147 if (FileInfo * file_info = (digest in _file_info_map)) { 150 context.start();
148 // This is a duplicate digest, append the subsequent name 151 { // Block 1:
149 file_info.names ~= filename; 152 // Compute the actual digest
150 153 foreach (ubyte[] buffer; chunks(file, 4096 * 1024)) {
151 // Record the duplicate as an offender if its size exceeds the threshold 154 context.update(buffer);
152 if (file_info.size >= SIZE_THRESHOLD) { 155 bytes_chewed(buffer.length);
153 _duplicate_digests[digest] = true; 156 }
154 } 157 }
155 } 158 context.finish(digest);
156 else { 159
157 // We have not seen this digest before 160 /+
158 _file_info_map[digest] = FileInfo(getSize(filename), filename); 161 { // Block 1 alternative:
159 } 162 // Create a random digest
163 digest = make_random_digest;
164 bytes_chewed(size);
165 }
166 +/
167 }
168
169 { // Block 2:
170 // Update the data structures
171 if (FileInfo * file_info = (digest in _file_info_map)) {
172 // This is a duplicate digest, append the subsequent name
173 file_info.names ~= filename;
174
175 // Record the duplicate as an offender if its size exceeds the threshold
176 if (file_info.size >= SIZE_THRESHOLD) {
177 _duplicate_digests[digest] = true;
178 }
179 }
180 else {
181 // We have not seen this digest before
182 _file_info_map[digest] = FileInfo(size, filename);
183 }
184 }
185 }
186
187 ubyte[16] make_random_digest() {
188 ubyte[16] digest;
189 foreach (ref a; digest) {
190 a = cast(ubyte)uniform(0, 256);
191 }
192 return digest;
160 } 193 }
161 } 194 }
162 } 195 }
163 196
164 int main(string[] args) { 197 int main(string[] args) {