Mercurial > projects > doodle
changeset 116:31c27f4f3bbc
Not sure.
author | David Bryant <bagnose@gmail.com> |
---|---|
date | Sun, 17 Apr 2011 23:23:29 +0930 |
parents | d7330cc52622 |
children | c566cdbccaeb |
files | doodle/utils/prog/duplicates.d |
diffstat | 1 files changed, 19 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/doodle/utils/prog/duplicates.d Sat Apr 16 19:48:33 2011 +0930
+++ b/doodle/utils/prog/duplicates.d Sun Apr 17 23:23:29 2011 +0930
@@ -15,6 +15,14 @@
 // and then informing the user of files with duplicate content.
 // Only duplicate files over a certain size are reported.
 
+// Thoughts:
+// Size threshold of files we care about:
+// Accumulate array of all files above the size threshold.
+// Only files that have matching sizes can possibly be duplicates.
+// Hash them on size?
+// Compute md5sum of first N bytes for files where there is more than
+// one of the same size
+
 class DuplicateFinder {
     this(in string[] dirs) {
         // First pass to gather the number of files and bytes
@@ -116,8 +124,8 @@
         string[] names;
     };
 
-    //static const ulong SIZE_THRESHOLD = 1_000;
-    static const ulong SIZE_THRESHOLD = 0;
+    static const ulong SIZE_THRESHOLD = 100 * 1_024;
+    static const ulong AMOUNT_SUMMED = 100 * 1_024;
 
     bool[ubyte[16]] _duplicate_digests; // set of all duplicate digests
     FileInfo[ubyte[16]] _file_info_map; // map of digest to file info
@@ -131,7 +139,7 @@
         _current_byte += bytes;
         double progress = cast(double)_current_byte / cast(double)_total_bytes;
         if (progress - _last_progress > 0.0005) {
-            writef("\rProgress %.1f%%", 100.0 * progress);
+            writef("\rProgress %.1f%% %s ", 100.0 * progress, filename);
             std.stdio.stdout.flush();
             _last_progress = progress;
         }
@@ -150,10 +158,16 @@
         context.start();
         { // Block 1:
             // Compute the actual digest
-            foreach (ubyte[] buffer; chunks(file, 4096 * 1024)) {
+            ulong amount = 0;
+            foreach (ubyte[] buffer; chunks(file, 1024)) {
                 context.update(buffer);
-                bytes_chewed(buffer.length);
+                //bytes_chewed(buffer.length);
+                amount += buffer.length;
+                if (amount >= AMOUNT_SUMMED) {
+                    break;
+                }
             }
+            bytes_chewed(size);
         }
         context.finish(digest);
 
```