Mercurial > projects > doodle
view doodle/utils/prog/duplicates.d @ 112:b569d7d5064f
Added some utilities that are a work in progress.
author | David Bryant <bagnose@gmail.com> |
---|---|
date | Thu, 14 Apr 2011 11:27:17 +0930 |
parents | |
children | 9cc6c428fdbe |
line wrap: on
line source
import std.stdio;
import std.string;
import std.file;
import std.c.stdio;
import std.c.string;
import std.cstream;
import core.sys.posix.dirent;
import std.md5;

/**
 * Walks a directory tree, computes the MD5 digest of every regular file it
 * encounters and prints the groups of files that share a digest.
 *
 * All of the work (scan followed by report) is done in the constructor.
 */
class DuplicateFinder {
    /**
     * Scan `dir` recursively, then print one group per duplicated digest:
     * the digest, the recorded size, the number of files and each file name.
     *
     * Params:
     *     dir = root directory of the scan
     */
    this(in string dir) {
        recurse_directory(dir.dup);

        // Separates the per-file progress lines from the final report.
        writefln("\n");

        foreach (digest; _duplicate_digests.keys) {
            writefln("%s", digestToString(digest));

            auto file_info = _file_info_map[digest];
            writefln("Size %s, Count %s, Digest %s",
                     file_info.size, file_info.names.length, digestToString(digest));

            foreach (name; file_info.names) {
                writefln("\t%s", name);
            }
        }
    }

    private {
        /// Size and names of all files seen so far that share one digest.
        struct FileInfo {
            this(in ulong size_, string first_name) {
                size   = size_;
                names ~= first_name;
            }

            ulong    size;      // size recorded for the first file seen with this digest
            string[] names;     // every file name seen with this digest
        }

        // Duplicates whose recorded size is below this value are not reported.
        //static const ulong SIZE_THRESHOLD = 1_000;
        static const ulong SIZE_THRESHOLD = 0;

        bool[ubyte[16]]     _duplicate_digests;     // set of all duplicate digests
        FileInfo[ubyte[16]] _file_info_map;         // map of digest to file info

        /**
         * Compute the MD5 digest of `filename`, print it, and record the file
         * under that digest.
         *
         * A digest is added to the duplicate set only when it has already
         * been seen for an earlier file. The first file with a given digest
         * is merely remembered, so unique files never appear in the report.
         *
         * Params:
         *     filename = path of the file to hash
         *     filesize = size of the file, stored when the digest is first seen
         */
        void compute_md5(in char[] filename, in ulong filesize) {
            //writefln("%s", filename);

            auto file = File(filename.idup, "r");
            scope(exit) file.close();

            ubyte[16] digest;

            // Hash the file in 4 MiB chunks so it is never loaded whole.
            MD5_CTX context;
            context.start();
            foreach (ubyte[] buffer; chunks(file, 4096 * 1024)) {
                context.update(buffer);
            }
            context.finish(digest);

            writefln("%s: %s", digestToString(digest), filename);

            if (FileInfo * file_info = (digest in _file_info_map)) {
                // Digest already known: this file duplicates an earlier one.
                file_info.names ~= filename.idup;

                if (file_info.size >= SIZE_THRESHOLD) {
                    _duplicate_digests[digest] = true;
                }
            }
            else {
                // First file with this digest: remember it, but do NOT flag
                // it as a duplicate (doing so reported every file scanned).
                _file_info_map[digest] = FileInfo(filesize, filename.idup);
                //writefln("%s", _file_info_map.length);
            }
        }

        /**
         * Callback handed to listdir for each directory entry: descends into
         * directories and hashes regular files. Entries that are neither are
         * ignored.
         *
         * Returns: always true (NOTE(review): presumably tells listdir to
         * keep iterating - confirm against the std.file documentation).
         */
        bool entry_callback(DirEntry * de) {
            //writefln("File: %s", de.name);

            if (de.isdir) {
                recurse_directory(de.name);
            }
            else if (de.isfile) {
                compute_md5(de.name, de.size);
            }

            return true;
        }

        /**
         * List `dirname`, passing every entry to entry_callback.
         *
         * A FileException raised during the listing is swallowed on purpose
         * so that the rest of the scan can carry on.
         */
        void recurse_directory(in char[] dirname) {
            //writefln("Dir: %s", dirname);

            try {
                listdir(dirname, &entry_callback);
            }
            catch (FileException ex) {
                //writefln("Skipping: %s", dirname);
            }
        }
    }
}

/// Runs a duplicate scan (and report) for each directory named on the command line.
int main(string[] args) {
    foreach (string arg; args[1..$]) {
        new DuplicateFinder(arg);
    }

    return 0;
}