From 9fa6f184f33a8bc30d4bf5d8c6ef2769af4e602d Mon Sep 17 00:00:00 2001
From: hypercube
Date: Sun, 24 May 2026 13:08:31 +0000
Subject: [PATCH] Update main.zig

---
Review notes: the line structure of this patch was restored and the added
code was revised (stdout write failures handled, files hashed in chunks,
directory handles closed, paths freed on error paths, duplicate scan limited
to the run of equal-size files). The blob ids on the "index" line below are
those of the originally submitted contents.

 main.zig | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 112 insertions(+), 12 deletions(-)

diff --git a/main.zig b/main.zig
index 43946a7..e448f7c 100644
--- a/main.zig
+++ b/main.zig
@@ -1,13 +1,73 @@
 // using zig-0.15.2
 const std = @import("std");
+const sha256 = std.crypto.hash.sha2.Sha256;
+const stdout = std.fs.File.stdout();
+const stderr = std.fs.File.stderr();
+
+/// Prints a formatted diagnostic to stderr.
+/// A failed write is dropped on purpose: no other channel is left to report it on.
+fn err_print(comptime fmt: []const u8, args: anytype) void {
+    var writer = stderr.writer(&.{});
+    writer.interface.print(fmt, args) catch return;
+}
+
+/// Prints formatted program output to stdout.
+/// A failed write (for example a closed pipe) is reported on stderr and ends the
+/// process with status 1, since later results could not be delivered either.
+fn out_print(comptime fmt: []const u8, args: anytype) void {
+    var writer = stdout.writer(&.{});
+    writer.interface.print(fmt, args) catch {
+        err_print("error: cannot write to stdout\n", .{});
+        std.process.exit(1);
+    };
+}
 
+/// One regular file found while walking the directories given on the command line.
 const File = struct {
-    path: [:0]u8,
+    /// Absolute path, allocated with the allocator used in main().
+    path: []u8,
+    /// Size in bytes as reported by stat.
     size: u64,
-    duplicate_size: ?usize = null,
+    /// True once the file has been reported as part of a duplicate group.
+    solved: bool = false,
+    /// SHA-256 of the contents as a u256; null until the file has been hashed.
     hash: ?u256 = null,
 };
 
+/// Number of hashFile() calls so far; reported in the final summary line.
+var hashcount: usize = 0;
+
+/// Returns the SHA-256 digest of the bytes read from f, packed into a u256.
+/// The file is hashed in 32 KiB chunks, so memory use does not grow with file size.
+fn hashFile(f: std.fs.File) !u256 {
+    hashcount += 1;
+    var hasher = sha256.init(.{});
+    var buf: [1024 * 32]u8 = undefined;
+    while (true) {
+        const len = try f.read(&buf);
+        if (len == 0) break; // end of file
+        hasher.update(buf[0..len]);
+    }
+    var digest: [sha256.digest_length]u8 = undefined;
+    hasher.final(&digest);
+    return std.mem.bytesToValue(u256, &digest);
+}
+
+/// Returns the content hash of item, computing and caching it on first use.
+fn cachedHash(item: *File) !u256 {
+    if (item.hash) |cached| return cached;
+    const file = try std.fs.openFileAbsolute(item.path, .{});
+    defer file.close();
+    const digest = try hashFile(file);
+    item.hash = digest;
+    return digest;
+}
+
+/// Less-than callback for std.mem.sort: orders files by ascending size.
+fn cmp_fn(_: void, lhs: File, rhs: File) bool {
+    return lhs.size < rhs.size;
+}
+
 pub fn main() !void {
     var gpa = std.heap.GeneralPurposeAllocator(.{}){};
     const allocator = gpa.allocator();
@@ -17,29 +77,69 @@ pub fn main() !void {
     defer list.deinit(allocator);
+    // Registered after list.deinit, so it runs first: every path is freed on all
+    // exit paths, including early error returns.
+    defer {
+        for (list.items) |item| allocator.free(item.path);
+    }
     var args_it = std.process.args();
     _ = args_it.next(); // skip program name
+
+    var file_count: usize = 0;
+    const start_time = try std.time.Instant.now();
+
     while (args_it.next()) |arg| {
-        const dir = try std.fs.cwd().openDir(arg, .{ .iterate = true });
+        var dir = try std.fs.cwd().openDir(arg, .{ .iterate = true });
+        defer dir.close(); // walker.deinit() leaves the starting directory open
         var walker = try dir.walk(allocator);
         defer walker.deinit();
         while (try walker.next()) |entry| {
             if (entry.kind == .file) {
+                file_count += 1;
                 const stat = try entry.dir.statFile(entry.basename);
+                const path = try entry.dir.realpathAlloc(allocator, entry.basename);
+                errdefer allocator.free(path); // not owned by the list until append succeeds
                 try list.append(allocator, .{
-                    .path = try allocator.dupeZ(u8, entry.path),
+                    .path = path,
                     .size = stat.size,
                 });
             }
         }
     }
 
+    // Ascending by size: files that can be duplicates of each other end up adjacent.
+    std.mem.sort(File, list.items, {}, cmp_fn);
+
     for (list.items, 0..) |*item, idx| {
-        for (list.items) |*item2| {
-            if (item.size == item2.size) {
-                item.duplicate_size = idx;
-                item2.duplicate_size = idx;
-            }
-        }
-        std.debug.print("path: {s}, size {}\n", .{ item.path, item.size });
-        allocator.free(item.path);
+        if (item.solved) continue;
+        var duplicates: std.ArrayList(usize) = .empty;
+        defer duplicates.deinit(allocator);
+        // Entries before idx were already compared with this one, so only later ones are scanned.
+        for (list.items[idx + 1 ..], idx + 1..) |*item2, idx2| {
+            // The list is sorted by size, so the first different size ends the candidates.
+            if (item2.size != item.size) break;
+            if (item2.solved) continue;
+            if (std.mem.eql(u8, item.path, item2.path)) continue; // same file reached twice
+
+            const hash = try cachedHash(item);
+            const hash2 = try cachedHash(item2);
+            if (hash == hash2) {
+                if (duplicates.items.len == 0) {
+                    try duplicates.append(allocator, idx);
+                    item.solved = true;
+                }
+                try duplicates.append(allocator, idx2);
+                item2.solved = true;
+            }
+        }
+        if (duplicates.items.len > 0) {
+            out_print("duplicate ({}): \n {s}\n", .{ duplicates.items.len, item.path });
+            for (duplicates.items[1..]) |dup_idx| {
+                out_print(" {s}\n", .{list.items[dup_idx].path});
+            }
+        }
     }
+
+    const end_time = try std.time.Instant.now();
+    const elapsed_nanos = end_time.since(start_time);
+    const elapsed_seconds = @as(f64, @floatFromInt(elapsed_nanos)) / std.time.ns_per_s;
+    err_print("found {} files in {d:.2}s and hashcount is {}\n", .{ file_count, elapsed_seconds, hashcount });
 }