Files
duplicate-finder/main.zig
hypercube 71eeee87a9 fixed stupid bug
fixed incorrectly outputting to stdout and stderr causing incorrect output when redirecting into a file
2026-05-24 18:41:47 +00:00

114 lines
4.2 KiB
Zig

//! using zig-0.15.2
//! zig build-exe main.zig -fsingle-threaded -OReleaseFast
//! ReleaseFast is recommended but do whatever you want
//! usage: main dirname1 dirname2 ...
//! duplicate groups are written to stdout
//! the summary report is written to stderr
//! if it doesn't work fuck you
const std = @import("std");
/// One regular file found while walking the directories given on the command line.
const File = struct {
/// Path of the file. `main` fills this with the allocator-owned result of
/// `realpathAlloc` and is responsible for freeing it.
path: []u8,
/// File size as reported by `statFile`; used as a cheap pre-filter before
/// file contents are compared.
size: u64,
/// Set to true once the file has been placed in a duplicate group, so it is
/// skipped in later comparisons.
solved: bool = false,
};
/// Reports whether the files at two absolute paths hold byte-identical contents.
///
/// Both files are read in lockstep, one fixed-size chunk at a time, and the
/// function stops at the first differing chunk. Any error from opening or
/// reading either file is returned to the caller.
fn cmp_file(path1: []const u8, path2: []const u8) !bool {
    const chunk_len = 1024 * 16;

    var lhs_io_buf: [chunk_len]u8 = undefined;
    var rhs_io_buf: [chunk_len]u8 = undefined;

    const lhs_file = try std.fs.openFileAbsolute(path1, .{});
    defer lhs_file.close();
    var lhs_reader = lhs_file.reader(&lhs_io_buf);

    const rhs_file = try std.fs.openFileAbsolute(path2, .{});
    defer rhs_file.close();
    var rhs_reader = rhs_file.reader(&rhs_io_buf);

    var lhs_chunk: [chunk_len]u8 = undefined;
    var rhs_chunk: [chunk_len]u8 = undefined;
    while (true) {
        const lhs_len = try lhs_reader.interface.readSliceShort(&lhs_chunk);
        const rhs_len = try rhs_reader.interface.readSliceShort(&rhs_chunk);

        // A length mismatch means one file ended early; otherwise compare bytes.
        const chunks_match = lhs_len == rhs_len and
            std.mem.eql(u8, lhs_chunk[0..lhs_len], rhs_chunk[0..rhs_len]);
        if (!chunks_match) return false;

        // A short read only happens at end of stream, so everything matched.
        if (lhs_len < chunk_len) return true;
    }
}
/// Entry point: finds files with identical contents under the given directories.
///
/// Every command-line argument after the program name is opened as a directory
/// and walked recursively. Regular files of equal size are compared byte by byte
/// with `cmp_file`. Each group of identical files is printed to stdout, and a
/// one-line summary (duplicate count, file count, elapsed time) goes to stderr.
///
/// Any error from opening, walking, reading or writing is returned and aborts the run.
pub fn main() !void {
    var stdout_buf: [1024 * 16]u8 = undefined;
    var stdout_writer = std.fs.File.stdout().writer(&stdout_buf);
    const stdout = &stdout_writer.interface;
    var stderr_buf: [1024 * 16]u8 = undefined;
    var stderr_writer = std.fs.File.stderr().writer(&stderr_buf);
    const stderr = &stderr_writer.interface;

    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    const allocator = gpa.allocator();
    defer _ = gpa.deinit();

    var files_list: std.ArrayList(File) = .empty;
    defer files_list.deinit(allocator);
    // Registered after the list's deinit so it runs first (defers are LIFO):
    // every stored path is released on both the success and the error path.
    defer {
        for (files_list.items) |item| allocator.free(item.path);
    }

    var args_it = std.process.args();
    _ = args_it.next(); // skip program name

    var file_count: usize = 0;
    var dup_count: usize = 0;
    const start_time = try std.time.Instant.now();

    // Phase 1: collect the resolved path and size of every regular file.
    while (args_it.next()) |arg| {
        var dir = try std.fs.cwd().openDir(arg, .{ .iterate = true });
        defer dir.close();
        var walker = try dir.walk(allocator);
        defer walker.deinit();

        while (try walker.next()) |entry| {
            if (entry.kind != .file) continue;

            const path = try entry.dir.realpathAlloc(allocator, entry.basename);
            // Until the list owns the path, a failure below must free it here.
            errdefer allocator.free(path);
            const stat = try entry.dir.statFile(entry.basename);

            try files_list.append(allocator, .{ .path = path, .size = stat.size });
            file_count += 1;
        }
    }

    // Phase 2: group identical files and print each group.
    for (files_list.items, 0..) |*item, idx| {
        if (item.solved) continue;

        var duplicates: std.ArrayList(usize) = .empty;
        defer duplicates.deinit(allocator);

        // Only later entries need checking: an earlier entry is either already
        // in a group or was compared against this one and found different.
        const first_candidate = idx + 1;
        for (files_list.items[first_candidate..], 0..) |*item2, offset| {
            if (item2.solved) continue;
            if (item.size != item2.size) continue;
            // The same file can be listed twice when argument directories overlap.
            if (std.mem.eql(u8, item.path, item2.path)) continue;

            const identical = try cmp_file(item.path, item2.path);
            if (!identical) continue;

            if (duplicates.items.len == 0) {
                try duplicates.append(allocator, idx);
                item.solved = true;
            }
            try duplicates.append(allocator, first_candidate + offset);
            dup_count += 1;
            item2.solved = true;
        }

        if (duplicates.items.len == 0) continue;

        try stdout.print("duplicate ({}): \n {s}\n", .{ duplicates.items.len, files_list.items[duplicates.items[0]].path });
        for (duplicates.items[1..]) |dup_idx| {
            try stdout.print(" {s}\n", .{files_list.items[dup_idx].path});
        }
        try stdout.flush();
    }

    const elapsed_ns = (try std.time.Instant.now()).since(start_time);
    const elapsed_seconds = @as(f64, @floatFromInt(elapsed_ns)) / std.time.ns_per_s;
    try stderr.print("found {} duplicates in {} files in {:.2}s\n", .{ dup_count, file_count, elapsed_seconds });
    try stderr.flush();
}