feat: add basic csv loading

This commit is contained in:
2024-08-21 21:33:21 +02:00
parent 3d582893aa
commit 0969a81104
4 changed files with 364 additions and 21 deletions

96
src/csv.zig Normal file
View File

@@ -0,0 +1,96 @@
const std = @import("std");
/// Error set for CSV handling in this file.
/// `NoDelimiterFound` is what `determineDelimiter` returns when the input
/// contains none of the candidate delimiter characters.
const CsvError = error{NoDelimiterFound};
/// Reports whether the byte `target` occurs anywhere in `arr`.
/// An empty `arr` never contains anything, so the result is `false`.
fn contains(arr: []const u8, target: u8) bool {
    return std.mem.indexOfScalar(u8, arr, target) != null;
}
/// Guesses the field delimiter of a CSV line by counting candidate characters.
///
/// The candidates are `,`, `;`, tab and `|`. The candidate that occurs most
/// often in `str` is returned. When several candidates are tied, the one
/// listed first wins, so the result is deterministic.
///
/// Returns `error.NoDelimiterFound` (the member of `CsvError`) when `str`
/// contains none of the candidates, which includes the empty string.
/// No memory is allocated.
pub fn determineDelimiter(str: []const u8) !u8 {
    const candidates = [_]u8{ ',', ';', '\t', '|' };
    // One counter per candidate, indexed in parallel with `candidates`.
    // `usize` cannot overflow here because a slice never holds more than
    // `maxInt(usize)` bytes.
    var counts = [_]usize{0} ** candidates.len;

    for (str) |c| {
        if (std.mem.indexOfScalar(u8, &candidates, c)) |idx| {
            counts[idx] += 1;
        }
    }

    var best_idx: usize = 0;
    for (counts, 0..) |count, idx| {
        // Strict comparison keeps the earliest candidate on a tie.
        if (count > counts[best_idx]) {
            best_idx = idx;
        }
    }

    if (counts[best_idx] == 0) {
        return error.NoDelimiterFound;
    }
    return candidates[best_idx];
}
/// Reads the CSV file at `filepath` (relative to the current working
/// directory) and splits every line into fields.
///
/// The delimiter is detected once, from the first line, with
/// `determineDelimiter`. The first line is kept as the header row and every
/// following line as an entry row. The parsed rows are not returned yet; all
/// memory used while parsing is released before the function returns.
///
/// Errors: any error from opening or reading the file, `error.StreamTooLong`
/// for a line longer than the 4096-byte line buffer,
/// `error.NoDelimiterFound` when the first line has no candidate delimiter,
/// and `error.OutOfMemory`.
pub fn loadFile(filepath: []const u8) !void {
    // Everything parsed here lives in one arena so it is freed in one go,
    // on success and on every error path.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    const file = try std.fs.cwd().openFile(filepath, .{});
    defer file.close();
    var buf_reader = std.io.bufferedReader(file.reader());
    const in_stream = buf_reader.reader();

    var buffer: [4096]u8 = undefined;
    var header: ?std.ArrayList([]const u8) = null;
    var entries = std.ArrayList(std.ArrayList([]const u8)).init(alloc);
    var delimiter: ?u8 = null;

    while (try in_stream.readUntilDelimiterOrEof(&buffer, '\n')) |raw_line| {
        // Tolerate CRLF line endings: drop the '\r' left before the '\n'.
        const line = std.mem.trimRight(u8, raw_line, "\r");

        // Detect the delimiter on the first line only and reuse it afterwards.
        const del = delimiter orelse blk: {
            const found = try determineDelimiter(line);
            delimiter = found;
            break :blk found;
        };

        var fields = std.ArrayList([]const u8).init(alloc);
        var parts = std.mem.splitScalar(u8, line, del);
        while (parts.next()) |part| {
            // `part` points into `buffer`, which the next read overwrites,
            // so each field is copied before it is stored.
            try fields.append(try alloc.dupe(u8, part));
        }

        if (header == null) {
            header = fields;
        } else {
            try entries.append(fields);
        }
    }
}
test "Determine delimiter" {
    // The most frequent candidate wins.
    try std.testing.expectEqual(@as(u8, ','), try determineDelimiter("this,is,a,test"));
    try std.testing.expectEqual(@as(u8, ';'), try determineDelimiter("th#is; is,a; test; with,many;symbols"));
    // `expectError` fails the test when the call unexpectedly succeeds,
    // which a bare `catch` block would let pass unnoticed.
    try std.testing.expectError(
        CsvError.NoDelimiterFound,
        determineDelimiter("This does not have an delimiter"),
    );
}

View File

@@ -1,24 +1,28 @@
const std = @import("std");
const csv = @import("csv.zig");
/// Program entry point as shown in this listing: prints a banner to stderr,
/// writes a hint to buffered stdout, creates an argument iterator, skips its
/// first element and flushes stdout.
pub fn main() !void {
// NOTE(review): this listing comes from a diff hunk (`@@ -1,24 +1,28 @@`)
// rendered without +/- markers, so removed and added lines may be interleaved
// in this body — confirm against the actual file before relying on it.
// Prints to stderr (it's a shortcut based on `std.io.getStdErr()`)
std.debug.print("All your {s} are belong to us.\n", .{"codebase"});
// stdout is for the actual output of your application, for example if you
// are implementing gzip, then only the compressed bytes should be sent to
// stdout, not any debugging messages.
const stdout_file = std.io.getStdOut().writer();
// Wrap stdout in a buffered writer; output only appears after `bw.flush()`.
var bw = std.io.bufferedWriter(stdout_file);
const stdout = bw.writer();
// Allocator handed to the argument iterator below.
const alloc = std.heap.page_allocator;
try stdout.print("Run `zig build test` to run the tests.\n", .{});
var args = try std.process.ArgIterator.initWithAllocator(alloc);
// Discard the first argument (presumably the program name — TODO confirm).
_ = args.next();
try bw.flush(); // don't forget to flush!
}
// NOTE(review): from `var filepath` onward this body uses `args`, `stdout`,
// `bw` and `csv.loadFile`, none of which are declared inside this test block.
// The listing comes from a diff hunk rendered without +/- markers, so these
// are presumably added lines of `main` shown after the removed test body —
// confirm against the actual file.
test "simple test" {
var list = std.ArrayList(i32).init(std.testing.allocator);
defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
try list.append(42);
try std.testing.expectEqual(@as(i32, 42), list.pop());
// The path stays empty unless the argument iterator yields another value.
var filepath: [:0]const u8 = "";
if (args.next()) |value| {
filepath = value;
}
// No path given: print a message, flush it and stop.
if (std.mem.eql(u8, filepath, "")) {
_ = try stdout.write("No file specified");
_ = try bw.flush();
return;
}
// Hand the path to the CSV loader; its errors propagate to the caller.
try csv.loadFile(filepath);
_ = try bw.flush();
}

View File

@@ -1,10 +1,7 @@
const std = @import("std");
const csv = @import("csv.zig");
const testing = std.testing;
/// Returns the sum of `a` and `b`.
export fn add(a: i32, b: i32) i32 {
    const sum = a + b;
    return sum;
}
// NOTE(review): this span has two `test` openers but only one closing brace.
// It comes from a diff hunk (`@@ -1,10 +1,7 @@`) rendered without +/- markers,
// so the named test is presumably the removed version and the anonymous test
// the added one, sharing the final `}` — confirm against the actual file.
test "basic add functionality" {
// Checks `add` with one sample pair.
try testing.expect(add(3, 7) == 10);
test {
// Reference every declaration of this file so that nested tests are picked up.
testing.refAllDecls(@This());
}