Saturday, January 26, 2008

C code for reading a CSV file

The world definitely needs another implementation of this.

There are a lot of places where it might be better (more efficient) to keep an extra copy of a value rather than looking it up in the struct. I was originally writing this for someone else, though, and thought it was easier to understand as it is. Maybe I am wrong. Suggestions for improvement are always welcome.

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * A dense matrix of doubles stored in one contiguous buffer.
 * Element (r, c) lives at data[r * columns + c]; both the struct
 * and its buffer are heap-allocated by readCsv() and released by
 * freeMatrix().
 */
typedef struct {
int columns; /* number of values in each row */
int rows; /* number of rows stored in data */

double* data; /* row-major values, at least rows * columns of them */
} Matrix;

/**
 * Releases a Matrix together with its data buffer.
 * A null pointer is accepted and ignored.
 */
void freeMatrix(Matrix* aMatrix)
{
    if (!aMatrix) {
        return;
    }

    free(aMatrix->data);
    free(aMatrix);
}

/**
 * Counts how many times the character c occurs in the
 * NUL-terminated string str. The terminator itself is never
 * counted.
 */
int countChar(const char* str, char c)
{
    int hits = 0;
    const char* cursor;

    for (cursor = str; *cursor != '\0'; ++cursor) {
        hits += (*cursor == c) ? 1 : 0;
    }

    return hits;
}

#define MAX_LINE 2000

/**
 * Reads a matrix of numbers from CSV text.
 *
 * input -- a stream of comma-separated numbers, rows terminated
 * by new lines. The number of columns is determined from
 * the first line read. Reading stops at end of file or
 * at the first later line that starts with '\n' or '\r'.
 * naValue -- the value that will be used to replace
 * fields left blank, and fields missing from rows that
 * are shorter than the first one.
 *
 * Returns a newly allocated Matrix (release it with freeMatrix()),
 * or NULL if input is NULL, no line could be read, a field could
 * not be parsed as a number, or memory could not be allocated.
 * This function does not set errno itself; any value found there
 * comes from the failing library call.
 *
 * Limitation: each fgets() call reads at most MAX_LINE - 1
 * characters, so a longer line is consumed in pieces and every
 * piece is treated as a separate row.
 */
Matrix* readCsv(FILE* input, double naValue) {
    char buffer[MAX_LINE];
    char* line;
    Matrix* result;
    int total = 0;
    int capacity;

    if (!input) {
        return NULL;
    }

    line = fgets(buffer, MAX_LINE, input);
    if (!line) {
        return NULL;
    }

    result = malloc(sizeof *result);
    if (!result) {
        return NULL;
    }

    result->rows = 0;
    result->columns = countChar(line, ',') + 1;
    /* Start with room for 20 rows; doubled whenever it fills up. */
    capacity = result->columns * 20;

    result->data = malloc(sizeof *result->data * capacity);
    if (!result->data) {
        freeMatrix(result);
        return NULL;
    }

    do {
        int i;

        if ((total + result->columns) > capacity) {
            double* grown;

            capacity *= 2;
            /* Keep the old pointer until realloc succeeds so that
             * the buffer can still be freed on failure. */
            grown = realloc(result->data, sizeof *grown * capacity);
            if (!grown) {
                freeMatrix(result);
                return NULL;
            }
            result->data = grown;
        }

        for (i = 0; i < result->columns; ++i) {
            /* line is NULL once the row has run out of commas;
             * '\0' covers a trailing empty field on a final line
             * that has no newline. */
            if (!line || ',' == line[0]
                || '\n' == line[0] || '\r' == line[0]
                || '\0' == line[0]) {
                result->data[total] = naValue;
            }
            else {
                double value;

                if (sscanf(line, "%lf", &value) != 1) {
                    freeMatrix(result);
                    return NULL;
                }
                result->data[total] = value;
            }
            ++total;

            /* Advance to the character after the next comma. */
            if (line) {
                line = strchr(line, ',');
                if (line) {
                    ++line;
                }
            }
        }

        ++(result->rows);
        line = fgets(buffer, MAX_LINE, input);
    } while (line && ('\n' != line[0])
             && ('\r' != line[0]));

    return result;
}

/**
 * Writes the matrix to out as text: one output line per row, each
 * value formatted with "%f" and followed by a single space.
 */
void printMatrix(FILE* out, const Matrix* aMatrix)
{
    int next = 0; /* flat index of the next value to print */
    int row;

    for (row = 0; row < aMatrix->rows; ++row) {
        int remaining = aMatrix->columns;

        while (remaining > 0) {
            fprintf(out, "%f ", aMatrix->data[next]);
            ++next;
            --remaining;
        }
        fprintf(out, "\n");
    }
}

/**
 * Command-line driver: reads the CSV file named by the single
 * argument and prints the parsed matrix to standard output.
 * Blank fields are printed as -HUGE_VAL.
 *
 * Returns 0 on success and EXIT_FAILURE when the argument count is
 * wrong or the file cannot be opened or parsed.
 */
int main(int argc, char** argv)
{
    FILE* in;
    Matrix* m;

    if (argc != 2) {
        fprintf(stderr, "usage: %s FILE\n",
                (argc > 0 && argv[0]) ? argv[0] : "readcsv");
        return EXIT_FAILURE;
    }

    in = fopen(argv[1], "r");
    if (!in) {
        /* Without this check a missing file would pass NULL to
         * fgets() and fclose(). */
        printf("problem reading %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    m = readCsv(in, -HUGE_VAL);
    fclose(in);

    if (!m) {
        printf("problem reading %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    printMatrix(stdout, m);
    freeMatrix(m);

    return 0;
}

What is with Blogger clipping wide posts? Very helpful! As is inserting BR tags into a textarea. Use some of that magic DHTML to let people fold the sidebar out of the way. (Well, I suppose it's possible for me to add this myself. As a newbie I shouldn't be complaining. But I think I am talking to myself now anyway. Hello, you, me!)

1 comment:

Unknown said...

Hello

What about how to use this code? How do I compile it, and what parameters do I use to run it?

Thanks for your time.