Friday, April 11, 2008

weighted random sample in C

/**
* Sample compile:
* gcc -Wall -lRmath weighted_sample.c -I/Library/Frameworks/R.framework/Resources/include -L/Library/Frameworks/R.framework/Resources/lib
* $ ./a.out
mean: 0.409
*/

#define MATHLIB_STANDALONE
#include <Rmath.h>

#include <stdio.h>
#include <stdlib.h>

/**
* Compute a cumulative sum of y, ie,
* result[j] = sum(i=0:j) y[i].
* The caller is responsible for freeing the returned
* object.
*/
double* normedCumSum(const double* y, int len)
{
double* result = malloc(len * sizeof(double));
if (len > 0) {
double norm;
int i;

result[0] = y[0];

for (i = 1; i < len; ++i) {
result[i] = result[i - 1] + y[i];
}

norm = 1.0 / result[len - 1];
for (i = 0; i < len; ++i) {
result[i] *= norm;
}
}

return result;
}

/**
* Sample reps elements from y (with replacement) with the given
* weights w. The caller is responsible for freeing the returned
* object.
*/
double* sample(const double* y, const double* w, int len, int reps)
{
double* result = malloc(reps * sizeof(double));
double* probs = normedCumSum(w, len);
int i;

for (i = 0; i < reps; ++i) {
int j;

const double u = runif(0.0, 1.0);
for (j = 0; j < len; ++j) {
if (u <= probs[j]) {
result[i] = y[j];
break;
}
}
}

free(probs);

return result;
}

int main()
{
double y[2] = {0, 1};
double w[2] = {0.6, 0.4};
double* x = sample(y, w, 2, 1000);
double sum = 0.0;
int i;
for (i = 0; i < 1000; ++i) {
sum += x[i];
}
printf("mean: %g\n", sum / 1000);

return 0;
}
Could be quite a bit more efficient. It should work though.

No comments: