Commit 8f227f95 authored by bbguimaraes's avatar bbguimaraes
Browse files

21st_century_c: eleventh chapter

_Generic, extending structs, dict, substring, agents.
parent 0603af00
CFLAGS = -Wall -g -std=gnu11
CFLAGS = `pkg-config --cflags gsl` `pkg-config --cflags glib-2.0` -I../9
CFLAGS += -Wall -g -std=c11 -fms-extensions
LDLIBS = `pkg-config --libs gsl` `pkg-config --libs glib-2.0`
all: dict.a keyval.o dict.o
OBJECTS = dict.a simple_cplx seamlessone seamlesstwo dict_use cetology groupabm
all: $(OBJECTS)
dict.a: keyval.o dict.o
ar rcs -o $@ $^
dict_use: keyval.o dict.o
simple_cplx: complex.o
moby:
curl http://www.gutenberg.org/cache/epub/2701/pg2701.txt > moby
cetology: fstr.o ../9/string_utilities.o
../9/string_utilities.o:
make -C ../9/string_utilities.o
groupabm: groupabm.c groups.o apop.c
test: moby
./simple_cplx
./seamlessone
./seamlesstwo
./dict_use
./cetology
clean:
rm -f dict.a keyval.o dict.o
rm -f \
$(OBJECTS) keyval.o dict.o complex.o fstr.o moby \
../9/string_utilities.o groupabm.c groups.c groups.o
#include "apop.h"
void apop_vector_print(gsl_vector *data) {
if(!data)
printf("NULL\n");
else {
for(size_t i = 0; i < data->size; i++) {
if(data->data[i] == (int) data->data[i])
printf("% 5i", (int) data->data[i]);
else
printf("% 5f", data->data[i]);
if(i < data->size - 1)
printf(", ");
}
printf("\n");
}
}
gsl_vector *apop_vector_copy(const gsl_vector *in) {
if(!in)
return NULL;
gsl_vector *out = gsl_vector_alloc(in->size);
if(!out)
return NULL;
gsl_vector_memcpy(out, in);
return out;
}
double apop_vector_distance(
const gsl_vector *ina,
const gsl_vector *inb,
const char metric,
const double norm) {
double dist = 0;
for (size_t i=0; i< ina->size; i++)
dist += pow(
fabs(gsl_vector_get(ina, i) - gsl_vector_get(inb, i)),
norm);
return pow(dist, 1./norm);
}
gsl_rng *apop_rng_alloc(int seed) {
static int first_use = 1;
if (first_use) {
first_use = 0;
gsl_rng_env_setup();
}
gsl_rng *setme = gsl_rng_alloc(gsl_rng_taus2);
gsl_rng_set(setme, seed);
return setme;
}
#include <math.h>
#include <gsl/gsl_nan.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_sys.h>
#include <gsl/gsl_vector.h>
void apop_vector_print(gsl_vector *data);
gsl_vector *apop_vector_copy(const gsl_vector *in);
double apop_vector_distance(
const gsl_vector *ina, const gsl_vector *inb,
const char metric, const double norm);
gsl_rng *apop_rng_alloc(int seed);
#include "fstr.h"
int main() {
fstr_s * fstr = fstr_new("moby");
fstr_list chapters = fstr_split(fstr, "\nCHAPTER");
for(int i = 0; i < chapters.count; ++i) {
fstr_list for_the_title = fstr_split(chapters.strings[i], "\\.");
fstr_show(for_the_title.strings[1]);
fstr_list me = fstr_split(chapters.strings[i], "\\WI\\W");;
fstr_list whales = fstr_split(chapters.strings[i], "whale(s|)");;
fstr_list words = fstr_split(chapters.strings[i], "\\W");;
printf(
"\nch %i, words: %i.\t Is: %i\t whales: %i\n",
i, words.count - 1, me.count - 1, whales.count - 1);
fstr_list_free(for_the_title);
fstr_list_free(me);
fstr_list_free(whales);
fstr_list_free(words);
}
fstr_list_free(chapters);
fstr_free(fstr);
}
#include "cplx.h"
#include <gsl/gsl_blas.h> // gsl_blas_ddot
#include <gsl/gsl_complex_math.h> // gsl_complex_mul(_real)
gsl_vector_complex * cvec_dot_gslcplx(gsl_vector_complex * v, gsl_complex x) {
gsl_vector_complex * out = gsl_vector_complex_alloc(v->size);
for(int i = 0; i < v->size; ++i)
gsl_vector_complex_set(
out, i, gsl_complex_mul(x, gsl_vector_complex_get(v, i)));
return out;
}
gsl_vector_complex * vec_dot_gslcplx(gsl_vector * v, gsl_complex x) {
gsl_vector_complex * out = gsl_vector_complex_alloc(v->size);
for(int i = 0; i < v->size; ++i)
gsl_vector_complex_set(
out, i, gsl_complex_mul_real(x, gsl_vector_get(v, i)));
return out;
}
gsl_vector_complex * cvec_dot_c(gsl_vector_complex * v, complex double x) {
return cvec_dot_gslcplx(v, gsl_cplx_from_c99(x));
}
gsl_vector_complex * vec_dot_c(gsl_vector * v, complex double x) {
return vec_dot_gslcplx(v, gsl_cplx_from_c99(x));
}
complex double ddot(complex double x, complex double y) { return x * y; }
void gsl_vector_complex_print(gsl_vector_complex * v) {
for(int i = 0; i < v->size; ++i) {
gsl_complex x = gsl_vector_complex_get(v, i);
printf(
"%4g + %4gi %c",
GSL_REAL(x),
GSL_IMAG(x),
(i < v->size - 1) ? '\t' : '\n');
}
}
#include <complex.h> // nice names for C's complex types
#include <gsl/gsl_vector.h> // gsl_vector_complex
gsl_vector_complex * cvec_dot_gslcplx(gsl_vector_complex * v, gsl_complex x);
gsl_vector_complex * vec_dot_gslcplx(gsl_vector * v, gsl_complex x);
gsl_vector_complex * cvec_dot_c(gsl_vector_complex * v, complex double x);
gsl_vector_complex * vec_dot_c(gsl_vector * v, complex double x);
void gsl_vector_complex_print(gsl_vector_complex * v);
#define gsl_cplx_from_c99(x) (gsl_complex) { .dat={ creal(x), cimag(x) } }
complex double ddot (complex double x, complex double y);
#define dot(x, y) _Generic( \
(x), \
gsl_vector *: dot_given_vec(y), \
gsl_vector_complex *: dot_given_cplx_vec(y), \
default: ddot)((x), (y))
#define dot_given_vec(y) _Generic( \
(y), \
gsl_complex: vec_dot_gslcplx, \
default: vec_dot_c)
#define dot_given_cplx_vec(y) _Generic( \
(y), \
gsl_complex: cvec_dot_gslcplx, \
default: cvec_dot_c)
#include "fstr.h"
#include "string_utilities.h"
fstr_s * fstr_new(char const * filename) {
fstr_s * out = malloc(sizeof(fstr_s));
*out = (fstr_s) { .start=0, .refs=malloc(sizeof(int)) };
out->data = string_from_file(filename);
out->end = out->data ? strlen(out->data) : 0;
*out->refs = 1;
return out;
}
/**
* Make a new fstr_s that is a substring of the input fstr_s.
* \param in The parent string.
* \param start The offset in the original string where the substring starts.
* \param len The length of the substring. If longer than the available text,
* the substring will only go to the end of the parent string.
*/
fstr_s * fstr_copy(fstr_s const * in, size_t start, size_t len) {
fstr_s * out = malloc(sizeof(fstr_s));
*out = *in;
out->start += start;
if(in->end > out->start + len)
out->end = out->start + len;
(*out->refs)++;
return out;
}
void fstr_free(fstr_s * in) {
(*in->refs)--;
if(!*in->refs) {
free(in->data);
free(in->refs);
}
free(in);
}
/**
* Split an input string into a sequence of substrings.
* \param in The input string to split.
* \param start_pattern The regex marking the beginning of a new substring.
* \return A list of substrings.
*/
fstr_list fstr_split(fstr_s const * in, gchar const * start_pattern) {
fstr_s ** out = malloc(sizeof(fstr_s *));
int outlen = 1;
out[0] = fstr_copy(in, 0, in->end);
GRegex * start_regex = g_regex_new(start_pattern, 0, 0, NULL);
gint mstart = 0, mend = 0;
fstr_s * remaining = fstr_copy(in, 0, in->end);
do {
GMatchInfo * start_info;
g_regex_match(
start_regex, &remaining->data[remaining->start], 0, &start_info);
g_match_info_fetch_pos(start_info, 0, &mstart, &mend);
g_match_info_free(start_info);
if(mend > 0 && mend < remaining->end - remaining->start) {
out = realloc(out, ++outlen * sizeof(fstr_s *));
out[outlen - 1] = fstr_copy(remaining, mend, remaining->end - mend);
out[outlen - 2]->end = remaining->start + mstart;
remaining->start += mend;
} else
break;
} while(1);
fstr_free(remaining);
g_regex_unref(start_regex);
return (fstr_list) { .strings=out, .count=outlen };
}
void fstr_list_free(fstr_list in) {
for(int i = 0; i < in.count; ++i)
fstr_free(in.strings[i]);
free(in.strings);
}
void fstr_show(fstr_s const * fstr) {
printf("%.*s", (int) (fstr->end - fstr->start), &fstr->data[fstr->start]);
}
#include <stdio.h>
#include <stdlib.h>
#include <glib.h>
typedef struct {
char * data;
size_t start, end;
int * refs;
} fstr_s;
fstr_s * fstr_new(char const * filename);
fstr_s * fstr_copy(fstr_s const * in, size_t start, size_t len);
void fstr_show(fstr_s const * fstr);
void fstr_free(fstr_s * in);
typedef struct {
fstr_s ** strings;
int count;
} fstr_list;
fstr_list fstr_split(fstr_s const * in, gchar const * start_pattern);
void fstr_list_free(fstr_list in);
@* Initializations.
@ This is the part of the agent-based model with the handlers for the |people|
structures and the procedure itself.
At this point all the interface with the groups happens via the
new/join/exit/print functions from |groups.cweb.c|. This, there is zero memory
management code in this file--the reference counting guarantees us that when the
last member exits a group, the group will be freed.
@c
#include "groups.h"
int pop=2000,
periods=200,
dimension=2;
@ In |main|, we'll initialize a few constants that we can't have as static
variables because they require math.
@<set up more constants@>=
double new_group_odds = 1./pop,
mass_benefit = .7/pop;
gsl_rng * r = apop_rng_alloc(1234);
@* The |person_s| structure.
@ The people in this simulation are pretty boring: they do not die, and do not
move. So the struct that represents them is simple, with just |position| and a
pointer to the group of which the agent is currently a member.
@c
typedef struct {
gsl_vector * position;
group_s * group;
} person_s;
@ The setup routine is also pretty boring, and really consists of allocating a
uniform random vector in two dimensions.
@c
person_s person_setup(gsl_rng * r) {
gsl_vector * posn = gsl_vector_alloc(dimension);
for(int i = 0; i < dimension; ++i)
gsl_vector_set(posn, i, 2 * gsl_rng_uniform(r) - 1);
return (person_s) { .position=posn };
}
@* Group membership.
@ At the outset of this function, the person leaves this group. Then, the
decision is only whether to form a new group or join an existing one.
@c
void check_membership(
person_s * p, gsl_rng * r, double mass_benefit,
double new_group_odds) {
group_exit(p->group, p->position);
p->group = (gsl_rng_uniform(r) < new_group_odds)
? @<form a new group@>
: @<join the closest group@>;
}
@
@<form a new group@>=
group_new(p->position)
@
@<join the closest group@>=
group_join(group_closest(p->position, mass_benefit), p->position)
@* Setting up.
@ The initialization of the population. Using CWEB's macros, it is at this point
self-documenting.
@c
void init(person_s * people, int pop, gsl_rng * r) {
@<position everybody@>
@<start with ten groups@>
@<everybody joins a group@>
}
@
@<position everybody@>=
for(int i = 0; i < pop; ++i)
people[i] = person_setup(r);
@ The first ten people in our list form new groups, but because everybody's
position is random, this is assigning the ten groups at random.
@<start with ten groups@>=
for(int i = 0; i < 10; ++i)
people[i].group = group_new(people[i].position);
@
@<everybody joins a group@>=
for(int i = 10; i < pop; ++i)
people[i].group = group_join(people[i % 10].group, people[i].position);
@* Plotting with Gnuplot.
@ This is the header for Gnuplot. I arrived at it by playing around on Gnuplot's
command line, the writing down my final picks for settings here.
@<print the Gnuplot header@>=
printf("unset key;set xrange [-1:1]\nset yrange [-1:1]\n");
@ Gnuplot animation simply consists of sending a sequence of plot statements.
@<plot one animation frame@>=
print_groups();
@* |main|.
@ The |main| routine consists of a few setup steps, and a simple loop: calculate
a new state, then plot it.
@c
int main() {
@<set up more constants@>
person_s people[pop];
init(people, pop, r);
@<print the Gnuplot header@>
for(int t = 0; t < periods; ++t) {
for(int i = 0; i < pop; ++i)
check_membership(&people[i], r, mass_benefit, new_group_odds);
@<plot one animation frame@>
}
}
#include <glib.h>
#include "apop.h"
typedef struct {
gsl_vector * position;
int id, size;
} group_s;
group_s * group_new(gsl_vector * position);
group_s * group_join(group_s * joinme, gsl_vector * position);
void group_exit(group_s * leaveme, gsl_vector * position);
group_s * group_closest(gsl_vector * position, double mb);
void print_groups();
@ Here in the introductory material, we include the header and specify the
global list of groups that the program makes use of. We'll need new/copy/free
functions for each group.
@c
#include "groups.h"
GList * group_list;
@<new group@>
@<copy group@>
@<free group@>
@ The new group method is pretty boilerplate: we |malloc| some space, fill the
struct using designated initializers, and append the newly-formed group to the
list.
@<new group@>=
group_s * group_new(gsl_vector * position) {
static int id = 0;
group_s * out = malloc(sizeof(group_s));
*out =
(group_s) { .position=apop_vector_copy(position), .id=++id, .size=1, };
group_list = g_list_append(group_list, out);
return out;
}
@ When an agent joins a group, the group is `copied' to the agent, but there
isn't any memory being copied: the group is simply modified to accomodate the
new person. We have to increment the reference count, which is easy enough, and
then modify the mean position. If the mean position without the $n$th person is
$P_{n-1}$, and the $n$th person is at position $p$, the the new mean position
with the person, $P_n$ is the weighted sum.
$$P_n = \left( (n-1)P_{n-1}/n \right) + p/n.$$
We calculate that for each dimension.
@<copy group@>=
group_s * group_join(group_s * joinme, gsl_vector * position) {
int n = ++joinme->size; // increment the reference count
for(int i = 0; i < joinme->position->size; ++i) {
joinme->position->data[i] *= (n - 1.) / n;
joinme->position->data[i] += position->data[i] / n;
}
return joinme;
}
@ The `free' function really only frees the group when the reference count is
zero. When it isn't, then we need to run the data-augmenting formula for the
mean in reverse to remove a person.
@<free group@>=
void group_exit(group_s * leaveme, gsl_vector * position) {
int n = leaveme->size--; // lower the reference count
for(int i = 0; i < leaveme->position->size; ++i) {
leaveme->position->data[i] -= position->data[i] / n;
leaveme->position->data[i] *= n / (n - 1.);
}
if(leaveme->size == 0) { // garbage collect?
gsl_vector_free(leaveme->position);
group_list = g_list_remove(group_list, leaveme);
free(leaveme);
}
}
@ I played around a lot with different rules for how exactly people evaluate the
distance to the groups. In the end, I wound up using the $L_3$ norm. The
standard distance is the $L_2$ norm, aka Euclidean distance, meaning that the
distance between $(x_1, y_1)$ and $(x_2, y_2)$ is
$\sqrt{(x_1-x_2)^2+(y_1-y_2^2}$. This is $L_3$,
${\sqrt[3]{(x_1-x_2)^3+(y_1-y_2)^3}$. This and the call to |apop_copy| above are
the only calls to the Apophenia library; you could write around them if you
don't have that library on hand.
@<distance@>=
apop_vector_distance(g->position, position, 'L', 3)
@ By `closest', I really mean the group that provides the greatest benefit, by
having the smallest distance minus weighted size. Given the utility function
represented by the |dist| line, this is just a simple |for| loop to find the
smallest distance.
@c
group_s * group_closest(gsl_vector * position, double mass_benefit) {
group_s * fave = NULL;
double smallest_dist = GSL_POSINF;
for(GList * gl = group_list; gl != NULL; gl = gl->next) {
group_s * g = gl->data;
double dist = @<distance@> - mass_benefit * g->size;
if(dist < smallest_dist) {
smallest_dist = dist;
fave = g;
}
}
return fave;
}
@ Gnuplot is automation-friendly. Here we get an animated simulation with four
lines of plotting code. The header |plot '-'| tells the system to plot the data
to follow, then we print the $(X, Y)$ positions, one to a line. The final |e|
indicates the end of the data set. The main program will set some initial
Gnuplot settings.
@c
void print_groups() {
printf("plot '-' with points pointtype 6\n");
for(GList * gl = group_list; gl != NULL; gl = gl->next) {
apop_vector_print(((group_s *) gl->data)->position);
}
printf("e\n");
}
#include <stdio.h>
#include <math.h>
typedef struct point {
double x, y;
} point;
typedef struct {
struct point;
double z;
} threepoint;
double threelength(threepoint p) {
return sqrt(p.x * p.x + p.y * p.y + p.z * p.z);
}
int main() {
threepoint p = { .x=3, .y=0, .z=4 };
printf("p is %g units from the origin\n", threelength(p));
}
#include <stdio.h>
#include <math.h>
typedef struct point {
double x, y;
} point;
typedef struct {
union {
struct point;
point p2;
};
double