Commit d4fdff4e authored by bbguimaraes's avatar bbguimaraes

21st_century_c: twelfth chapter

This chapter requires libapophenia, which can be found at
http://apophenia.info/. After downloading, extracting, and compiling it, the
Makefile has to be tweaked by adding the appropriate -I and -L flags to
CFLAGS and LDFLAGS, respectively.
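
For example, assuming Apophenia was installed under the default /usr/local
prefix (adjust to the actual installation path), the additions would look
something like:

    CFLAGS += -I/usr/local/include
    LDFLAGS += -L/usr/local/lib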
parent e8d576de
Makefile
CFLAGS = `pkg-config --cflags glib-2.0` `curl-config --cflags` -I../2 -I../9
CFLAGS += -I/usr/include/libxml2 -Wall -g -std=c11
LDFLAGS = `pkg-config --libs glib-2.0` `curl-config --libs` -pthread -lxml2
OBJECTS = mmap pthreads mutex_wc gsl_distance nyt_feed
.PHONY: all test clean
all: $(OBJECTS)
pthreads: ../9/string_utilities.o
mutex_wc: ../9/string_utilities.o
../9/string_utilities.o:
	make -C ../9 string_utilities.o
gsl_distance: gsl_distance.c
	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -lapophenia
test:
	./mmap
	./pthreads pthreads.c mutex_wc.c
	./mutex_wc pthreads.c mutex_wc.c
	./gsl_distance
	./nyt_feed
clean:
	rm -f \
		$(OBJECTS) mmapped.bin ../9/string_utilities.o \
		now.html nytimes_feeds.rss
gsl_distance.c
#include <apop.h>
double one_dist(gsl_vector * v1, void * v2) {
return apop_vector_distance(v1, v2);
}
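// The MLE machinery maximizes this function, so return the negative of the
// total distance: maximizing -distance is the same as minimizing distance.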
long double distance(apop_data * data, apop_model * model) {
gsl_vector * target = model->parameters->vector;
return -apop_map_sum(data, .fn_vp=one_dist, .param=target, .part='r');
}
apop_model min_distance = {
.name="Minimum distance to a set of input points.",
.p=distance,
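    // vsize=-1 lets Apophenia size the parameter vector to match the data's
    // column count when the model is prepped.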
.vsize=-1
};
int main() {
apop_data * locations = apop_data_fill(
apop_data_alloc(5, 2),
1.1, 2.2,
4.8, 7.4,
2.9, 8.6,
-1.3, 3.7,
2.9, 1.1);
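    // Settings groups attach optional behavior to the model: here, estimate by
    // maximum likelihood using the derivative-free Nelder-Mead simplex search.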
Apop_model_add_group(
&min_distance, apop_mle, .method="NM simplex", .tolerance=1e-5);
Apop_model_add_group(&min_distance, apop_parts_wanted);
apop_model * est = apop_estimate(locations, &min_distance);
apop_model_show(est);
}
mmap.c
#include <stdio.h>
#include <unistd.h> // lseek, write, close
#include <stdlib.h> // exit
#include <fcntl.h> // open
#include <sys/mman.h>
#include "stopif.h"
#define Mapmalloc(number, type, filename, fd) \
load_mmap((filename), &(fd), (number) * sizeof(type), 'y')
#define Mapload(number, type, filename, fd) \
load_mmap((filename), &(fd), (number) * sizeof(type), 'n')
#define Mapfree(number, type, fd, pointer) \
releasemmap((pointer), (number) * sizeof(type), (fd))
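// The macros above wrap load_mmap/releasemmap so a file-backed array can be
// created, reloaded, and released with malloc-like one-liners.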
void * load_mmap(char const * filename, int * fd, size_t size, char make_room) {
*fd = open(
filename,
make_room == 'y' ? O_RDWR | O_CREAT | O_TRUNC : O_RDWR,
(mode_t) 0600);
Stopif(*fd == -1, return NULL, "Error opening file");
// Stretch the file size to the size of the (mmaped) array
if(make_room == 'y') {
int result = lseek(*fd, size - 1, SEEK_SET);
Stopif(
result == -1,
close(*fd); return NULL,
"Error stretching file with lseek");
result = write(*fd, "", 1);
Stopif(
result == -1,
close(*fd); return NULL,
"Error writing last byte of the file");
}
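    // Map the file into memory; MAP_SHARED means writes to the array are
    // carried through to the file itself.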
void * map = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
Stopif(map == MAP_FAILED, return NULL, "Error mapping the file");
return map;
}
int releasemmap(void * map, size_t size, int fd) {
Stopif(munmap(map, size) == -1, return -1, "Error un-mmapping the file");
close(fd);
return 0;
}
int main(int argc, char * argv[]) {
int fd;
long int N = 1e5 + 6;
int * map = Mapmalloc(N, int, "mmapped.bin", fd);
for(long int i = 0; i < N; ++i)
map[i] = i;
    Mapfree(N, int, fd, map);
// Now reopen and do some counting.
int * readme = Mapload(N, int, "mmapped.bin", fd);
long long int oddsum = 0;
for(long int i = 0; i < N; ++i)
if(readme[i] % 2)
oddsum += i;
printf("The sum of odd numbers up to %li: %lli\n", N, oddsum);
Mapfree(N, int, fd, readme);
}
#include "string_utilities.h"
#include <pthread.h>
#include <glib.h> // mutexes
long int global_wc;
typedef struct {
int wc;
char * docname;
} wc_struct;
void * wc(void * voidin) {
wc_struct * in = voidin;
char * doc = string_from_file(in->docname);
if(!doc)
return NULL;
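    // A statically allocated GMutex needs no g_mutex_init call; it is ready to
    // use as-is (GLib 2.32 and later).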
static GMutex count_lock;
char * delimiters = " `~!@#$%^&*()_-+={[]}|\\;:\",<>./?\n\t";
ok_array * words = ok_array_new(doc, delimiters);
if(!words)
return NULL;
in->wc = words->length;
ok_array_free(words);
g_mutex_lock(&count_lock);
for(int i = 0; i < in->wc; ++i)
++global_wc; // a slow global_wc += in->wc;
g_mutex_unlock(&count_lock);
return NULL;
}
int main(int argc, char ** argv) {
    argc--;
    argv++; // step past the name of the program.
    if(!argc) return 0; // no input files; a zero-length VLA below would be undefined
pthread_t threads[argc];
wc_struct s[argc];
for(int i = 0; i < argc; ++i) {
s[i] = (wc_struct) { .docname=argv[i] };
pthread_create(&threads[i], NULL, wc, &s[i]);
}
for(int i = 0; i < argc; ++i)
pthread_join(threads[i], NULL);
for(int i = 0; i < argc; i++)
printf("%s:\t%i\n", argv[i], s[i].wc);
printf("The total: %li\n", global_wc);
}
nyt_feed.c
/**
* \file
* A program to read in the NYT's headline feed and produce a simple HTML page
* from the headlines.
*/
#include <stdio.h>
#include <curl/curl.h>
#include <libxml2/libxml/xpath.h>
#include "stopif.h"
/**
* \mainpage
* The front page of the Grey Lady's web site is as gaudy as can be, including
* several headlines and sections trying to get your attention, various
* formatting schemes, and even photographs--in <em>color</em>.
*
* This program reads in the NYT Headlines RSS feed, and writes a simple list in
* plain HTML. You can then click through to the headline that modestly piques
* your attention.
*
* For notes on compilation, see the \ref compilation page.
*/
/**
* \page compilation Compiling the program
*
* Save the following code to \c makefile.
*
* Notice that cURL has a program, \c curl-config, that behaves like \c
* pkg-config, but is cURL-specific.
*
* \code
* CFLAGS = -g -Wall -O3 `curl-config --cflags` -I/usr/include/libxml2
* LDLIBS = `curl-config --libs` -lxml2 -lpthread
* CC=c99
*
* nyt_feed:
* \endcode
*
* Having saved your makefile, use <tt>make nyt_feed</tt> to compile.
*
* Of course, you have to have the development packages for libcurl and libxml2
* installed for this to work.
*/
// These have in-line Doxygen documentation. The < points to the prior text
// being documented.
char * rss_url = "http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml";
/**< The URL for an NYT RSS. */
char * rssfile = "nytimes_feeds.rss"; /**< A local file to write the RSS to. */
char * outfile = "now.html"; /**< The output file to open in your browser. */
/**
* Print a list of headlines in HTML format to the outfile, which is
* overwritten.
*
* \param urls The list of URLs. This should have been tested for non-NULLness.
* \param titles The list of titles, also pre-tested to be non-NULL. If the
* \c urls list is shorter than the \c titles list, this will crash.
*/
void print_to_html(xmlXPathObjectPtr urls, xmlXPathObjectPtr titles) {
    FILE * f = fopen(outfile, "w");
    Stopif(!f, return, "Error opening %s for writing.", outfile);
for(int i = 0; i < titles->nodesetval->nodeNr; ++i)
fprintf(
f, "<a href=\"%s\">%s</a><br>\n",
xmlNodeGetContent(urls->nodesetval->nodeTab[i]),
xmlNodeGetContent(titles->nodesetval->nodeTab[i]));
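        // Note: xmlNodeGetContent allocates a fresh string on each call; a
        // longer-lived program would xmlFree each result.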
fclose(f);
}
/**
* Parse an RSS feed on the hard drive. This will parse the XML, then find all
* nodes matching the XPath for the title elements and all nodes matching the
* XPath for the links. Then, it will write those to the outfile.
*
* \param infile The RSS file to read.
*/
int parse(char const * infile) {
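    // XPath expressions selecting every title node and every link node that
    // sit inside one of the feed's item elements.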
const xmlChar * titlepath = (xmlChar *) "//item/title";
const xmlChar * linkpath = (xmlChar *) "//item/link";
xmlDocPtr doc = xmlParseFile(infile);
Stopif(!doc, return -1, "Error: unable to parse file \"%s\"\n", infile);
xmlXPathContextPtr context = xmlXPathNewContext(doc);
Stopif(!context, return -2, "Error: unable to create new XPath context\n");
xmlXPathObjectPtr titles = xmlXPathEvalExpression(titlepath, context);
xmlXPathObjectPtr urls = xmlXPathEvalExpression(linkpath, context);
Stopif(
!titles || !urls,
return -3,
"Either the Xpath '//item/title' or '//item/link' failed.");
print_to_html(urls, titles);
xmlXPathFreeObject(titles);
xmlXPathFreeObject(urls);
xmlXPathFreeContext(context);
xmlFreeDoc(doc);
return 0;
}
/**
* Use cURL's easy interface to download the current RSS feed.
*
* \param url The URL of the NY Times RSS feed. Any of the ones listed at
* http://www.nytimes.com/services/xml/rss/nyt/ should work.
* \param outfile The headline file to write to your hard drive. First save the
* RSS feed to this location, then overwrite it with the short list of links.
* \return 0 == OK, -1 == failure.
*/
int get_rss(char const * url, char const * outfile) {
FILE * feedfile = fopen(outfile, "w");
if(!feedfile)
return -1;
    CURL * curl = curl_easy_init();
    if(!curl) {
        fclose(feedfile); // don't leak the half-open output file
        return -1;
    }
    curl_easy_setopt(curl, CURLOPT_URL, url);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, feedfile);
    CURLcode res = curl_easy_perform(curl);
    curl_easy_cleanup(curl); // clean up the handle on both success and failure
    fclose(feedfile);
    return res ? -1 : 0;
}
int main(void) {
    Stopif(
        get_rss(rss_url, rssfile),
        return 1,
        "Failed to download %s to %s.\n", rss_url, rssfile);
    Stopif(parse(rssfile), return 2, "Failed to parse %s.\n", rssfile);
    printf(
        "Wrote headlines to %s. Have a look at it in your browser.\n",
        outfile);
}
#include "stopif.h"
#include "string_utilities.h"
#include <pthread.h>
typedef struct {
int wc;
char * docname;
} wc_struct;
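// Each thread writes only to its own wc_struct, so no lock is needed here;
// compare mutex_wc.c, where threads also update a shared global counter.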
void * wc(void * voidin) {
wc_struct * in = voidin;
char * doc = string_from_file(in->docname);
if(!doc)
return NULL; // in->wc remains zero.
char * delimiters = " `~!@#$%^&*()_-+={[]}|\\;:\",<>./?\n";
ok_array * words = ok_array_new(doc, delimiters);
if(!words)
return NULL;
in->wc = words->length;
ok_array_free(words);
return NULL;
}
int main(int argc, char ** argv) {
argc--;
argv++;
Stopif(!argc, return 0, "Please give some file names on the command line.");
pthread_t threads[argc];
wc_struct s[argc];
for(int i = 0; i < argc; ++i) {
s[i] = (wc_struct) { .docname=argv[i] };
pthread_create(&threads[i], NULL, wc, &s[i]);
}
for(int i = 0; i < argc; ++i)
pthread_join(threads[i], NULL);
for(int i = 0; i < argc; ++i)
printf("%s:\t%i\n", argv[i], s[i].wc);
}