<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://thechange.wiki/index.php?action=history&amp;feed=atom&amp;title=Code%3Asr28-collate.c</id>
	<title>Code:sr28-collate.c - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://thechange.wiki/index.php?action=history&amp;feed=atom&amp;title=Code%3Asr28-collate.c"/>
	<link rel="alternate" type="text/html" href="https://thechange.wiki/index.php?title=Code:sr28-collate.c&amp;action=history"/>
	<updated>2026-04-22T15:18:32Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.41.0</generator>
	<entry>
		<id>https://thechange.wiki/index.php?title=Code:sr28-collate.c&amp;diff=273&amp;oldid=prev</id>
		<title>Elie: Created page with &quot;This program converts USDA nutrition data into an easier table to run SQL queries on. * Input: :File:sr28asc.zip| (must be unzipped in a subfolder &lt;code&gt;data/sr28asc/&lt;/code&gt;) * Output: :File:sr28-collated.csv|  &lt;syntaxhighlight lang=&quot;c&quot;&gt; // sr28-collate.c // This program converts USDA nutrition data into an easier table to run SQL queries on. // Input: sr28asc.zip (USDA Standard Reference [SR Legacy] ascii version 28) // Output: sr28-collated.csv // Instructions:...&quot;</title>
		<link rel="alternate" type="text/html" href="https://thechange.wiki/index.php?title=Code:sr28-collate.c&amp;diff=273&amp;oldid=prev"/>
		<updated>2022-08-27T03:31:54Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;This program converts USDA nutrition data into an easier table to run SQL queries on. * Input: &lt;a href=&quot;/index.php?title=File:sr28asc.zip&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;File:sr28asc.zip (page does not exist)&quot;&gt;File:sr28asc.zip&lt;/a&gt; (must be unzipped in a subfolder &amp;lt;code&amp;gt;data/sr28asc/&amp;lt;/code&amp;gt;) * Output: &lt;a href=&quot;/File:sr28-collated.csv&quot; title=&quot;File:sr28-collated.csv&quot;&gt;File:sr28-collated.csv&lt;/a&gt;  &amp;lt;syntaxhighlight lang=&amp;quot;c&amp;quot;&amp;gt; // sr28-collate.c // This program converts USDA nutrition data into an easier table to run SQL queries on. // Input: sr28asc.zip (USDA Standard Reference [SR Legacy] ascii version 28) // Output: sr28-collated.csv // Instructions:...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;This program converts USDA nutrition data into an easier table to run SQL queries on.&lt;br /&gt;
* Input: [[:File:sr28asc.zip|sr28asc.zip]] (must be unzipped in a subfolder &amp;lt;code&amp;gt;data/sr28asc/&amp;lt;/code&amp;gt;)&lt;br /&gt;
* Output: [[:File:sr28-collated.csv|sr28-collated.csv]]&lt;br /&gt;
&lt;br /&gt;
&amp;lt;syntaxhighlight lang=&amp;quot;c&amp;quot;&amp;gt;&lt;br /&gt;
// sr28-collate.c&lt;br /&gt;
// This program converts USDA nutrition data into an easier table to run SQL queries on.&lt;br /&gt;
// Input: sr28asc.zip (USDA Standard Reference [SR Legacy] ascii version 28)&lt;br /&gt;
// Output: sr28-collated.csv&lt;br /&gt;
// Instructions:&lt;br /&gt;
// - Unzip sr28asc.zip to subfolder: data/sr28asc/&lt;br /&gt;
// - Run this program: type into a Linux terminal:  gcc sr28-collate.c -O1 &amp;amp;&amp;amp; ./a.out&lt;br /&gt;
// - When it finishes, sr28-collated.csv will appear in the subfolder 'data'.&lt;br /&gt;
// Author: Elie Goldman Smith &amp;lt;elie@olam.wiki&amp;gt;&lt;br /&gt;
// License: Creative Commons License&lt;br /&gt;
&lt;br /&gt;
#include &amp;lt;stdio.h&amp;gt;&lt;br /&gt;
#include &amp;lt;stdlib.h&amp;gt;&lt;br /&gt;
#include &amp;lt;string.h&amp;gt;&lt;br /&gt;
&lt;br /&gt;
// Path of the CSV file that main() creates.&lt;br /&gt;
#define OUTPUT_FILENAME &amp;quot;data/sr28-collated.csv&amp;quot;&lt;br /&gt;
// Sizes of the lookup tables below, which are indexed directly by id.&lt;br /&gt;
#define MAXNUTRIENTS  888 // all nutrient id's must be smaller than this&lt;br /&gt;
#define MAXFOODS    94444 // all food id's must be smaller than this&lt;br /&gt;
// Counters that main() increments while parsing NUTR_DEF.txt and FOOD_DES.txt.&lt;br /&gt;
int nNutrients=0;&lt;br /&gt;
int nFoods=0;&lt;br /&gt;
// Lookup tables. The stored pointers refer into the file buffers that main() loads with read_all().&lt;br /&gt;
char *food_lines[MAXFOODS] = {NULL};         // by food id: start of the food's line of text&lt;br /&gt;
char *nutrient_names[MAXNUTRIENTS] = {NULL}; // by nutrient id: position just after the 3rd '^' of its line&lt;br /&gt;
char *nutrient_units[MAXNUTRIENTS] = {NULL}; // by nutrient id: position just after the 1st '^' of its line&lt;br /&gt;
char **food_nutrients[MAXFOODS] = {NULL};    // by food id: calloc'd array of MAXNUTRIENTS pointers to 'amount' text&lt;br /&gt;
// Output stream: opened and closed by main(), written by output_cell() and output_line().&lt;br /&gt;
FILE *_out;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
// Loads an entire file into memory (uses malloc)&lt;br /&gt;
// param: filename: path of the file to load&lt;br /&gt;
// param: size: receives the number of bytes loaded (0 on any failure)&lt;br /&gt;
// return value: NUL-terminated buffer to be released with free(), or NULL if the&lt;br /&gt;
//               file could not be opened, measured or allocated&lt;br /&gt;
char *read_all(const char *filename, int *size)&lt;br /&gt;
{&lt;br /&gt;
 char *data = NULL;&lt;br /&gt;
 long len = -1;&lt;br /&gt;
 *size = 0;&lt;br /&gt;
 FILE *f = fopen(filename, &amp;quot;r&amp;quot;);&lt;br /&gt;
 if (!f) { perror(filename); return NULL; }&lt;br /&gt;
 // ftell() reports -1 on error; that value must never reach malloc() or fread()&lt;br /&gt;
 if (fseek(f,0,SEEK_END) == 0) len = ftell(f);&lt;br /&gt;
 if (len &amp;lt; 0 || fseek(f,0,SEEK_SET) != 0) {&lt;br /&gt;
  perror(filename);&lt;br /&gt;
  fclose(f);&lt;br /&gt;
  return NULL;&lt;br /&gt;
 }&lt;br /&gt;
 data = malloc((size_t)len+1);&lt;br /&gt;
 if (data) {&lt;br /&gt;
  printf(&amp;quot;Loading %s (%ld bytes)\n&amp;quot;, filename, len);&lt;br /&gt;
  // count bytes, not blocks: an empty file or a short read is not an error by itself&lt;br /&gt;
  size_t got = fread(data, 1, (size_t)len, f);&lt;br /&gt;
  if (ferror(f)) { printf(&amp;quot;^ failed\n&amp;quot;); got = 0; }&lt;br /&gt;
  data[got] = '\0';&lt;br /&gt;
  *size = (int)got;&lt;br /&gt;
 }&lt;br /&gt;
 else printf(&amp;quot;Failed to allocate memory (%ld bytes) for %s\n&amp;quot;, len+1, filename);&lt;br /&gt;
 fclose(f);&lt;br /&gt;
 return data;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
// Writes the text of one &amp;quot;cell&amp;quot; of input data to the output file&lt;br /&gt;
//   input example: ~foo~&lt;br /&gt;
//   output example: foo&lt;br /&gt;
// A double quote inside the cell is written twice.&lt;br /&gt;
// param: p: pointer to the beginning of the cell&lt;br /&gt;
void output_cell(const char *p) {&lt;br /&gt;
 p += strspn(p, &amp;quot;~^\r\n&amp;quot;);                 // step over leading special chars&lt;br /&gt;
 size_t left = strcspn(p, &amp;quot;~^\r\n&amp;quot;);       // length up to the next special char or end of data&lt;br /&gt;
 for (; left &amp;gt; 0; left--, p++) {&lt;br /&gt;
  if (*p == '\&amp;quot;') fputs(&amp;quot;\&amp;quot;\&amp;quot;&amp;quot;, _out);     // escape quotes&lt;br /&gt;
  else            fputc(*p, _out);&lt;br /&gt;
 }&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
// Converts one line of input data to CSV and writes it to the output file&lt;br /&gt;
//  input example:  ~foo~^~bar~^~baz~   (with newline '\n' at the end)&lt;br /&gt;
//  output example: &amp;quot;foo&amp;quot;,&amp;quot;bar&amp;quot;,&amp;quot;baz&amp;quot;   (no newline)&lt;br /&gt;
// param: p: pointer to the beginning of the line&lt;br /&gt;
// return value: number of cells written (0 for an empty line)&lt;br /&gt;
int output_line(const char *p) {&lt;br /&gt;
 int cells = 0;&lt;br /&gt;
 for (; *p != '\0' &amp;amp;&amp;amp; *p != '\r' &amp;amp;&amp;amp; *p != '\n'; p++) {&lt;br /&gt;
  if (cells == 0) cells = 1;                      // first char seen: the line has at least one cell&lt;br /&gt;
  switch (*p) {&lt;br /&gt;
   case '^':  fputc(',', _out); cells++; break;   // cell separator&lt;br /&gt;
   case '~':  fputc('\&amp;quot;', _out);        break;   // text delimiter&lt;br /&gt;
   case '\&amp;quot;': fputs(&amp;quot;\&amp;quot;\&amp;quot;&amp;quot;, _out); break;   // literal quote, doubled&lt;br /&gt;
   default:   fputc(*p, _out);           break;&lt;br /&gt;
  }&lt;br /&gt;
 }&lt;br /&gt;
 return cells;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
// Entry point. Loads FOOD_DES.txt, NUTR_DEF.txt and NUT_DATA.txt, indexes them by&lt;br /&gt;
// food id and nutrient id, then writes one CSV row per food to OUTPUT_FILENAME.&lt;br /&gt;
// return value: always 0; problems are reported as messages&lt;br /&gt;
int main() {&lt;br /&gt;
 // load files&lt;br /&gt;
 int size1, size2, size3;&lt;br /&gt;
 char *food_des = read_all(&amp;quot;data/sr28asc/FOOD_DES.txt&amp;quot;, &amp;amp;size1);&lt;br /&gt;
 char *nutr_def = read_all(&amp;quot;data/sr28asc/NUTR_DEF.txt&amp;quot;, &amp;amp;size2);&lt;br /&gt;
 char *nut_data = read_all(&amp;quot;data/sr28asc/NUT_DATA.txt&amp;quot;, &amp;amp;size3);&lt;br /&gt;
 if (nutr_def &amp;amp;&amp;amp; food_des &amp;amp;&amp;amp; nut_data) {&lt;br /&gt;
  // Note for the parser code below: atoi() converts like strtol(), so it stops at the first&lt;br /&gt;
  // char that cannot belong to the number. Calling it in the middle of a huge buffer is cheap.&lt;br /&gt;
  printf(&amp;quot;Parsing input files...\n&amp;quot;);&lt;br /&gt;
  int alloc_ok = 1;&lt;br /&gt;
&lt;br /&gt;
  // Food descriptions&lt;br /&gt;
  char *end = food_des+size1;&lt;br /&gt;
  for (char *p=food_des; p&amp;lt;end; p++) {&lt;br /&gt;
   char *line = p;&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp;(*p &amp;lt; '0' || *p &amp;gt; '9')) p++;// skip to next numeric char&lt;br /&gt;
   if (p&amp;gt;=end) break;                         // nothing but blanks left: atoi() would invent id 0&lt;br /&gt;
   int fid = atoi(p);                         // get food id&lt;br /&gt;
   if (fid &amp;gt;= 0 &amp;amp;&amp;amp; fid &amp;lt; MAXFOODS) {&lt;br /&gt;
    nFoods++;&lt;br /&gt;
    if (alloc_ok &amp;amp;&amp;amp; !food_nutrients[fid]) {   // allocate memory for food nutrient amounts (once per id)&lt;br /&gt;
     food_nutrients[fid] = calloc(MAXNUTRIENTS,sizeof(char*));&lt;br /&gt;
     if (!food_nutrients[fid]) {&lt;br /&gt;
      printf(&amp;quot;Not enough memory. Some foods will be missing.\n&amp;quot;);&lt;br /&gt;
      alloc_ok = 0;&lt;br /&gt;
     }&lt;br /&gt;
    }&lt;br /&gt;
    // save pointer to line of text; a food without a nutrient table is left out of the output&lt;br /&gt;
    if (food_nutrients[fid]) food_lines[fid] = line;&lt;br /&gt;
   }&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp; *p != '\n') p++;           // skip to next line&lt;br /&gt;
  }&lt;br /&gt;
&lt;br /&gt;
  // Nutrient definitions&lt;br /&gt;
  end = nutr_def+size2;&lt;br /&gt;
  for (char *p=nutr_def; p&amp;lt;end; p++) {&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp;(*p &amp;lt; '0' || *p &amp;gt; '9')) p++;// skip to next numeric char&lt;br /&gt;
   if (p&amp;gt;=end) break;                         // no further id in the file&lt;br /&gt;
   int nid = atoi(p);                         // get nutrient id&lt;br /&gt;
   if (nid &amp;gt;= 0 &amp;amp;&amp;amp; nid &amp;lt; MAXNUTRIENTS) {&lt;br /&gt;
    while(p&amp;lt;end &amp;amp;&amp;amp; *p != '^') p++;            // skip to next caret&lt;br /&gt;
    if   (p&amp;lt;end) nutrient_units[nid] = ++p;   // get nutrient unitname pointer&lt;br /&gt;
    while(p&amp;lt;end &amp;amp;&amp;amp; *p != '^') p++;            // skip to next caret&lt;br /&gt;
    if   (p&amp;lt;end) p++;                         // next char&lt;br /&gt;
    while(p&amp;lt;end &amp;amp;&amp;amp; *p != '^') p++;            // skip to next caret&lt;br /&gt;
    if   (p&amp;lt;end) nutrient_names[nid] = ++p;   // get nutrient name pointer&lt;br /&gt;
    nNutrients++;&lt;br /&gt;
   }&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp; *p != '\n') p++;           // skip to next line&lt;br /&gt;
  }&lt;br /&gt;
&lt;br /&gt;
  // Nutrition data of foods&lt;br /&gt;
  printf(&amp;quot;%d nutrients, %d foods\n&amp;quot;, nNutrients, nFoods);&lt;br /&gt;
  end = nut_data+size3;&lt;br /&gt;
  for (char *p=nut_data; p&amp;lt;end; p++) {&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp;(*p &amp;lt; '0' || *p &amp;gt; '9')) p++;// skip to next numeric char&lt;br /&gt;
   if (p&amp;gt;=end) break;                         // no further record in the file&lt;br /&gt;
   int   fid = atoi(p);                       // get food id&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp; *p != '^') p++;            // skip to next caret&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp;(*p &amp;lt; '0' || *p &amp;gt; '9')) p++;// skip to next numeric char&lt;br /&gt;
   if (p&amp;gt;=end) break;                         // truncated last record: no nutrient id&lt;br /&gt;
   int   nid = atoi(p);                       // get nutrient id&lt;br /&gt;
   // food_nutrients[fid] is NULL for a food that is absent from FOOD_DES.txt or was dropped for lack of memory&lt;br /&gt;
   if (fid &amp;gt;= 0 &amp;amp;&amp;amp; fid &amp;lt; MAXFOODS &amp;amp;&amp;amp; nid &amp;gt;= 0 &amp;amp;&amp;amp; nid &amp;lt; MAXNUTRIENTS &amp;amp;&amp;amp; food_nutrients[fid]) {&lt;br /&gt;
    while(p&amp;lt;end &amp;amp;&amp;amp; *p != '^') p++;            // skip to next caret&lt;br /&gt;
    while(p&amp;lt;end &amp;amp;&amp;amp;(*p &amp;lt; '0' || *p &amp;gt; '9') &amp;amp;&amp;amp; *p != '.') p++; // skip to next numeric char&lt;br /&gt;
    food_nutrients[fid][nid] = p;             // get pointer to 'amount' of nutrient in food&lt;br /&gt;
   }&lt;br /&gt;
   while (p&amp;lt;end &amp;amp;&amp;amp; *p != '\n') p++;           // skip to next line&lt;br /&gt;
  }&lt;br /&gt;
&lt;br /&gt;
  // Output&lt;br /&gt;
  _out = fopen(OUTPUT_FILENAME, &amp;quot;w&amp;quot;);&lt;br /&gt;
  if (_out) {&lt;br /&gt;
   printf(&amp;quot;Writing to %s...\n&amp;quot;, OUTPUT_FILENAME);&lt;br /&gt;
   // header&lt;br /&gt;
   fputs(&amp;quot;\&amp;quot;NDB_No\&amp;quot;,\&amp;quot;FdGrp_Cd\&amp;quot;,\&amp;quot;Long_Desc\&amp;quot;,\&amp;quot;Shrt_Desc\&amp;quot;,\&amp;quot;ComName\&amp;quot;,\&amp;quot;ManufacName\&amp;quot;,\&amp;quot;Survey\&amp;quot;,\&amp;quot;Ref_desc\&amp;quot;,\&amp;quot;Refuse\&amp;quot;,\&amp;quot;SciName\&amp;quot;,\&amp;quot;N_Factor\&amp;quot;,\&amp;quot;Pro_Factor\&amp;quot;,\&amp;quot;Fat_Factor\&amp;quot;,\&amp;quot;CHO_Factor\&amp;quot;&amp;quot;,_out);&lt;br /&gt;
   for (int i=0; i&amp;lt;MAXNUTRIENTS; i++) if (nutrient_names[i]) {&lt;br /&gt;
    fputs(&amp;quot;,\&amp;quot;&amp;quot;, _out);&lt;br /&gt;
    output_cell(nutrient_names[i]);&lt;br /&gt;
    fputs(&amp;quot; (&amp;quot;,  _out);&lt;br /&gt;
    output_cell(nutrient_units[i]);&lt;br /&gt;
    fputs(&amp;quot;)\&amp;quot;&amp;quot;, _out);&lt;br /&gt;
   }&lt;br /&gt;
   fputc('\n',   _out);&lt;br /&gt;
   // data&lt;br /&gt;
   int nLines=0;&lt;br /&gt;
   for (int i=0; i&amp;lt;MAXFOODS; i++) if (food_lines[i]) {&lt;br /&gt;
    int n = output_line(food_lines[i]);&lt;br /&gt;
    if (n != 14) printf(&amp;quot;WTF? Food should have 14 fields, not %d.\n&amp;quot;, n); // sanity check&lt;br /&gt;
    for (int j=0; j&amp;lt;MAXNUTRIENTS; j++) if (nutrient_names[j]) {&lt;br /&gt;
     fputs(&amp;quot;,\&amp;quot;&amp;quot;,_out);&lt;br /&gt;
     if (food_nutrients[i] &amp;amp;&amp;amp; food_nutrients[i][j]) output_cell(food_nutrients[i][j]);&lt;br /&gt;
     fputc('\&amp;quot;', _out);&lt;br /&gt;
    }&lt;br /&gt;
    fputc('\n',  _out);&lt;br /&gt;
    if (++nLines % 100 == 0) printf(&amp;quot;Wrote %d foods\n&amp;quot;, nLines);&lt;br /&gt;
   }&lt;br /&gt;
   printf(&amp;quot;Wrote %d foods\n&amp;quot;, nLines);&lt;br /&gt;
   // a failed write (e.g. full disk) may only show up in ferror() or when fclose() flushes&lt;br /&gt;
   int write_failed = ferror(_out);&lt;br /&gt;
   if (fclose(_out) != 0) write_failed = 1;&lt;br /&gt;
   if (write_failed) printf(&amp;quot;Error while writing %s\n&amp;quot;, OUTPUT_FILENAME);&lt;br /&gt;
   else if (nLines==nFoods) printf(&amp;quot;Done.\n&amp;quot;);&lt;br /&gt;
  }&lt;br /&gt;
  else perror(&amp;quot;Cannot write to &amp;quot;OUTPUT_FILENAME);&lt;br /&gt;
 }&lt;br /&gt;
 free(food_des);&lt;br /&gt;
 free(nutr_def);&lt;br /&gt;
 free(nut_data);&lt;br /&gt;
 for (int i=0; i&amp;lt;MAXFOODS; i++) free(food_nutrients[i]); // one table per food id; free(NULL) does nothing&lt;br /&gt;
 return 0;&lt;br /&gt;
}&lt;br /&gt;
&amp;lt;/syntaxhighlight&amp;gt;&lt;/div&gt;</summary>
		<author><name>Elie</name></author>
	</entry>
</feed>