-- This is used in the methodology of various olam.wiki pages
-- Copyright Elie Goldman Smith, 2022 (Creative Commons license)
-- Run this in SQLite3
----------------------
-- Load data from various sources
.mode csv
-- cross-reference: crops (FAO data) with foods (USDA nutrition data):
.import fao_usda.csv fao_usda
-- nutrition (USDA):
.import data/sr28-collated.csv usda
-- crop production (FAO):
.import data/primary/fao/crops-primary.csv fao
-- country codes (FAO):
.import data/primary/fao/FAOSTAT-country-codes.csv region
-- population of regions (World Bank):
.import data/primary/API_SP.POP.TOTL_DS2_en_csv_v2_4150863.csv pop
-- global hunger index (.com)
.import data/global-hunger-index-2021.csv ghi
.mode list
-- Fix some of the region codes, to match the ISO3 standard
UPDATE pop SET `Country Code`="X01" WHERE `Country Code`="WLD";
-- Replace some empty strings with NULLs, so aggregate functions work
UPDATE fao SET `Value`=NULL WHERE `Value`='';
UPDATE pop SET `2021`=NULL WHERE `2021`='';
UPDATE pop SET `2020`=NULL WHERE `2020`='';
UPDATE pop SET `2019`=NULL WHERE `2019`='';
UPDATE pop SET `2018`=NULL WHERE `2018`='';
UPDATE pop SET `2017`=NULL WHERE `2017`='';
UPDATE pop SET `2016`=NULL WHERE `2016`='';
UPDATE pop SET `2015`=NULL WHERE `2015`='';
UPDATE pop SET `2014`=NULL WHERE `2014`='';
UPDATE pop SET `2013`=NULL WHERE `2013`='';
UPDATE pop SET `2012`=NULL WHERE `2012`='';
UPDATE pop SET `2011`=NULL WHERE `2011`='';
-- Generate a secondary table for easier querying.
-- It's big, and it intentionally contains some redundancy.
CREATE TABLE main AS
SELECT
region_name, region_m49, region_iso3, region_population, region_ghi,
crop_name, food_name, food_code,
production_food_grams_per_day,
production_food_grams_per_day * fat_fraction_of_food AS production_fat_grams_per_day,
production_food_grams_per_day * protein_fraction_of_food AS production_protein_grams_per_day,
production_food_grams_per_day * kalories_per_gram_of_food AS production_kalories_per_day,
yield_food_grams_per_day_per_acre,
yield_food_grams_per_day_per_acre * fat_fraction_of_food AS yield_fat_grams_per_day_per_acre,
yield_food_grams_per_day_per_acre * protein_fraction_of_food AS yield_protein_grams_per_day_per_acre,
yield_food_grams_per_day_per_acre * kalories_per_gram_of_food AS yield_kalories_per_day_per_acre,
farmland_acres
FROM (
SELECT
region.`M49 Code` AS region_m49,
p.`Area Code (ISO3)`AS region_iso3, -- Technically this is a "bare column", but it ALWAYS correlates with `Area`, so it's all good.
p.`Area` AS region_name,
p.`Item` AS crop_name,
usda.`Long_desc` AS food_name,
usda.`NDB_No` AS food_code,
-- population from most recent year available:
COALESCE(pop.`2021`,pop.`2020`,pop.`2019`,pop.`2018`,pop.`2017`,pop.`2016`,pop.`2015`,pop.`2014`,pop.`2013`,pop.`2012`,pop.`2011`)
AS region_population,
-- global hunger index from most recent year available:
ghi.`2021` AS region_ghi,
-- crop production...
-- in tonnes/year:
p.val
-- edible fraction of crop:
* fao_usda.`Multiplier` * 0.01*(100 - usda.`Refuse`)
-- conversion factor: "tonnes/year" to "grams/day":
* 2737.9093
-- result:
AS production_food_grams_per_day,
-- crop yields...
-- in hectograms/year/hectare:
y.val
-- edible fraction of crop:
* fao_usda.`Multiplier` * 0.01*(100 - usda.`Refuse`)
-- conversion factor: "hectograms/year/hectare" to "grams/day/acre":
* 0.11079926
-- result:
AS yield_food_grams_per_day_per_acre,
-- crop land...
-- in hectares:
a.val
-- conversion factor: "hectares" to "acres":
* 2.4710538
-- result:
AS farmland_acres,
-- protein...
-- grams per 100g food:
usda.`Protein (g)`
-- conversion factor:
* 0.01
-- result:
AS protein_fraction_of_food,
-- fat...
-- grams per 100g food:
usda.`Total lipid (fat) (g)`
-- conversion factor:
* 0.01
-- result:
AS fat_fraction_of_food,
-- kalories...
-- per 100g food:
usda.`Energy (kcal)`
-- conversion factor:
* 0.01
-- result:
AS kalories_per_gram_of_food
FROM usda
JOIN fao_usda USING(`NDB_No`)
JOIN (SELECT *, AVG(`Value`) AS val FROM fao WHERE `Element`="Production" GROUP BY `Area`,`Item`) p USING(`Item Code (FAO)`)
JOIN (SELECT *, AVG(`Value`) AS val FROM fao WHERE `Element`="Yield" GROUP BY `Area`,`Item`) y USING(`Area`,`Item`)
JOIN (SELECT *, AVG(`Value`) AS val FROM fao WHERE `Element`="Area harvested" GROUP BY `Area`,`Item`) a USING(`Area`,`Item`)
LEFT JOIN region ON p.`Area Code (ISO3)` = region.`ISO3 Code`
LEFT JOIN pop ON p.`Area Code (ISO3)` = pop.`Country Code`
LEFT JOIN ghi ON p.`Area` = ghi.`Country`
);
--- end of mandatory intro code to run ---
------------------------------------------
-- Test out the fao_usda cross-reference table --
SELECT CHAR(10)||
"Crop name: "||`Item`||CHAR(10)||
"Food name: "||`Long_Desc`||CHAR(10)||
"Harvest (as defined) is "||((100-`Refuse`)*`Multiplier`)||"% food."||CHAR(10)
FROM fao
JOIN fao_usda USING(`Item Code (FAO)`)
JOIN usda USING(`NDB_No`)
WHERE `Area`="World" AND `Element`="Production" AND `Year`="2018";
-- Test out the fao_usda cross-reference table --
SELECT CHAR(10)||
'FAO "'||`Item`||'"'||CHAR(10)||
' -' ||(100-100*`Multiplier`)||'% ='||CHAR(10)||
'USDA "'||`Long_Desc`||'"'||CHAR(10)||
' -' ||`Refuse`||'% "'||`Ref_desc`||'" ='||CHAR(10)||
'Edible portion.'||CHAR(10)
FROM fao
JOIN fao_usda USING(`Item Code (FAO)`)
JOIN usda USING(`NDB_No`)
WHERE `Area`="World" AND `Element`="Production" AND `Year`="2018";
-- Test that "World" is the only aggregate --
-- All the other rows should add up to "World"
-- Numbers don't have to match perfectly but should be pretty close
SELECT SUM(production_food_grams_per_day) FROM main WHERE region_name = "World";
SELECT SUM(production_food_grams_per_day) FROM main WHERE region_name <> "World";
-- Global primary food production, per capita, to be shown in nutrition calculator --
SELECT production_food_grams_per_day / 7.9e9, food_name
FROM main WHERE region_name="World"
ORDER BY production_food_grams_per_day DESC;
-- Overview, by country --
SELECT
CHAR(10)||
region_name||CHAR(10)||
PRINTF(" %d kalories/day/person (primary)"||CHAR(10)||
" %d grams protein/day/person (primary)"||CHAR(10)||
" farmland: %.2f acres/person"||CHAR(10),
SUM(production_kalories_per_day) / region_population,
SUM(production_protein_grams_per_day) / region_population,
SUM(farmland_acres) / region_population)||
CASE WHEN region_ghi IS NOT NULL AND region_ghi <> ''
THEN ">hunger index: "||region_ghi||CHAR(10)
ELSE "" END
FROM main GROUP BY region_name HAVING region_population;
-- How much of wheat comes from Ukraine and Russia
SELECT production_food_grams_per_day / 7.9e9 -- per capita, world population
FROM main WHERE crop_name="Wheat" AND region_name="World";
SELECT production_food_grams_per_day / 7.9e9 -- per capita, world population
FROM main WHERE crop_name="Wheat" AND region_name="Ukraine";
SELECT production_food_grams_per_day / 7.9e9 -- per capita, world population
FROM main WHERE crop_name="Wheat" AND region_name="Russian Federation";
----- olam.wiki/Crop_choices -----
--==Best yielding crops for each region==
-- Generates a wiki table (but you still have to add the {| and |} manually).
-- Ranked from best (left) to worst (right).
-- Ranking system: 2000 kalories has the same "value" as 60 grams of protein.
-- .once -e
SELECT
"|'''"||region_name||"'''"||CHAR(10)||
"|.<br />.<br /> kalories/day/acre <br /> g protein/day/acre "||CHAR(10)||
PRINTF("|''Status quo<br />all crops averaged''<br />%d<br />%d",
SUM(production_kalories_per_day )/SUM(farmland_acres),
SUM(production_protein_grams_per_day)/SUM(farmland_acres)
) ||CHAR(10)||
GROUP_CONCAT(
PRINTF("|'''%s'''<br />%d<br />%d",
crop_name, yield_kalories_per_day_per_acre, yield_protein_grams_per_day_per_acre
), CHAR(10)
) ||CHAR(10)||'|-'
FROM (
SELECT * FROM main
ORDER BY yield_kalories_per_day_per_acre/2000 + yield_protein_grams_per_day_per_acre/60 DESC
)
GROUP BY region_name;
-- Scenario: Every country grows mostly its best yielding crops --
-- Farm land is reallocated in a way that favors high-yielding crops.
-- Each crop's land use will be proportional to the square of its yield (as scored above).
-- Total farmland remains the same.
CREATE TEMPORARY TABLE scored AS
SELECT *, yield_kalories_per_day_per_acre/2000 + yield_protein_grams_per_day_per_acre/60 AS score
FROM main
WHERE region_name <> "World" -- "World" is the only aggregated region in the dataset; leave it out
AND crop_name <> "Mushrooms and truffles"; -- Mushrooms have exceptionally high yields, but this is because they grow without sun - they get their energy from the [[biomass waste]] of other food crops, generally. If you don't omit mushrooms, they end up being like 80% of the whole food supply. But this is not [[Term:viable|]] because there wouldn't be much biomass waste from other crops to grow the mushrooms in. And besides, who would want a diet of mostly mushrooms? Anyway, so mushrooms are removed from here, but are added back below, with the same production/yield/areaharvested as the status quo.
CREATE TEMPORARY TABLE scenario1 AS
SELECT *, score*score*ratio AS new_acres
FROM scored JOIN (
SELECT region_name, SUM(farmland_acres)/SUM(score*score) AS ratio
FROM scored GROUP BY region_name
) USING(region_name);
INSERT INTO scenario1 SELECT *, 0 AS score, 0 AS ratio, farmland_acres AS new_acres FROM main WHERE crop_name = "Mushrooms and truffles" AND region_name <> "World"; -- see above for explanation
DROP TABLE scored;
-- Text report, by country
SELECT PRINTF(
"%s"||CHAR(10)||
" farmland: %.2f acres per person"||CHAR(10)||
" crops per day per person:"||CHAR(10)||
" Status quo : %d kalories, %d grams of protein"||CHAR(10)||
" New scenario: %d kalories, %d grams of protein"||CHAR(10)||
" Top crop: %s"||CHAR(10),
region_name,
SUM(farmland_acres) /region_population,
SUM(production_kalories_per_day) /region_population,
SUM(production_protein_grams_per_day) /region_population,
SUM(new_acres*yield_kalories_per_day_per_acre) /region_population,
SUM(new_acres*yield_protein_grams_per_day_per_acre)/region_population
+ MAX(score)*0, -- tells sqlite to use the name of the top-scoring crop
crop_name)
FROM scenario1
GROUP BY region_name;
-- Global summary
SELECT PRINTF(
"World"||CHAR(10)||
" farmland: %.2f acres per person"||CHAR(10)||
" crops per day per person:"||CHAR(10)||
" Status quo : %d kalories, %d grams of protein, %d grams of fat"||CHAR(10)||
" New scenario: %d kalories, %d grams of protein, %d grams of fat"||CHAR(10),
SUM(farmland_acres) /7.9e9,
SUM(production_kalories_per_day) /7.9e9,
SUM(production_protein_grams_per_day) /7.9e9,-- divide by
SUM(production_fat_grams_per_day) /7.9e9,-- world population
SUM(new_acres*yield_kalories_per_day_per_acre) /7.9e9,
SUM(new_acres*yield_protein_grams_per_day_per_acre)/7.9e9,
SUM(new_acres*yield_fat_grams_per_day_per_acre) /7.9e9)
FROM scenario1;
-- Data for generating maps
-- food-production-primary1-if-crop-choices.png
SELECT region_m49 ||' '|| (SUM(new_acres*yield_kalories_per_day_per_acre)/SUM(farmland_acres))
FROM scenario1
GROUP BY region_m49
HAVING region_m49; -- use the results as input for: ./number-pairs-to-array-file crop-yields-scenario1
-- Also make a similar map of status quo, for comparison
-- food-production-primary1.png
SELECT region_m49 ||' '|| (SUM(production_kalories_per_day)/SUM(farmland_acres))
FROM scenario1
GROUP BY region_m49
HAVING region_m49; -- use the results as input for: ./number-pairs-to-array-file crop-yields
-- Data for nutrition calculator
SELECT SUM(new_acres*yield_food_grams_per_day_per_acre)/7.9e9 AS f, food_name
FROM scenario1
GROUP BY food_name
ORDER BY f DESC; -- use the results as input for: make-nutrition0.9-url-from-sql-results.html
-- Done
DROP TABLE scenario1;
-- olam.wiki/Deforestation --
-- Crop land trends --
SELECT PRINTF("%s:"||CHAR(9)||"%s%.1f%%/year",
`Area`,
CASE WHEN y2020 > y2018 THEN "+" ELSE "" END,
100.0 * (y2020 - y2018) / (2020 - 2018) / y2018)
FROM (SELECT SUM(`Value`) AS y2018, `Area` FROM fao WHERE `Element`="Area harvested" AND `Year`="2018" GROUP BY `Area`)
JOIN (SELECT SUM(`Value`) AS y2020, `Area` FROM fao WHERE `Element`="Area harvested" AND `Year`="2020" GROUP BY `Area`) USING (`Area`);
-- olam.wiki/Clothing --
-- General overview of fibre crops --
-- global production, in grams per day per capita
-- Note: some of these aren't fibre crops, it's any crops that aren't in the fao_usda cross-reference table
SELECT AVG(`Value`) -- tonnes/year
* 2737.9093 -- convert to grams/day
/ 7.9e9 -- per capita (divide by world population)
AS g,
`Item`
FROM fao WHERE `Area`="World" AND `Element`="Production" AND (
`Item Code (FAO)` NOT IN (SELECT `Item Code (FAO)` FROM fao_usda)
OR `Item`="Seed cotton")
GROUP BY `Item`
ORDER BY g;
-- keep in mind that an average t-shirt weighs 142 grams.
-- olam.wiki/calc - Data points generated for the calculator --
-- cotton.production, tonnes/year (to be fair, a more accurate number might be found in crops-processed.csv)
SELECT AVG(`Value`*(1-`Multiplier`))
FROM fao JOIN fao_usda USING (`Item Code (FAO)`)
WHERE `Area`="World" AND `Item`="Seed cotton" AND `Element`="Production";
-- cotton.farmland, hectares
SELECT AVG(`Value`) FROM fao
WHERE `Area`="World" AND `Item`="Seed cotton" AND `Element`="Area harvested";