From eefbd8be24155f5707b0f296f6e86f2c9126b4c6 Mon Sep 17 00:00:00 2001
From: jaccard <jaccard@pik-potsdam.de>
Date: Thu, 17 Dec 2020 10:42:04 -0800
Subject: [PATCH] edit full code

---
 analysis/preprocessing/full_code.Rmd | 168 ++++++++++++---------------
 1 file changed, 76 insertions(+), 92 deletions(-)

diff --git a/analysis/preprocessing/full_code.Rmd b/analysis/preprocessing/full_code.Rmd
index 866aa9a..86f41de 100644
--- a/analysis/preprocessing/full_code.Rmd
+++ b/analysis/preprocessing/full_code.Rmd
@@ -7481,6 +7481,8 @@ write_rds(results_formatted_with_direct_FD_fp, paste0(data_dir_income_stratified
 
 # European expenditure deciles
 
+- Note: the EUROSTAT household data and the Norway household data need to be downloaded
+
 ```{r european-expenditure-deciles, eval = FALSE}
 
 # set target number of quantiles
@@ -7488,7 +7490,7 @@ target_eu_ntiles = 10
 
 ##### main paper results (main paper method, EXIOBASE industry-by-industry version)
 
-# 1) load income-stratified-footprints formatted results file
+# 1) load income-stratified-footprints formatted results file from previous code chunk
 dat_results_raw = read_rds(here("analysis", "preprocessing", "income-stratified-footprints",
                                 "results_formatted_method1_ixi.rds")) %>%
   ungroup() %>%
@@ -7498,7 +7500,7 @@ dat_results_raw = read_rds(here("analysis", "preprocessing", "income-stratified-
 # get iso3 country codes to join with household data
 country_codes = ISOcodes::ISO_3166_1 %>%
   select(iso2 = Alpha_2, iso3 = Alpha_3) %>%
-  # resolve inconsistency between Eurostat and ISO for Greece and UK/Great Britain
+  # resolve inconsistency between EUROSTAT and ISO for Greece and UK/Great Britain
   mutate(iso2 = if_else(iso2=="GR", "EL", iso2)) %>%
   mutate(iso2 = if_else(iso2=="GB", "UK", iso2))
 
@@ -7618,7 +7620,7 @@ df_expenditure_long = read_csv(here("analysis",
   select(-mean_expenditure) %>% 
   ungroup()
 
-## Calculate adult equivalents per household
+## calculate adult equivalents per household
 df_adult_e_p_hh = df_expenditure_long %>%
   rename(iso2 = geo) %>%
   pivot_wider(id_cols = c(iso2, year, quintile, imputed), 
@@ -7630,7 +7632,6 @@ df_adult_e_p_hh = df_expenditure_long %>%
   mutate(iso3 = if_else(iso2 == "XK", "XKX", iso3),
          quint = parse_number(quintile))
 
-
 ## add quintile population data
 mrio_results_with_adult_eq_all = dat_results_raw %>%
   filter(year %in% c(2005, 2010, 2015)) %>%
@@ -7644,8 +7645,7 @@ mrio_results_with_adult_eq_all = dat_results_raw %>%
   mutate(ae_quintile = hh_quintile * adult_e_p_hh) %>%
   select(-c(hh_quintile, adult_e_p_hh))
 
-
-## for the european expenditure deciles we use only countries with data in 2005, 2010 and 2015. This excludes Luxembourg and Italy. 
+## for the European expenditure deciles we use only countries with data in 2005, 2010 and 2015. This excludes Luxembourg and Italy. 
 complete_countries = mrio_results_with_adult_eq_all %>%
   group_by(year, iso2) %>%
   summarise(co2_kg = sum(co2_kg)) %>%
@@ -7661,8 +7661,8 @@ df_adult_e_p_hh %>%
   filter(iso2 %in% complete_countries, year<=2015, year>=2005) %>%
   mutate(quint = parse_number(quintile)) 
 
-# calculate EU expenditure tiles based on loaded mrio result file and adult equivalents.
-# returns country quintiles mapped to EU ntile rank and EU ntile boundaries
+# calculate European expenditure deciles based on loaded income-stratified-footprints result file and adult equivalents.
+# returns country quintiles mapped to European ntile rank and European ntile boundaries
 # helper function called by function below
 calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   
@@ -7673,14 +7673,14 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
     mutate(idx = 1:n(),
            eu_q_rank = 0) # later to be filled with euro quintile rank
   
-  # total EU adult equivalents (of included countries) in year
+  # total European adult equivalents (of included countries) in year
   total_ae_in_year = sum(country_data_annual_sorted$ae_quintile)
   
   # quantile target ae population
   eu_decile_adult_eq = total_ae_in_year/pquantile_count
   
-  # country quinitles must be split to allocate ae population accorting to eu quantile target ae population
-  # filtering by condition that cant be fulfilled is a lazy way to create an empty dataframe
+  # country quintiles must be split to allocate ae population according to European quantile target ae population
+  # filtering by condition that can't be fulfilled is a lazy way to create an empty dataframe
   # of the same structure as country_data_annual_sorted
   additional_rows = country_data_annual_sorted %>%
     filter(year==1)
@@ -7688,9 +7688,8 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   # store quantile split values
   eu_quantile_boundaries = data.frame(euro_q_rank = 1:pquantile_count, p = 0)
   
-  ## can't think of a non-loop way to do this, sorry
-  ## loops through the ordered dataset, assignes euro quantile rank
-  ## and splits quintiles where necessary
+  # loops through the ordered dataset, assigns European quantile rank
+  # and splits quintiles where necessary
   eu_ae_current = 0
   euro_q_rank_current = 1
   for (row_idx in 1:nrow(country_data_annual_sorted)) {
@@ -7700,17 +7699,17 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
       country_data_annual_sorted[row_idx, "eu_q_rank"] = euro_q_rank_current
     } else {
       ae_diff = eu_decile_adult_eq - eu_ae_current
-      ## write rest of this eu decile (split country quintile)
+      ## write rest of this European decile (split country quintile)
       new_row = country_data_annual_sorted[row_idx, ]
       new_row[1, "eu_q_rank"] = euro_q_rank_current
       new_row[1, "ae_quintile"] = ae_diff
-      ## record eu quantile boundary
+      ## record European quantile boundary
       eu_quantile_boundaries[eu_quantile_boundaries$euro_q_rank==euro_q_rank_current, "p"] =
         country_data_annual_sorted[row_idx, "fd_pae_e"]
       ## put first part of population in overflow dataframe
       additional_rows = additional_rows %>%
         bind_rows(new_row)
-      ## classify rest of country quinitle population to next euro quantile
+      ## classify rest of country quintile population to next European quantile
       country_data_annual_sorted[row_idx, "ae_quintile"] = 
         country_data_annual_sorted[row_idx, "ae_quintile"] - (ae_diff+0.0001)
       euro_q_rank_current = euro_q_rank_current + 1
@@ -7725,21 +7724,17 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   arrange(fd_pae_e, eu_q_rank) %>%
   mutate(idx = 1:n())
 
-  #ad zeroth and nth quantile (min and max)
+  # add zeroth and nth quantile (min and max)
   eu_quantile_boundaries[pquantile_count, "p"] = max(country_data_eu_quantiles$fd_pae_e)
-  #tmp = data.frame(euro_q_rank = 0, p = min(country_data_eu_quantiles$fd_pae_e)) %>%
-  #  bind_rows(eu_quantile_boundaries) 
-  
   
   list("df_q_data" = country_data_eu_quantiles, "df_q_boundaries" = eu_quantile_boundaries)
 }
 
-# maps MRIO results to EU ntile ranks, returns mapping and ntile EU boundaries
+# maps income-stratified-footprint results to European ntile ranks, returns mapping and ntile European boundaries
 map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
   
   df_eu_ntiles = calculate_eu_ntiles(pyear, pquantile_count = ptarget_ntiles)
   df_eu_ntiles_data = df_eu_ntiles$df_q_data
-  #df_eu_ntiles_p = df_eu_ntiles$df_q_boundaries
   
   sector_mapping = mrio_results_with_adult_eq %>%
     group_by(sector_id) %>%#
@@ -7763,7 +7758,7 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
            energy_use_europe_TJ,
            ae_quintile) %>%
     filter(year==pyear) %>%
-    # calc per adult aequivalent values in quintiles
+    # calculate per adult equivalent values in quintiles
     mutate(fd_pae_e = fd_me*1000000/ae_quintile,
            co2_pae_kg = co2_kg/ae_quintile,
            co2_pae_dom_kg = co2_domestic_kg/ae_quintile,
@@ -7789,7 +7784,7 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
     full_join(df_eu_ntiles_data %>%
                 rename(fd_pae_e_quint_tmp = fd_pae_e), by=c("iso2", "quint")) %>%
     rename(adult_eq = ae_quintile) %>% # country quintile and their split fraction population
-    # recalc totals
+    # recalculate totals
     mutate(fd_me = fd_pae_e*adult_eq/1000000,
            co2_kg = co2_pae_kg*adult_eq,
            co2_dom_kg = co2_pae_dom_kg*adult_eq,
@@ -7807,24 +7802,22 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
   
 }
 
-
-### Filter only countries with complete info for years 2005, 2010, 2015
+# filter only countries with complete info for years 2005, 2010, 2015
 mrio_results_with_adult_eq = mrio_results_with_adult_eq_all %>%
   filter(iso2 %in% complete_countries)
 
-## summarize final demand per adult equvalent per quintile across all sectors as basis for eurodeciles for complete countries
+# summarize final demand per adult equivalent per quintile across all sectors as basis for European deciles for complete countries
 summary_country_fd = mrio_results_with_adult_eq %>%
   group_by(iso2, year, quint) %>%
   summarise(ae_quintile = first(ae_quintile),
             fd_pae_e = sum(fd_me*1000000)/(ae_quintile))
 
-## summarize final demand per adult equvalent per quintile across all sectors as basis for eurodeciles for all countries
+# summarize final demand per adult equivalent per quintile across all sectors as basis for European deciles for all countries
 summary_country_fd_all = mrio_results_with_adult_eq_all %>%
   group_by(iso2, year, quint) %>%
   summarise(ae_quintile = first(ae_quintile),
             fd_pae_e = sum(fd_me*1000000)/(ae_quintile))
 
-
 df_mapped_result_2005 = map_mrio_results_to_eu_ntiles(2005, target_eu_ntiles)
 df_mapped_result_2005_data = df_mapped_result_2005$df_mapped_data
 df_mapped_result_2005_ntiles = df_mapped_result_2005$df_ntile_boundaries
@@ -7844,19 +7837,20 @@ df_mapped_result_data = df_mapped_result_2005_data %>%
 write_csv(df_mapped_result_data, 
           here(paste0("analysis/data/derived/mrio_results_eu_ntile_mapped_n_", target_eu_ntiles, ".csv")))
 
-###### using EXIOBASE product-by-product version
 
-# 1) load MRIO result file
+###### SI results, main paper method, EXIOBASE product-by-product version
+
+# 1) load income-stratified-footprints formatted results file
 dat_results_raw = read_rds(here("analysis", "preprocessing", "income-stratified-footprints",
                             "results_formatted_method1_pxp.rds")) %>%
   ungroup() %>%
   mutate(year= strtoi(year)) %>%
   rename(iso2 = geo)
 
-# get iso3 country codes to join with hh data
+# get iso3 country codes to join with household data
 country_codes = ISOcodes::ISO_3166_1 %>%
   select(iso2 = Alpha_2, iso3 = Alpha_3) %>%
-  # resolve inconsistency between Eurostat and ISO for Greece and UK/Great Britain
+  # resolve inconsistency between EUROSTAT and ISO for Greece and UK/Great Britain
   mutate(iso2 = if_else(iso2=="GR", "EL", iso2)) %>%
   mutate(iso2 = if_else(iso2=="GB", "UK", iso2))
 
@@ -7949,7 +7943,7 @@ write_csv(total_private_households, here("/analysis/preprocessing/income-stratif
 ## return to 2 digits per value
 options(digits=2)
 
-# 2) load Eurostat household data
+# 2) load merged total private households data 
 hh_data = read_csv(here("analysis", "preprocessing", "income-stratified-footprints",
                         "total_private_households.csv")) %>%
   mutate(imputed = if_else(is.na(total_private_households), TRUE, FALSE)) %>%
@@ -7960,7 +7954,7 @@ hh_data = read_csv(here("analysis", "preprocessing", "income-stratified-footprin
   left_join(country_codes, by="iso2") %>%
   select(-total_private_households)
 
-#3) Eurostat mean expenditures per household income quintile per household and per adult equivalent
+#3) load EUROSTAT mean expenditures per household income quintile per household and per adult equivalent (written and saved in the previous, income-stratified-footprints code chunk)
 df_expenditure_long = read_csv(here("analysis", "preprocessing", "income-stratified-footprints",
                                     "mean_expenditure_by_quintile_long.csv"),
                                na = ":") %>%
@@ -7974,7 +7968,7 @@ df_expenditure_long = read_csv(here("analysis", "preprocessing", "income-stratif
   select(-mean_expenditure) %>% 
   ungroup()
 
-## Calculate adult equivalents per household
+## calculate adult equivalents per household
 df_adult_e_p_hh = df_expenditure_long %>%
   rename(iso2 = geo) %>%
   pivot_wider(id_cols = c(iso2, year, quintile, imputed), 
@@ -7986,7 +7980,7 @@ df_adult_e_p_hh = df_expenditure_long %>%
   mutate(iso3 = if_else(iso2 == "XK", "XKX", iso3),
          quint = parse_number(quintile))
 
-# add quintile population data
+## add quintile population data
 mrio_results_with_adult_eq_all = dat_results_raw %>%
   filter(year %in% c(2005, 2010, 2015)) %>%
   left_join(hh_data, by=c("iso2", "year")) %>%
@@ -7999,8 +7993,7 @@ mrio_results_with_adult_eq_all = dat_results_raw %>%
   mutate(ae_quintile = hh_quintile * adult_e_p_hh) %>%
   select(-c(hh_quintile, adult_e_p_hh))
 
-
-#### ONLY COUNTRIES THAT HAVE DATA FOR 2005, 2010, and 2015
+## for the European expenditure deciles we use only countries with data in 2005, 2010 and 2015. This excludes Luxembourg and Italy. 
 complete_countries = mrio_results_with_adult_eq_all %>%
   group_by(year, iso2) %>%
   summarise(co2_kg = sum(co2_kg)) %>%
@@ -8016,8 +8009,8 @@ df_adult_e_p_hh %>%
   filter(iso2 %in% complete_countries, year<=2015, year>=2005) %>%
   mutate(quint = parse_number(quintile)) 
 
-# calculate EU expenditure tiles based on loaded mrio result file and adult equivalents.
-# returns country quintiles mapped to EU ntile rank and EU ntile boundaries
+# calculate European expenditure deciles based on loaded income-stratified-footprints result file and adult equivalents.
+# returns country quintiles mapped to European ntile rank and European ntile boundaries
 # helper function called by function below
 calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   
@@ -8028,13 +8021,13 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
     mutate(idx = 1:n(),
            eu_q_rank = 0) # later to be filled with euro quintile rank
   
-  # total EU adult equivalents (of included countries) in year
+  # total European adult equivalents (of included countries) in year
   total_ae_in_year = sum(country_data_annual_sorted$ae_quintile)
   
   # quantile target ae population
   eu_decile_adult_eq = total_ae_in_year/pquantile_count
   
-  # country quinitles must be split to allocate ae population accorting to eu quantile target ae population
+  # country quintiles must be split to allocate ae population according to European quantile target ae population
   # filtering by condition that cant be fulfilled is a lazy way to create an empty dataframe
   # of the same structure as country_data_annual_sorted
   additional_rows = country_data_annual_sorted %>%
@@ -8043,9 +8036,8 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   # store quantile split values
   eu_quantile_boundaries = data.frame(euro_q_rank = 1:pquantile_count, p = 0)
   
-  ## can't think of a non-loop way to do this, sorry
-  ## loops through the ordered dataset, assignes euro quantile rank
-  ## and splits quintiles where necessary
+  # loops through the ordered dataset, assigns European quantile rank
+  # and splits quintiles where necessary
   eu_ae_current = 0
   euro_q_rank_current = 1
   for (row_idx in 1:nrow(country_data_annual_sorted)) {
@@ -8055,17 +8047,17 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
       country_data_annual_sorted[row_idx, "eu_q_rank"] = euro_q_rank_current
     } else {
       ae_diff = eu_decile_adult_eq - eu_ae_current
-      ## write rest of this eu decile (split country quintile)
+      ## write rest of this European decile (split country quintile)
       new_row = country_data_annual_sorted[row_idx, ]
       new_row[1, "eu_q_rank"] = euro_q_rank_current
       new_row[1, "ae_quintile"] = ae_diff
-      ## record eu quantile boundary
+      ## record European quantile boundary
       eu_quantile_boundaries[eu_quantile_boundaries$euro_q_rank==euro_q_rank_current, "p"] =
         country_data_annual_sorted[row_idx, "fd_pae_e"]
       ## put first part of population in overflow dataframe
       additional_rows = additional_rows %>%
         bind_rows(new_row)
-      ## classify rest of country quinitle population to next euro quantile
+      ## classify rest of country quintile population to next European quantile
       country_data_annual_sorted[row_idx, "ae_quintile"] = 
         country_data_annual_sorted[row_idx, "ae_quintile"] - (ae_diff+0.0001)
       euro_q_rank_current = euro_q_rank_current + 1
@@ -8080,21 +8072,17 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   arrange(fd_pae_e, eu_q_rank) %>%
   mutate(idx = 1:n())
 
-  #ad zeroth and nth quantile (min and max)
+  # add zeroth and nth quantile (min and max)
   eu_quantile_boundaries[pquantile_count, "p"] = max(country_data_eu_quantiles$fd_pae_e)
-  #tmp = data.frame(euro_q_rank = 0, p = min(country_data_eu_quantiles$fd_pae_e)) %>%
-  #  bind_rows(eu_quantile_boundaries) 
-  
   
   list("df_q_data" = country_data_eu_quantiles, "df_q_boundaries" = eu_quantile_boundaries)
 }
 
-# maps MRIO results to EU ntile ranks, returns mapping and ntile EU boundaries
+# maps income-stratified-footprint results to European ntile ranks, returns mapping and ntile European boundaries
 map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
   
   df_eu_ntiles = calculate_eu_ntiles(pyear, pquantile_count = ptarget_ntiles)
   df_eu_ntiles_data = df_eu_ntiles$df_q_data
-  #df_eu_ntiles_p = df_eu_ntiles$df_q_boundaries
   
   sector_mapping = mrio_results_with_adult_eq %>%
     group_by(sector_id) %>%#
@@ -8118,7 +8106,7 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
            energy_use_europe_TJ,
            ae_quintile) %>%
     filter(year==pyear) %>%
-    # calc per adult aequivalent values in quintiles
+    # calculate per adult equivalent values in quintiles
     mutate(fd_pae_e = fd_me*1000000/ae_quintile,
            co2_pae_kg = co2_kg/ae_quintile,
            co2_pae_dom_kg = co2_domestic_kg/ae_quintile,
@@ -8144,7 +8132,7 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
     full_join(df_eu_ntiles_data %>%
                 rename(fd_pae_e_quint_tmp = fd_pae_e), by=c("iso2", "quint")) %>%
     rename(adult_eq = ae_quintile) %>% # country quintile and their split fraction population
-    # recalc totals
+    # recalculate totals
     mutate(fd_me = fd_pae_e*adult_eq/1000000,
            co2_kg = co2_pae_kg*adult_eq,
            co2_dom_kg = co2_pae_dom_kg*adult_eq,
@@ -8162,17 +8150,17 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
   
 }
 
-### Filter only countries with complete info for years 2005, 2010, 2015
+# filter only countries with complete info for years 2005, 2010, 2015
 mrio_results_with_adult_eq = mrio_results_with_adult_eq_all %>%
   filter(iso2 %in% complete_countries)
 
-## summarize final demand per adult equvalent per quintile across all sectors as basis for eurodeciles for complete countries
+# summarize final demand per adult equivalent per quintile across all sectors as basis for European deciles for complete countries
 summary_country_fd = mrio_results_with_adult_eq %>%
   group_by(iso2, year, quint) %>%
   summarise(ae_quintile = first(ae_quintile),
             fd_pae_e = sum(fd_me*1000000)/(ae_quintile))
 
-## summarize final demand per adult equvalent per quintile across all sectors as basis for eurodeciles for all countries
+# summarize final demand per adult equivalent per quintile across all sectors as basis for European deciles for all countries
 summary_country_fd_all = mrio_results_with_adult_eq_all %>%
   group_by(iso2, year, quint) %>%
   summarise(ae_quintile = first(ae_quintile),
@@ -8192,9 +8180,10 @@ df_mapped_result_data = df_mapped_result_2005_data %>%
 write_csv(df_mapped_result_data, 
           here(paste0("analysis/data/derived/si/mrio_results_eu_ntile_mapped_n_", target_eu_ntiles, "_pxp.csv")))
 
-###### alternative method, EXOIBASE industry-by-industry version
 
-# 1) load MRIO result file
+##### SI results, alternative method, EXIOBASE industry-by-industry version
+
+# 1) load income-stratified-footprints formatted results file
 dat_results_raw = read_rds(here("analysis", "preprocessing", "income-stratified-footprints",
                                 "results_formatted_method2_ixi.rds")) %>%
   ungroup() %>%
@@ -8204,7 +8193,7 @@ dat_results_raw = read_rds(here("analysis", "preprocessing", "income-stratified-
 # get iso3 country codes to join with hh data
 country_codes = ISOcodes::ISO_3166_1 %>%
   select(iso2 = Alpha_2, iso3 = Alpha_3) %>%
-  # resolve inconsistency between Eurostat and ISO for Greece and UK/Great Britain
+  # resolve inconsistency between EUROSTAT and ISO for Greece and UK/Great Britain
   mutate(iso2 = if_else(iso2=="GR", "EL", iso2)) %>%
   mutate(iso2 = if_else(iso2=="GB", "UK", iso2))
 
@@ -8297,7 +8286,7 @@ write_csv(total_private_households, here("/analysis/preprocessing/income-stratif
 ## return to 2 digits per value
 options(digits=2)
 
-# 2) load Eurostat household data
+# 2) load EUROSTAT household data
 hh_data = read_csv(here("analysis", "preprocessing", "income-stratified-footprints",
                         "total_private_households.csv")) %>%
   mutate(imputed = if_else(is.na(total_private_households), TRUE, FALSE)) %>%
@@ -8308,7 +8297,7 @@ hh_data = read_csv(here("analysis", "preprocessing", "income-stratified-footprin
   left_join(country_codes, by="iso2") %>%
   select(-total_private_households)
 
-#3) Eurostat mean expenditures per household income quintile per household and per adult equivalent
+#3) EUROSTAT mean expenditures per household income quintile per household and per adult equivalent
 df_expenditure_long = read_csv(here("analysis", "preprocessing", "income-stratified-footprints",
                                     "mean_expenditure_by_quintile_long.csv"),
                                na = ":") %>%
@@ -8322,7 +8311,7 @@ df_expenditure_long = read_csv(here("analysis", "preprocessing", "income-stratif
   select(-mean_expenditure) %>% 
   ungroup()
 
-## Calculate adult equivalents per household
+## calculate adult equivalents per household
 df_adult_e_p_hh = df_expenditure_long %>%
   rename(iso2 = geo) %>%
   pivot_wider(id_cols = c(iso2, year, quintile, imputed), 
@@ -8334,7 +8323,7 @@ df_adult_e_p_hh = df_expenditure_long %>%
   mutate(iso3 = if_else(iso2 == "XK", "XKX", iso3),
          quint = parse_number(quintile))
 
-# add quintile population data
+## add quintile population data
 mrio_results_with_adult_eq_all = dat_results_raw %>%
   filter(year %in% c(2005, 2010, 2015)) %>%
   left_join(hh_data, by=c("iso2", "year")) %>%
@@ -8347,7 +8336,7 @@ mrio_results_with_adult_eq_all = dat_results_raw %>%
   mutate(ae_quintile = hh_quintile * adult_e_p_hh) %>%
   select(-c(hh_quintile, adult_e_p_hh))
 
-#### ONLY COUNTRIES THAT HAVE DATA FOR 2005, 2010, and 2015
+## for the European expenditure deciles we use only countries with data in 2005, 2010 and 2015. This excludes Luxembourg and Italy. 
 complete_countries = mrio_results_with_adult_eq_all %>%
   group_by(year, iso2) %>%
   summarise(co2_kg = sum(co2_kg)) %>%
@@ -8363,8 +8352,8 @@ df_adult_e_p_hh %>%
   filter(iso2 %in% complete_countries, year<=2015, year>=2005) %>%
   mutate(quint = parse_number(quintile))
 
-# calculate EU expenditure tiles based on loaded mrio result file and adult equivalents.
-# returns country quintiles mapped to EU ntile rank and EU ntile boundaries
+# calculate European expenditure deciles based on loaded income-stratified-footprints result file and adult equivalents.
+# returns country quintiles mapped to European ntile rank and European ntile boundaries
 # helper function called by function below
 calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   
@@ -8375,23 +8364,22 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
     mutate(idx = 1:n(),
            eu_q_rank = 0) # later to be filled with euro quintile rank
   
-  # total EU adult equivalents (of included countries) in year
+  # total European adult equivalents (of included countries) in year
   total_ae_in_year = sum(country_data_annual_sorted$ae_quintile)
   
   # quantile target ae population
   eu_decile_adult_eq = total_ae_in_year/pquantile_count
   
-  # country quinitles must be split to allocate ae population accorting to eu quantile target ae population
-  # filtering by condition that cant be fulfilled is a lazy way to create an empty dataframe
+  # country quintiles must be split to allocate ae population according to European quantile target ae population
+  # filtering by condition that can't be fulfilled is a lazy way to create an empty dataframe
   # of the same structure as country_data_annual_sorted
   additional_rows = country_data_annual_sorted %>%
     filter(year==1)
   
   # store quantile split values
   eu_quantile_boundaries = data.frame(euro_q_rank = 1:pquantile_count, p = 0)
-  
-  ## can't think of a non-loop way to do this, sorry
-  ## loops through the ordered dataset, assignes euro quantile rank
+
+  ## loops through the ordered dataset, assigns European quantile rank
   ## and splits quintiles where necessary
   eu_ae_current = 0
   euro_q_rank_current = 1
@@ -8402,17 +8390,17 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
       country_data_annual_sorted[row_idx, "eu_q_rank"] = euro_q_rank_current
     } else {
       ae_diff = eu_decile_adult_eq - eu_ae_current
-      ## write rest of this eu decile (split country quintile)
+      ## write rest of this European decile (split country quintile)
       new_row = country_data_annual_sorted[row_idx, ]
       new_row[1, "eu_q_rank"] = euro_q_rank_current
       new_row[1, "ae_quintile"] = ae_diff
-      ## record eu quantile boundary
+      ## record European quantile boundary
       eu_quantile_boundaries[eu_quantile_boundaries$euro_q_rank==euro_q_rank_current, "p"] =
         country_data_annual_sorted[row_idx, "fd_pae_e"]
       ## put first part of population in overflow dataframe
       additional_rows = additional_rows %>%
         bind_rows(new_row)
-      ## classify rest of country quinitle population to next euro quantile
+      ## classify rest of country quintile population to next European quantile
       country_data_annual_sorted[row_idx, "ae_quintile"] = 
         country_data_annual_sorted[row_idx, "ae_quintile"] - (ae_diff+0.0001)
       euro_q_rank_current = euro_q_rank_current + 1
@@ -8427,21 +8415,17 @@ calculate_eu_ntiles <- function(pyear, pquantile_count=10) {
   arrange(fd_pae_e, eu_q_rank) %>%
   mutate(idx = 1:n())
 
-  #ad zeroth and nth quantile (min and max)
+  # add zeroth and nth quantile (min and max)
   eu_quantile_boundaries[pquantile_count, "p"] = max(country_data_eu_quantiles$fd_pae_e)
-  #tmp = data.frame(euro_q_rank = 0, p = min(country_data_eu_quantiles$fd_pae_e)) %>%
-  #  bind_rows(eu_quantile_boundaries) 
-  
   
   list("df_q_data" = country_data_eu_quantiles, "df_q_boundaries" = eu_quantile_boundaries)
 }
 
-# maps MRIO results to EU ntile ranks, returns mapping and ntile EU boundaries
+# maps income-stratified-footprint results to European ntile ranks, returns mapping and ntile European boundaries
 map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
   
   df_eu_ntiles = calculate_eu_ntiles(pyear, pquantile_count = ptarget_ntiles)
   df_eu_ntiles_data = df_eu_ntiles$df_q_data
-  #df_eu_ntiles_p = df_eu_ntiles$df_q_boundaries
   
   sector_mapping = mrio_results_with_adult_eq %>%
     group_by(coicop) %>%#
@@ -8465,7 +8449,7 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
            energy_use_europe_tj,
            ae_quintile) %>%
     filter(year==pyear) %>%
-    # calc per adult aequivalent values in quintiles
+    # calculate per adult equivalent values in quintiles
     mutate(fd_pae_e = fd_me*1000000/ae_quintile,
            co2_pae_kg = co2_kg/ae_quintile,
            co2_pae_dom_kg = co2_domestic_kg/ae_quintile,
@@ -8491,7 +8475,7 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
     full_join(df_eu_ntiles_data %>%
                 rename(fd_pae_e_quint_tmp = fd_pae_e), by=c("iso2", "quint")) %>%
     rename(adult_eq = ae_quintile) %>% # country quintile and their split fraction population
-    # recalc totals
+    # recalculate totals
     mutate(fd_me = fd_pae_e*adult_eq/1000000,
            co2_kg = co2_pae_kg*adult_eq,
            co2_dom_kg = co2_pae_dom_kg*adult_eq,
@@ -8503,23 +8487,23 @@ map_mrio_results_to_eu_ntiles <- function(pyear, ptarget_ntiles) {
            energy_use_dom_tj = energy_use_dom_pae_tj*adult_eq,
            energy_use_eu_tj = energy_use_eu_pae_tj*adult_eq
            ) #%>%
-    #left_join(sector_mapping, by="sector_id")
+    #left_join(sector_mapping, by="sector_id") # join with sectors is commented out (method2 only works with coicop categories)
   
   list("df_mapped_data" = df_mapped_data, "df_ntile_boundaries" = df_eu_ntiles$df_q_boundaries)
   
 }
 
-### Filter only countries with complete info for years 2005, 2010, 2015
+# filter only countries with complete info for years 2005, 2010, 2015
 mrio_results_with_adult_eq = mrio_results_with_adult_eq_all %>%
   filter(iso2 %in% complete_countries)
 
-## summarize final demand per adult equvalent per quintile across all sectors as basis for eurodeciles for complete countries
+# summarize final demand per adult equivalent per quintile across all sectors as basis for European deciles for complete countries
 summary_country_fd = mrio_results_with_adult_eq %>%
   group_by(iso2, year, quint) %>%
   summarise(ae_quintile = first(ae_quintile),
             fd_pae_e = sum(fd_me*1000000, na.rm = T)/(ae_quintile))
 
-## summarize final demand per adult equvalent per quintile across all sectors as basis for eurodeciles for all countries
+# summarize final demand per adult equivalent per quintile across all sectors as basis for European deciles for all countries
 summary_country_fd_all = mrio_results_with_adult_eq_all %>%
   group_by(iso2, year, quint) %>%
   summarise(ae_quintile = first(ae_quintile),
-- 
GitLab