diff --git a/analysis/preprocessing/full_code.Rmd b/analysis/preprocessing/full_code.Rmd index cfc2851c0a42af734c69d4811ffce426aef71166..e5e5aee96722f7b5adba8652560401130db45926 100644 --- a/analysis/preprocessing/full_code.Rmd +++ b/analysis/preprocessing/full_code.Rmd @@ -1117,6 +1117,7 @@ Would be in an 'income-stratified-footprints' preprocessing folder # income-stratified-footprints directory #data_dir_income_stratified_footprints = paste("/",file.path("data","metab","income-stratified-footprints", fsep=.Platform$file.sep),sep="") data_dir_income_stratified_footprints = here("analysis", "preprocessing", "income-stratified-footprints") +data_dir_exiobase = here("analysis", "preprocessing", "EXIOBASE") ################################################### !!!! method 1 - PPS HH - RENT NOT MAPPED TO EXIOBASE !!!! ########################################### ########################################################################################################################################################## @@ -2562,7 +2563,7 @@ fd_exiobase = disaggregated_final_demand %>% # direct from FD - to go back to results without direct FD fp, do not run this next chunk and do not bind_rows with 'results' -env_ac_pefasu_no_TR = read_csv(paste0(data_dir_income_stratified_footprints, "/data/env_ac_pefasu_1_Data.csv")) %>% +env_ac_pefasu_no_TR = read_csv(paste0(data_dir_income_stratified_footprints, "/env_ac_pefasu_1_Data.csv")) %>% filter(TIME == 2015) %>% mutate(geo = dplyr::recode(GEO,"Austria" = "AT", "Belgium" = "BE", @@ -2612,7 +2613,7 @@ env_ac_pefasu_TR = env_ac_pefasu_no_TR %>% env_ac_pefasu = rbind(env_ac_pefasu_no_TR,env_ac_pefasu_TR) %>% gather(sector,share_of_total_energy,-geo) -env_ac_ainah_r2 = read_csv(paste0(data_dir_income_stratified_footprints, "/data/env_ac_ainah_r2_1_Data.csv")) %>% +env_ac_ainah_r2 = read_csv(paste0(data_dir_income_stratified_footprints, "/env_ac_ainah_r2_1_Data.csv")) %>% filter(TIME == 2015) %>% mutate(geo = dplyr::recode(GEO,"Austria" = "AT", "Belgium" = "BE", @@ -3013,19 +3014,8 @@ results = fd_exiobase %>% results_with_direct_FD_fp = bind_rows(results,direct_FD_fp_wide) -#write.csv(results, paste0(data_dir_income_stratified_footprints, "/results_no_rent_ixi.csv")) - - ### create compressed results_ixi rds file -#if (!require("pacman")) install.packages("pacman") -#pacman::p_load(tidyverse, -# janitor, -# here) - -#dat_all = read_csv(here("data/results_ixi.csv")) %>% -# clean_names() - dat_all = results_with_direct_FD_fp %>% clean_names() @@ -3035,7 +3025,7 @@ sectors = dat_all %>% mutate(sector_id = row_number()) #write_csv(sectors, here("data/sector_labels.csv")) -write_csv(sectors, paste0(data_dir_income_stratified_footprints, "/sectors_method1_ixi_pps_hh.csv")) +write_csv(sectors, paste0(here("/analysis/data/derived/sectors_method1_ixi.csv"))) # convert aggregated sector labels to IDs sectors_agg = dat_all %>% @@ -3043,7 +3033,7 @@ sectors_agg = dat_all %>% mutate(sector_agg_id = row_number()) #write_csv(sectors_agg, here("data/sector_agg_labels.csv")) -write_csv(sectors_agg, paste0(data_dir_income_stratified_footprints, "/sectors_agg_method1_ixi_pps_hh.csv")) +write_csv(sectors_agg, paste0(here("/analysis/data/derived/sectors_agg_method1_ixi.csv"))) # convert COICOP labels to IDs coicop = dat_all %>% @@ -3051,7 +3041,7 @@ coicop = dat_all %>% mutate(coicop_id = row_number()) #write_csv(sectors_agg, here("data/sector_agg_labels.csv")) -write_csv(coicop, paste0(data_dir_income_stratified_footprints, "/coicop_method1_ixi_pps_hh.csv")) +write_csv(coicop, paste0(here("/analysis/data/derived/coicop_method1_ixi.csv"))) # replace sector text labels with numerical IDs (save space) dat_compressed = dat_all %>% @@ -3186,25 +3176,15 @@ results_recombined = tmp_fd %>% left_join(tmp_energy_domestic, by=c("year", "geo", "sector_id", "quint")) %>% left_join(tmp_energy_europe, by = c("year", "geo", "sector_id", "quint")) - - # finally re-join aggregated sector IDs results_formatted = results_recombined %>% left_join(sector_mapping, by="sector_id") %>% ungroup() %>% select(-coicop_id) -#write_rds(results_formated, here("/results_formated.rds")) - write.csv(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_ixi.csv")) - write_rds(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_ixi.rds")) - -#write.csv(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_ixi_pps_ae.csv")) -#write_rds(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_ixi_pps_ae.rds")) - - ################################################### !!!! method 1 - PXP version - PPS HH NO RENT !!!! #################################################### ########################################################################################################################################################## ########################################################################################################################################################## @@ -4544,7 +4524,7 @@ fd_exiobase = disaggregated_final_demand %>% # direct from FD - to go back to results without direct FD fp, do not run this next chunk and do not bind_rows with 'results' -env_ac_pefasu_no_TR = read_csv(paste0(data_dir_income_stratified_footprints, "/data/env_ac_pefasu_1_Data.csv")) %>% +env_ac_pefasu_no_TR = read_csv(paste0(data_dir_income_stratified_footprints, "/env_ac_pefasu_1_Data.csv")) %>% filter(TIME == 2015) %>% mutate(geo = dplyr::recode(GEO,"Austria" = "AT", "Belgium" = "BE", @@ -4594,7 +4574,7 @@ env_ac_pefasu_TR = env_ac_pefasu_no_TR %>% env_ac_pefasu = rbind(env_ac_pefasu_no_TR,env_ac_pefasu_TR) %>% gather(sector,share_of_total_energy,-geo) -env_ac_ainah_r2 = read_csv(paste0(data_dir_income_stratified_footprints, "/data/env_ac_ainah_r2_1_Data.csv")) %>% +env_ac_ainah_r2 = read_csv(paste0(data_dir_income_stratified_footprints, "/env_ac_ainah_r2_1_Data.csv")) %>% filter(TIME == 2015) %>% mutate(geo = dplyr::recode(GEO,"Austria" = "AT", "Belgium" = "BE", @@ -4994,18 +4974,8 @@ results = fd_exiobase %>% energy_total_europe = q1_energy_europe+q2_energy_europe+q3_energy_europe+q4_energy_europe+q5_energy_europe) results_with_direct_FD_fp = bind_rows(results,direct_FD_fp_wide) -#write.csv(results, paste0(data_dir_income_stratified_footprints, "/results_no_rent_ixi.csv")) - - -### create compressed results_ixi rds file - -#if (!require("pacman")) install.packages("pacman") -#pacman::p_load(tidyverse, -# janitor, -# here) -#dat_all = read_csv(here("data/results_ixi.csv")) %>% -# clean_names() +### create compressed results_pxp rds file dat_all = results_with_direct_FD_fp %>% clean_names() @@ -5015,24 +4985,22 @@ sectors = dat_all %>% distinct(sector) %>% mutate(sector_id = row_number()) -#write_csv(sectors, here("data/sector_labels.csv")) -write_csv(sectors, paste0(data_dir_income_stratified_footprints, "/sectors_method1_pxp_pps_hh.csv")) +# if interested in looking at a sectoral breakdown of the product-by-product version results, un-comment line below +#write_csv(sectors, paste0(here("/analysis/data/derived/si/sectors_method1_pxp.csv"))) # convert aggregated sector labels to IDs sectors_agg = dat_all %>% distinct(five_sectors) %>% mutate(sector_agg_id = row_number()) -#write_csv(sectors_agg, here("data/sector_agg_labels.csv")) -write_csv(sectors_agg, paste0(data_dir_income_stratified_footprints, "/sectors_agg_method1_pxp_pps_hh.csv")) +#write_csv(sectors_agg, paste0(here("analysis/data/derived/si/sectors_agg_method1_pxp.csv"))) # convert COICOP labels to IDs coicop = dat_all %>% distinct(coicop) %>% mutate(coicop_id = row_number()) -#write_csv(sectors_agg, here("data/sector_agg_labels.csv")) -write_csv(coicop, paste0(data_dir_income_stratified_footprints, "/coicop_method1_pxp_pps_hh.csv")) +#write_csv(coicop, paste0(here("analysis/data/derived/si/coicop_method1_pxp.csv"))) # replace sector text labels with numerical IDs (save space) dat_compressed = dat_all %>% @@ -5175,23 +5143,11 @@ results_formatted = results_recombined %>% ungroup() %>% select(-coicop_id) -#write_rds(results_formated, here("/results_formated.rds")) - -write.csv(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_pxp_pps_hh_no_rent.csv")) - -#write_rds(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_pxp_pps_hh_no_rent.rds")) +write.csv(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_pxp.csv")) +write_rds(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_pxp.rds")) -#write.csv(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_pxp_pps_ae.csv")) -#write_rds(results_formatted, paste0(data_dir_income_stratified_footprints, "/results_formatted_method1_pxp_pps_ae.rds")) - - - - - - - -################################################### !!!! method 2 !!!! - IXI version - PPS HH - RENT NOT MAPPED TO EXIOBASE !!!!! ############################# +################################################### !!!! method 2 !!!! - IXI version ############################# ############################################################################################################################################################### ############################################################################################################################################################### @@ -5201,14 +5157,14 @@ write.csv(results_formatted, paste0(data_dir_income_stratified_footprints, "/res # aggregate - playing around trying to go the other way # load 'mean expenditure by quintile' data -hbs_exp_t133 = read_csv(paste0(data_dir_income_stratified_footprints, "/data/hbs_exp_t133.csv")) +hbs_exp_t133 = read_csv(paste0(data_dir_income_stratified_footprints, "/hbs_exp_t133.csv")) # rename and arrange by country mean_expenditure_by_quintile = hbs_exp_t133 %>% rename(geo = 3, quintile = "quantile") %>% arrange(geo) # load 'mean expenditure by quintile and coicop' data -hbs_str_t223 = read_csv(paste0(data_dir_income_stratified_footprints, "/data/hbs_str_t223.csv")) +hbs_str_t223 = read_csv(paste0(data_dir_income_stratified_footprints, "/hbs_str_t223.csv")) # rename and arrange by country mean_expenditure_by_coicop_sector = hbs_str_t223 %>% rename(geo = 4, quintile = "quantile") %>% @@ -5273,7 +5229,7 @@ join_expenditures = mean_expenditure_by_coicop_sector_long %>% # load margin tables -trade_and_transport = read.csv(paste0(data_dir_income_stratified_footprints, "/data/SNA_TABLE45_20042020103737298.csv")) %>% +trade_and_transport = read.csv(paste0(data_dir_income_stratified_footprints, "/SNA_TABLE45_20042020103737298.csv")) %>% select(LOCATION, PRODUCT, Product, Year, Value) %>% mutate(geo = dplyr::recode(LOCATION,"AUT" = "AT", "BEL" = "BE", @@ -5317,7 +5273,7 @@ trade_and_transport = read.csv(paste0(data_dir_income_stratified_footprints, "/d -taxes_less_subsidies = read.csv(paste0(data_dir_income_stratified_footprints, "/data/SNA_TABLE45_20042020104120395.csv")) %>% +taxes_less_subsidies = read.csv(paste0(data_dir_income_stratified_footprints, "/SNA_TABLE45_20042020104120395.csv")) %>% select(LOCATION, PRODUCT, Product, Year, Value) %>% mutate(geo = dplyr::recode(LOCATION,"AUT" = "AT", "BEL" = "BE", @@ -5515,10 +5471,6 @@ shares = join_expenditures %>% ########################################################################################################################################################## ########################################################################################################################################################## -# pre-processing - -data_dir_exiobase = paste("/",file.path("data","metab","Exiobase", fsep=.Platform$file.sep),sep="") - # Exiobase - ixi version years_exb_ixi = c(2005,2010,2015) @@ -5601,7 +5553,7 @@ for (i in years_exb_ixi){ # labels - Exiobase_T_labels = read.csv(paste0(data_dir_income_stratified_footprints, "/data/Exiobase_T_labels_ixi_w_coicop_mapping_no_rent.csv")) %>% + Exiobase_T_labels = read.csv(paste0(data_dir_income_stratified_footprints, "/Exiobase_T_labels_ixi_w_coicop_mapping.csv")) %>% mutate(V1 = dplyr::recode(V1,"GR" = "EL","GB" = "UK")) # TIVs @@ -6721,22 +6673,6 @@ join_ala = mean_expenditure_by_coicop_sector_long_bp %>% pm_bp = as.numeric(pm_bp), fd_me = pm_bp*((eurostat_countries_colsums*mean_exp_shares)/1000)) - -################################################### -#%>% -# rename(coicop_level1 = coicop) - -# TIV only taking the mean - -# mean_TIV_with_labels = TIV_with_labels %>% group_by(geo,year,coicop,coicop_level1) %>% -# summarise(TIV_CO2 = mean(TIV_CO2)) - -#ok = join_ala %>% left_join(mean_TIV_with_labels, by = c("geo","year","coicop")) %>% -# mutate(CO2_normal = exp_normal*TIV_CO2, -# CO2_pe = exp_pe*TIV_CO2, -# CO2_pi = exp_pi*TIV_CO2) -################################################## - Eurostat_countries_hh_fd_mean_TIV = as.data.frame(Eurostat_countries_hh_fd) %>% select(-year) weighted_mean_TIV_with_labels = cbind(TIVs,Eurostat_countries_hh_fd_mean_TIV) %>% @@ -7098,7 +7034,7 @@ ok = join_ala %>% # direct from FD - to go back to results without direct FD fp, do not run this next chunk and do not bind_rows with 'results' -env_ac_pefasu_no_TR = read_csv(paste0(data_dir_income_stratified_footprints, "/data/env_ac_pefasu_1_Data.csv")) %>% +env_ac_pefasu_no_TR = read_csv(paste0(data_dir_income_stratified_footprints, "/env_ac_pefasu_1_Data.csv")) %>% filter(TIME == 2015) %>% mutate(geo = dplyr::recode(GEO,"Austria" = "AT", "Belgium" = "BE", @@ -7148,7 +7084,7 @@ env_ac_pefasu_TR = env_ac_pefasu_no_TR %>% env_ac_pefasu = rbind(env_ac_pefasu_no_TR,env_ac_pefasu_TR) %>% gather(sector,share_of_total_energy,-geo) -env_ac_ainah_r2 = read_csv(paste0(data_dir_income_stratified_footprints, "/data/env_ac_ainah_r2_1_Data.csv")) %>% +env_ac_ainah_r2 = read_csv(paste0(data_dir_income_stratified_footprints, "/env_ac_ainah_r2_1_Data.csv")) %>% filter(TIME == 2015) %>% mutate(geo = dplyr::recode(GEO,"Austria" = "AT", "Belgium" = "BE", @@ -7491,15 +7427,6 @@ direct_FD_fp_wide_recombined = tmp_co2 %>% clean_names() %>% mutate(year = as.numeric(year)) - - -# something is wrong with 'bah' (don't think so anymore) - the expenditures match the german and now the -#shares match the german, but some countries are clearly wrong - with almost 100% shares in CP04, whereas some look -#relatively normal - have to figure this out - huge TIVS in the CP045 sector for those weird countries. likely some weird -#sector that has a huge TIV but not much expenditure to it so need to do a weighted average - was the case, now have done -#with weighted TIV. some eastern european countries like Bulgaria still have huge CP04 emission shares - might be correct -#if their electricity is extremely dirty - need to look at the intensities of each country individually - results = ok %>% filter(!(geo %in% c("EA","EA12","EA13","EA17", "EA18","EA19","EEA28","EEA30_2007", @@ -7550,7 +7477,8 @@ results_formatted = results %>% results_formatted_with_direct_FD_fp = bind_rows(results_formatted,direct_FD_fp_wide_recombined) -write.csv(results_formatted_with_direct_FD_fp, paste0(data_dir_income_stratified_footprints, "/results_formatted_method2_ixi_pps_hh_no_rent.csv")) +write.csv(results_formatted_with_direct_FD_fp, paste0(data_dir_income_stratified_footprints, "/results_formatted_method2_ixi.csv")) +write_rds(results_formatted_with_direct_FD_fp, paste0(data_dir_income_stratified_footprints, "/results_formatted_method2_ixi.rds")) ```