Excess mortality
Data preparation
COVID-19 data
Pulling data directly from Johns Hopkins Coronavirus Resource Center github repo (using csse_covid_19_daily_reports
with appropriate time stamps).
Sources of data given for three analysed countries are:
- Spain - RTVE
- Sweden - The Swedish Public Health Agency
- Switzerland
- Federal Office Of Public Health
- Open Government Data Reported By The Swiss Cantons
<- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/12-31-2020.csv") %>%
covid_cases_deaths_jh_20 filter(Country_Region %in% c("Switzerland", "Sweden", "Spain")) %>%
group_by(Country_Region) %>%
summarise(Confirmed = sum(Confirmed, na.rm = TRUE),
Deaths = sum(Deaths, na.rm = TRUE)) %>%
ungroup() %>%
mutate(Year = 2020)
<- bind_rows(
covid_cases_deaths_jh
covid_cases_deaths_jh_20,
read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-01-2021.csv") %>%
filter(Country_Region %in% c("Switzerland", "Sweden", "Spain")) %>%
group_by(Country_Region) %>%
summarise(Confirmed = sum(Confirmed, na.rm = TRUE),
Deaths = sum(Deaths, na.rm = TRUE)) %>%
ungroup() %>%
left_join(covid_cases_deaths_jh_20 %>%
rename (Deaths_20 = Deaths,
Confirmed_20 = Confirmed)) %>%
mutate(Deaths = Deaths - Deaths_20,
Confirmed = Confirmed - Confirmed_20) %>%
select(-ends_with("_20")) %>%
mutate(Year = 2021)
%>%
) rename(Country = Country_Region) %>%
select(-Confirmed)
write_rds(covid_cases_deaths_jh, "data/covid_cases_deaths_jh.Rds")
HMD - yearly data with age & sex structure
The Human Mortality Database is the source of the data.
Available on yearly resolution only but for one year age bands & separate by sex.
The dataset has been last updated on 2021-11-10
.
In order to update newer data the chunk option of the code snippet below has to be changed to eval=TRUE
and a password to user account on mortality.org website provided (stored in plain text file HMD.txt
in secrets
directory of the project).
p_load(HMDHFDplus)
# text file with password or modify password in calls below
<- readr::read_file("secrets/HMD.txt")
password
# CH
# pop
<- readHMDweb(CNTRY = "CHE", item ="Population" ,
hmd_ch_pop_raw username = "r.panczak@gmail.com", password = password,
fixup = FALSE) %>%
as_tibble()
write_rds(hmd_ch_pop_raw, "data-raw/mortality_org/hmd_ch_pop_raw.Rds")
# deaths
<- readHMDweb(CNTRY = "CHE", item = "Deaths_1x1",
hmd_ch_deaths_raw username = "r.panczak@gmail.com", password = password,
fixup = FALSE) %>%
as_tibble()
write_rds(hmd_ch_deaths_raw, "data-raw/mortality_org/hmd_ch_deaths_raw.Rds")
# ES
# pop
# 1975 problem resolved in issue #45
<- readHMDweb(CNTRY = "ESP", item ="Population" ,
hmd_es_pop_raw username = "r.panczak@gmail.com", password = password,
fixup = FALSE) %>%
as_tibble() %>%
mutate(Year = ifelse(Year == "1975+", 1975, Year)) %>%
filter(Year != "1975-") %>%
mutate(Year = as.numeric(Year))
write_rds(hmd_es_pop_raw, "data-raw/mortality_org/hmd_es_pop_raw.Rds")
# deaths
<- readHMDweb(CNTRY = "ESP", item = "Deaths_1x1",
hmd_es_deaths_raw username = "r.panczak@gmail.com", password = password,
fixup = FALSE) %>%
as_tibble()
write_rds(hmd_es_deaths_raw, "data-raw/mortality_org/hmd_es_deaths_raw.Rds")
# SE
# pop
<- readHMDweb(CNTRY = "SWE", item ="Population" ,
hmd_se_pop_raw username = "r.panczak@gmail.com", password = password,
fixup = FALSE) %>%
as_tibble()
write_rds(hmd_se_pop_raw, "data-raw/mortality_org/hmd_se_pop_raw.Rds")
# deaths
<- readHMDweb(CNTRY = "SWE", item = "Deaths_1x1",
hmd_se_deaths_raw username = "r.panczak@gmail.com", password = password,
fixup = FALSE) %>%
as_tibble()
write_rds(hmd_se_deaths_raw, "data-raw/mortality_org/hmd_se_deaths_raw.Rds")
p_unload(HMDHFDplus)
Yearly deaths
Combining three countries into one dataset.
<-
hmd_deaths_age_sex bind_rows(
read_rds("data-raw/mortality_org/hmd_es_deaths_raw.Rds") %>% mutate(Country = "Spain"),
read_rds("data-raw/mortality_org/hmd_se_deaths_raw.Rds") %>% mutate(Country = "Sweden"),
read_rds("data-raw/mortality_org/hmd_ch_deaths_raw.Rds") %>% mutate(Country = "Switzerland")
%>%
) select(-Male, -Female) %>%
mutate(Age = if_else(Age == "110+", "110", Age)) %>%
mutate(Year = as.integer(Year),
Age = as.integer(Age),
Deaths = as.integer(round(Total))) %>%
select(-Total) %>%
relocate(Country)
write_rds(hmd_deaths_age_sex, "data/mortality_org/hmd_deaths_age_sex.Rds")
Note: Spain has two years less of data than other two countries:
# A tibble: 3 x 3
Country Min Max
<chr> <int> <int>
1 Spain 1908 2018
2 Sweden 1751 2020
3 Switzerland 1876 2020
Yearly totals per country indicating different time frames of data.
<- hmd_deaths_age_sex %>%
hmd_deaths_year group_by(Country, Year) %>%
summarise(Deaths = as.integer(round(sum(Deaths)))) %>%
ungroup()
write_rds(hmd_deaths_year, "data/mortality_org/hmd_deaths_year.Rds")
Example of age breakdown, showing demographic transition and first, crude indication of the effects of pandemics (and also - smaller coverage of ESP data!).
Yearly population
Similarly, population data is available with 1y age bands by year.
Again, Spain has less data:
# A tibble: 3 x 3
Country Min Max
<chr> <int> <int>
1 Spain 1908 2019
2 Sweden 1751 2021
3 Switzerland 1876 2021
Yearly totals
<- hmd_pop_age_sex %>%
hmd_pop_year group_by(Country, Year) %>%
summarise(Population = as.integer(round(sum(Population)))) %>%
ungroup()
write_rds(hmd_pop_year, "data/mortality_org/hmd_pop_year.Rds")
Again, age structure is available on yearly resolution. Example of three countries (starting in 1908
- at minimum year of coverage for all countries).
Switzerland
Monthly deaths until 2020
Historical data from Todesfälle nach Monat und Sterblichkeit seit 1803.
Using data only since 1877
when monthly reporting starts.
<- read.px("data-raw/BfS/px-x-0102020206_111.px") %>%
ch_deaths_month as_tibble() %>% remove_empty() %>% clean_names() %>%
rename(Year = jahr,
Deaths = value) %>%
mutate(Year = as.numeric(as.character(Year))) %>%
filter(Year >= 1877) %>%
filter(str_detect(demografisches_merkmal_und_indikator, 'Todesfälle im')) %>%
droplevels(.) %>%
mutate(Month = rep(seq(1:12), times = nrow(.)/12) ) %>%
select(-demografisches_merkmal_und_indikator) %>%
mutate(Date = ymd(paste0(Year, "-", Month, "-01")),
Year = as.integer(Year),
Deaths = as.integer(Deaths)) %>%
relocate(Year, Month, Date)
Monthly 2021 deaths
BfS delivered custom data to supplement first six months of 2021
. Merged to dataset covering period until 2020
.
<- read_csv("data-raw/BfS/Lieferung_Staub_2021_MonatsdatenJANbisJUN_Update04NOV21.csv", col_types = cols(TodJahr = col_integer(), TodMonat = col_integer(), Alter = col_integer(), COUNT = col_integer()), trim_ws = TRUE) %>%
ch_deaths_2021 rename(Year = TodJahr, Month = TodMonat, Deaths = COUNT) %>%
filter(Year == as.integer(2021)) %>%
group_by(Year, Month) %>%
summarise(Deaths = sum(Deaths)) %>%
ungroup() %>%
mutate(Date = ymd(paste0(Year, "-", Month, "-01")),
Deaths = as.integer(round(Deaths))) %>%
select(Year, Month, Date, Deaths)
Monthly datasets combined
Yearly 2021 deaths with age & sex structure
BfS delivered data to supplement missing six months of 2021
.
<- read_csv("data-raw/BfS/Lieferung_Staub_2021_MonatsdatenJANbisJUN_Update04NOV21.csv", col_types = cols(TodJahr = col_integer(), TodMonat = col_integer(), Alter = col_integer(), COUNT = col_integer()), trim_ws = TRUE) %>%
ch_deaths_age_sex_2021 rename(Year = TodJahr, Age = Alter, Sex = Geschlecht, Deaths = COUNT) %>%
select(-TodMonat) %>%
group_by(Year, Age) %>%
summarise(Deaths = sum(Deaths)) %>%
ungroup() %>%
mutate(Country = "Switzerland",
Deaths = as.integer(round(Deaths))) %>%
relocate(Country)
Spain
Monthly deaths untill 2019
Data from INE: - data prior to 1940 was transcribed from PDFs available from INE
- Vital Statistics: Deaths (Historical series 1941-1974) 1941-1974
here
- Vital Statistics: Deaths (Series since 1975)* since 1975
here
<- read_xlsx("data-raw/INE/Spain.xlsx") %>%
es_deaths_month remove_empty(which = c("rows", "cols")) %>%
clean_names() %>%
rename(Year = jahr) %>%
pivot_longer(!Year, names_to = "Month", values_to = "Deaths") %>%
mutate(Month = as.integer(str_replace(Month, fixed("m"), "")),
Year = as.integer(Year),
Deaths = as.integer(round(Deaths))) %>%
mutate(Date = ymd(paste0(Year, "-", Month, "-01"))) %>%
relocate(Year, Month, Date) %>%
filter(Year != 2020)
Monthly 2020 deaths
2020
update is from Vital Statistics. Provisional data: Year 2020 here
source("R/fn_get_death_data_spain.R")
<- fn_get_death_data_spain("data-raw/INE/Spain_2020.xlsx") %>%
es_2020 gather(., Month, Deaths, January:December, factor_key = TRUE) %>%
group_by(Month) %>%
summarize(Deaths = sum(Deaths, na.rm = TRUE)) %>%
ungroup() %>%
mutate(Year = as.integer(2020),
Deaths = as.integer(Deaths),
Date = ymd(paste0(Year, Month, "-01")),
Month = as.integer(Month)) %>%
relocate(Year, Month, Date, Deaths)
Monthly 2021 deaths
2021
data is from Estimate of number of weekly deaths during the Covid-19 outbreak here.
Note: data are in weekly format and converted with methods (and caveats!) described STMF chapter of weekly data doc.
<- read_xlsx("data-raw/INE/Spain_2021_death_up.xlsx", skip = 9) %>%
temp remove_empty(which = c("rows", "cols")) %>%
clean_names() %>%
rename(Week = x1) %>%
filter(!is.na(from_0_to_4_years_old)) %>%
mutate(Deaths = rowSums(.[-1])) %>%
select(Week, Deaths)
<- weekToMonth(rev(temp$Deaths), datStart = "31-12-2018") %>%
es_2021 as_tibble() %>%
separate(yearMonth, c("Year", "Month"), convert = TRUE) %>%
rename(Deaths = value) %>%
mutate(Date = ymd(paste0(Year, "-", Month, "-01")),
Deaths = as.integer(Deaths)) %>%
relocate(Year, Month, Date) %>%
filter(Year == 2021) %>%
filter(Month <= 6)
Monthly datasets combined
Yearly 2019-2020 deaths with age structure
INE data to supplement missing 2019-20 years in HMD.
2019
data available in Vital Statistics: Deaths here and 2020
data available inVital Statistics. Provisional data: Year 2020 here.
<- bind_rows(
es_deaths_age_2019_2020
read_delim("data-raw/INE/Spain2019age.csv",
delim = ";",
locale = locale(grouping_mark = "."),
trim_ws = TRUE) %>%
mutate(Year = as.integer(2019)) %>%
mutate(Age = word(Edad, 1),
Age = str_replace(Age, fixed("Menores"), "0"),
Age = as.integer(Age)
%>%
) select(Year, Age, Total) %>%
mutate(Deaths = as.integer(round(Total))) %>%
select(-Total),
fn_get_death_data_spain("data-raw/INE/Spain_2020.xlsx") %>%
gather(., Month, Deaths, January:December, factor_key = TRUE) %>%
group_by(Age) %>%
::summarize(Deaths = sum(Deaths, na.rm = TRUE)) %>%
dplyrmutate(Year = as.integer(2020),
Deaths = as.integer(Deaths)) %>%
select(Year, Age, Deaths)
%>%
) mutate(Country = "Spain") %>% relocate(Country)
Yearly 2021 deaths with age structure
INE data to supplement missing 6 months of 2021
in HMD available from Weekly death estimates (EDeS) during the covid-19 outbreak here.
Note: data are in weekly format and converted with methods (and caveats!) described above in STMF chapter.
<- read_xlsx("data-raw/INE/Spain_2021_death_up.xlsx", skip = 9) %>%
temp remove_empty(which = c("rows", "cols")) %>%
clean_names() %>%
rename(Week = x1) %>%
filter(!is.na(from_0_to_4_years_old))
<- bind_rows(
es_deaths_age_2021
weekToMonth(rev(temp$from_0_to_4_years_old), datStart = "31-12-2018") %>%
mutate(Age = 0) %>% as_tibble(),
weekToMonth(rev(temp$from_5_to_9_years), datStart = "31-12-2018") %>%
mutate(Age = 5) %>% as_tibble(),
weekToMonth(rev(temp$from_10_to_14_years), datStart = "31-12-2018") %>%
mutate(Age = 10) %>% as_tibble(),
weekToMonth(rev(temp$from_15_to_19_years), datStart = "31-12-2018") %>%
mutate(Age = 15) %>% as_tibble(),
weekToMonth(rev(temp$from_20_to_24_years), datStart = "31-12-2018") %>%
mutate(Age = 20) %>% as_tibble(),
weekToMonth(rev(temp$from_25_to_29_years), datStart = "31-12-2018") %>%
mutate(Age = 25) %>% as_tibble(),
weekToMonth(rev(temp$from_30_to_34_years_old), datStart = "31-12-2018") %>%
mutate(Age = 30) %>% as_tibble(),
weekToMonth(rev(temp$from_35_to_39_years_old), datStart = "31-12-2018") %>%
mutate(Age = 35) %>% as_tibble(),
weekToMonth(rev(temp$from_40_to_44_years), datStart = "31-12-2018") %>%
mutate(Age = 40) %>% as_tibble(),
weekToMonth(rev(temp$from_45_to_49_years_old), datStart = "31-12-2018") %>%
mutate(Age = 45) %>% as_tibble(),
weekToMonth(rev(temp$from_50_to_54_years_old), datStart = "31-12-2018") %>%
mutate(Age = 50) %>% as_tibble(),
weekToMonth(rev(temp$from_55_to_59_years_old), datStart = "31-12-2018") %>%
mutate(Age = 55) %>% as_tibble(),
weekToMonth(rev(temp$from_60_to_64_years_old), datStart = "31-12-2018") %>%
mutate(Age = 60) %>% as_tibble(),
weekToMonth(rev(temp$from_65_to_69_years_old), datStart = "31-12-2018") %>%
mutate(Age = 65) %>% as_tibble(),
weekToMonth(rev(temp$from_70_to_74_years), datStart = "31-12-2018") %>%
mutate(Age = 70) %>% as_tibble(),
weekToMonth(rev(temp$from_75_to_79_years), datStart = "31-12-2018") %>%
mutate(Age = 75) %>% as_tibble(),
weekToMonth(rev(temp$from_80_to_84_years), datStart = "31-12-2018") %>%
mutate(Age = 80) %>% as_tibble(),
weekToMonth(rev(temp$from_85_to_89_years), datStart = "31-12-2018") %>%
mutate(Age = 85) %>% as_tibble(),
weekToMonth(rev(temp$x90_years_old_and_over), datStart = "31-12-2018") %>%
mutate(Age = 90) %>% as_tibble()
%>%
) as_tibble() %>%
mutate(Country = "Spain") %>%
separate(yearMonth, c("Year", "Month"), convert = TRUE) %>%
rename(Deaths = value) %>%
mutate(Deaths = as.integer(Deaths),
Age = as.integer(Age)) %>%
relocate(Country, Year, Month, Age) %>%
filter(Year == 2021) %>%
filter(Month <= 6) %>%
group_by(Country, Year, Age) %>%
summarise(Deaths = sum(Deaths)) %>%
ungroup()
rm(temp)
Sweden
Monthly deaths untill 2020
<- read_csv("data-raw/SCB/000000NF_20210315-164930.csv",
se_deaths_month skip = 2) %>%
remove_empty(which = c("rows", "cols")) %>%
clean_names() %>%
rename(Month_txt = month) %>%
filter(Month_txt != "Unknown") %>%
select(-sex) %>%
pivot_longer(!Month_txt, names_to = "Year", values_to = "Deaths") %>%
group_by(Year, Month_txt) %>%
summarise(Deaths = sum(Deaths)) %>%
ungroup() %>%
mutate(Year = as.integer(str_replace(Year, fixed("x"), "")),
Month = match(Month_txt, month.name),
Deaths = as.integer(round(Deaths))) %>%
select(-Month_txt) %>%
mutate(Date = ymd(paste0(Year, "-", Month, "-01"))) %>%
relocate(Year, Month, Date) %>%
arrange(Year, Month)
Monthly 2021 deaths
Preliminär statistik över döda (Excel) file is available from SCB. Data are available till October.
# curl command doesn't work with this URL
# please download manually using link above!
download.file(url = "https://www.scb.se/hitta-statistik/statistik-efter-amne/befolkning/befolkningens-sammansattning/befolkningsstatistik/pong/tabell-och-diagram/preliminar-statistik-over-doda/",
destfile = "data-raw/SCB/2021-08-23-preliminar_statistik_over_doda_inkl_eng.xlsx",
method = "curl")
<- read_excel("data-raw/SCB/2021-11-01-preliminar_statistik_over_doda_inkl_eng.xlsx", sheet = "Tabell 10", skip = 10) %>%
se_deaths_month_2021 filter(Region == "Hela riket" & Period == "2021 antal") %>%
select(Januari:Oktober) %>%
pivot_longer(Januari:Oktober) %>%
mutate(Year = as.integer(2021),
Month = 1:10,
Date = ymd(paste0(Year, "-", Month, "-01")),
Deaths = as.integer(value)
%>%
) select(-name, -value) %>%
filter(Month <= 6)
%<>%
se_deaths_month bind_rows(se_deaths_month_2021) %>%
mutate(Country = "Sweden") %>% relocate(Country)
Monthly deaths combined
Projected 2020-21 population numbers
Data used for projection come from HMD.
<- hmd_pop_age_sex %>%
hmd_pop_age_agg mutate(Age_cat = cut(Age, c(seq(0, 90, 10), 120),
include.lowest = TRUE, right = FALSE)) %>%
group_by(Country, Year, Age_cat) %>%
summarise(Population = sum(Population)) %>%
ungroup()
As explained above, Spain has less data:
%>%
hmd_pop_age_agg # filter(Year >= 2010 & Year <= 2019) %>%
filter(Year >= 2015) %>%
ggplot(
aes(x = Year, y = Population, fill = factor(Year))) +
geom_col() +
scale_y_continuous() +
theme_bw() +
scale_fill_viridis_d(option = "E", direction = -1) +
xlab("Age group") + ylab("Population difference between sources") +
facet_grid(vars(Country), vars(Age_cat), scales = "free_y") +
geom_vline(xintercept = 2019.5, color = "red")
For each year, age, country stratum, linear model is fit using the 2015-2019
data; then prediction is made for 2020
& 2021
years. The gist of this operation is to exclude the effect of pandemic from the population counts in years affected.
# p_load(mgcv)
# p_load(splines)
# knots <- quantile(seq.int(2010, 2019), p = c(0.25, 0.5, 0.75))
<- hmd_pop_age_agg %>%
pop_age_2020 filter(Year >= 2015 & Year <= 2019) %>%
group_by(Country, Age_cat) %>%
# do(broom::tidy(lm(population ~ year, data = .))) %>%
# do(broom::augment(lm(population ~ year, data = .))) %>%
# lm
do(lm(Population ~ Year, data = .) %>%
# polynomial
# do(lm(Population ~ poly(Year, 2, raw = TRUE), data = .) %>%
# spline
# do(lm(Population ~ bs(Year, knots = knots), data = .) %>%
# GAM
# do(gam(Population ~ s(Year), data = .) %>%
predict(., newdata = tibble(Year = c(2020, 2021))) %>%
tibble(Year = c(2020, 2021),
Population = .)
%>%
) ungroup() %>%
mutate(Population = as.integer(round(Population)),
Year = as.integer(round(Year))) %>%
arrange(Country, Year, Age_cat) %>%
relocate(Country, Year, Age_cat, Population)
For comparison against 2020
& 2021
we use data from HMD (for SWE & CHE) and INE (for ESP). For the latter we use Main series since 1971
from here. # HMD docs say that pop Spain 2013-2019 are from: “1 Jan” so using data from this time point for consistency.
<-
es_pop_age_new bind_rows(
read_excel("data-raw/INE/Spain_Population_age-2019.xlsx",
sheet=1,range="A8:H212") %>%
slice(., -(1:2)) %>%
select(1,8) %>%
rename(Population=`Both sexes...8`) %>%
filter(!is.na(Population)) %>%
mutate(Age=0:(nrow(.)-1)) %>%
select(Age, Population) %>%
mutate(Year = as.integer(2020)),
read_excel("data-raw/INE/Spain_Population_age-2019.xlsx",
sheet=1,range="A8:B212") %>%
slice(., -(1:2)) %>%
rename(Population=`Both sexes`) %>%
filter(!is.na(Population)) %>%
mutate(Age=0:(nrow(.)-1)) %>%
select(Age, Population) %>%
mutate(Year = as.integer(2021))
%>%
) mutate(Age_cat = cut(Age, c(seq(0, 90, 10), 120),
include.lowest = TRUE, right = FALSE
%>%
)) group_by(Year, Age_cat) %>%
summarise(Population = sum(Population)) %>%
ungroup() %>%
mutate(Population = as.integer(Population),
Country = "Spain") %>%
relocate(Country, Year)
Absolute difference across countries and age groups in 2020/21
:
Relative differences in 2020/21
:
Fit & prediction 2020/21
:
<- function(country) {
fn_age_plot %>%
data filter(Country == country) %>%
ggplot(data = .,
aes(x = Year, y = Population / 1000000,
color = factor(Source))) +
geom_point() +
geom_smooth(data = . %>%
filter(Source == "HMD" & Year < 2020),
colour = "grey40", size = 0.5,
method = "lm",
se = FALSE) +
# geom_smooth(data = . %>%
# filter(Source == "HMD" & Year < 2020),
# method = "lm",
# # formula = y ~ poly(x, 2, raw = TRUE),
# formula = y ~ splines::bs(x, df = 3),
# # method = "gam",
# # formula = y ~ s(x),
# se = FALSE) +
scale_y_continuous(labels = comma) +
theme_bw() +
scale_colour_discrete(hue_pal()(2), name = "Source") +
xlab("Year") + ylab("Population [millions]") +
facet_wrap(vars(Age_cat), scales = "free_y")
}
Spain
Sweden
Switzerland
Combined analysis datasets
Population - predicted figures
Merging HMD until 2019 and predictions for 2020 & 2021.
<- bind_rows(
pop_age_sex_exp
%>%
hmd_pop_age_agg filter(Year < 2020),
pop_age_2020 %>%
) arrange(Country, Year, Age_cat) %>%
rename(Population_exp = Population)
<- pop_age_sex_exp %>%
pop_yearly_exp group_by(Country, Year) %>%
summarise(Population_exp = sum(Population_exp)) %>%
ungroup()
Population - observed figures
Merging HMD until 2021 for CHE & SWE with INE data for ESP for 2020 & 2021.
<- bind_rows(
pop_age_sex_obs
hmd_pop_age_agg,
es_pop_age_new %>%
) arrange(Country, Year, Age_cat) %>%
rename(Population_obs = Population)
<- pop_age_sex_obs %>%
pop_yearly_obs group_by(Country, Year) %>%
summarise(Population_obs = sum(Population_obs)) %>%
ungroup()
Monthly deaths with yearly pops
<- bind_rows(
deaths_monthly
### ESP - starting at 1908; see issue #81
%>%
es_deaths_month filter(Year >= 1908),
# NAs for second half of year
%>%
es_deaths_month filter(Year == 2020 & Month > 6) %>%
mutate(Year = as.integer(2021),
Deaths = NA_integer_),
### SWE
se_deaths_month,
%>%
se_deaths_month filter(Year == 2020 & Month > 6) %>%
mutate(Year = as.integer(2021),
Deaths = NA_integer_,
Date = Date + years(1)),
ch_deaths_month,
%>%
ch_deaths_month filter(Year == 2020 & Month > 6) %>%
mutate(Year = as.integer(2021),
Deaths = NA_integer_,
Date = Date + years(1))
%>%
) left_join(pop_yearly_obs) %>%
left_join(pop_yearly_exp) %>%
arrange(Country, Year, Month) %>%
mutate(si_one = sin(2*pi*Month/12),
si_two = sin(4*pi*Month/12),
co_one = cos(2*pi*Month/12),
co_two = cos(4*pi*Month/12))
write_rds(deaths_monthly, "data/deaths_monthly.Rds")
Name | deaths_monthly |
Number of rows | 5160 |
Number of columns | 11 |
_______________________ | |
Column type frequency: | |
character | 1 |
Date | 1 |
numeric | 9 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
Country | 0 | 1 | 5 | 11 | 0 | 3 | 0 |
Variable type: Date
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
Date | 0 | 1 | 1851-01-01 | 2021-12-01 | 1950-04-16 | 2052 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Year | 0 | 1 | 1947.94 | 44.46 | 1851.00 | 1914.00 | 1950.0 | 1986.00 | 2021.00 | <U+2583><U+2586><U+2587><U+2587><U+2587> |
Month | 0 | 1 | 6.50 | 3.45 | 1.00 | 3.75 | 6.5 | 9.25 | 12.00 | <U+2587><U+2585><U+2585><U+2585><U+2587> |
Deaths | 18 | 1 | 12381.92 | 11683.42 | 3138.00 | 5073.00 | 6548.5 | 19991.25 | 163422.00 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
Population_obs | 0 | 1 | 12962850.43 | 12692201.35 | 2766150.00 | 4645244.00 | 6915352.0 | 20598108.00 | 47394223.00 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
Population_exp | 0 | 1 | 12961746.35 | 12687827.68 | 2766150.00 | 4645244.00 | 6915352.0 | 20598108.00 | 47085087.00 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
si_one | 0 | 1 | 0.00 | 0.71 | -1.00 | -0.59 | 0.0 | 0.59 | 1.00 | <U+2587><U+2585><U+2585><U+2585><U+2587> |
si_two | 0 | 1 | 0.00 | 0.71 | -0.87 | -0.87 | 0.0 | 0.87 | 0.87 | <U+2587><U+2581><U+2587><U+2581><U+2587> |
co_one | 0 | 1 | 0.00 | 0.71 | -1.00 | -0.59 | 0.0 | 0.59 | 1.00 | <U+2587><U+2585><U+2585><U+2585><U+2587> |
co_two | 0 | 1 | 0.00 | 0.71 | -1.00 | -0.50 | 0.0 | 0.50 | 1.00 | <U+2583><U+2587><U+2581><U+2587><U+2583> |
Yearly deaths by sex and age group with pops
Spain age&sex distribution in HMD is available from later!
<- bind_rows(
deaths_yearly_age_sex
hmd_deaths_age_sex,
# SWE
# 2020 is already in HMD for SWE
# 2021 is sadly missing for SWE
#ESP
es_deaths_age_2019_2020,
es_deaths_age_2021,
# CHE
# 2020 is already in HMD for CHE
ch_deaths_age_sex_2021
%>%
) mutate(
# Age_cat = cut(Age,
# breaks = c(0, 1, 5, 20, 40, 60, 80, 112),
# # labels = c("<1", "1-4", "5-19", "20-39", "40-59", "60-79","80+"),
# right = FALSE),
Age_cat = cut(Age,
breaks = c(seq(0, 90, 10), 120),
include.lowest = TRUE, right = FALSE)
%>%
) group_by(Country, Year, Age_cat) %>%
summarise(Deaths = sum(Deaths)) %>%
ungroup() %>%
left_join(pop_age_sex_obs) %>%
left_join(pop_age_sex_exp)
# labels=c("0-9","10-19","20-29","30-39","40-49", "50-59","60-69","70-79","80-89","90+")
write_rds(deaths_yearly_age_sex, "data/deaths_yearly_age_sex.Rds")
Name | deaths_yearly_age_sex |
Number of rows | 5300 |
Number of columns | 6 |
_______________________ | |
Column type frequency: | |
character | 1 |
factor | 1 |
numeric | 4 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
Country | 0 | 1 | 5 | 11 | 0 | 3 | 0 |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Age_cat | 0 | 1 | FALSE | 10 | [0,: 530, [10: 530, [20: 530, [30: 530 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Year | 0 | 1 | 1919.85 | 71.23 | 1751 | 1879.00 | 1933 | 1977.0 | 2021 | <U+2583><U+2582><U+2585><U+2587><U+2587> |
Deaths | 0 | 1 | 13170.75 | 22287.96 | 65 | 2694.75 | 5193 | 13933.5 | 250259 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
Population_obs | 0 | 1 | 1095818.15 | 1532831.43 | 378 | 211811.00 | 575000 | 1067678.0 | 8097288 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
Population_exp | 0 | 1 | 1095709.08 | 1532476.50 | 378 | 211811.00 | 575000 | 1067678.0 | 8097288 | <U+2587><U+2581><U+2581><U+2581><U+2581> |
Computing Environment
R version 4.1.2 (2021-11-01) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363) Matrix products: default attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] skimr_2.1.3 lemon_0.4.5 kableExtra_1.3.4 wktmo_1.0.5 [5] aweek_1.0.2 lubridate_1.8.0 readxl_1.3.1 scales_1.1.1 [9] janitor_2.1.0 magrittr_2.0.1 forcats_0.5.1 stringr_1.4.0 [13] dplyr_1.0.7 purrr_0.3.4 readr_2.1.1 tidyr_1.1.4 [17] tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1 pacman_0.5.1To cite R in publications use:
R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.
To cite the ggplot2 package in publications use:Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.