2020 excess mortality & voting patterns in CH
Ancillary data preparation
Population
Data extracted from px-x-0103010000_201
:
Ständige und nichtständige Wohnbevölkerung nach institutionellen Gliederungen, Staatsangehörigkeit (Kategorie), Geburtsort, Geschlecht und Altersklasse
Important info from footnotes:
Letzte Änderungen: Neuer Datensatz (Jahr 2020) Stand der Datenbank: Juni 2021 Stichtag: 31. Dezember Raumbezug: Gemeinden / 18.10.2020 Datenquelle: Statistik der Bevölkerung und der Haushalte STATPOP Definition der ständigen Wohnbevölkerung
Community codes
Community mutations data after 2021-01-01
from BfS are
integrated into the data and pops are recalculated using these new codes
to match mortality.
<- read_rds("data/BfS/histcomm.Rds") %>%
histcomm filter(datum_der_aufnahme >= ymd("2021-01-01")) %>%
select(bfs_gde_num_old, bfs_gde_num_new, gemeindename_new)
Further corrections of municipalities mutations are needed to bring
the data to the state of 2022-01-01
.
Data preps
Data aggregated to age groups as in deaths dataset.
<- read_xlsx("data-raw/BfS/px-x-0103010000_201.xlsx",
pop col_types = c("numeric",
"skip", "numeric", "text", "skip",
"skip", "skip", "text", "skip", "skip",
"skip", "text", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric", "numeric", "numeric",
"numeric"),
skip = 1) %>%
remove_empty(c("rows", "cols")) %>% clean_names() %>%
rename(year = x1,
GMDNR = x2,
GMDNAME = x3) %>%
mutate(sex = if_else(x5 == "Frau", "Female", "Male"),
nationality = if_else(x4 == "Schweiz", "Swiss", "Foreigner"),
year = as.integer(year)) %>%
select(-x4, -x5) %>%
relocate(sex, .after = GMDNAME) %>%
relocate(nationality, .after = sex) %>%
mutate(GMDNAME = word(GMDNAME, 2, -1)) %>%
fill(year, GMDNR, GMDNAME, nationality) %>%
mutate(`<40` = x0_4_jahre + x5_9_jahre + x10_14_jahre + x15_19_jahre +
+ x25_29_jahre + x30_34_jahre + x35_39_jahre) %>%
x20_24_jahre select(-x0_4_jahre, -x5_9_jahre, -x10_14_jahre, -x15_19_jahre,
-x20_24_jahre, -x25_29_jahre, -x30_34_jahre, -x35_39_jahre) %>%
mutate(`40-49` = x40_44_jahre + x45_49_jahre) %>%
mutate(`50-59` = x50_54_jahre + x55_59_jahre) %>%
mutate(`60-69` = x60_64_jahre + x65_69_jahre) %>%
mutate(`70-79` = x70_74_jahre + x75_79_jahre) %>%
select(-x40_44_jahre, -x45_49_jahre, -x50_54_jahre, -x55_59_jahre,
-x60_64_jahre, -x65_69_jahre, -x70_74_jahre, -x75_79_jahre) %>%
mutate(`80+` = x80_84_jahre +x85_89_jahre + x90_94_jahre +
+ x100_jahre_und_mehr) %>%
x95_99_jahre select(-x80_84_jahre, -x85_89_jahre, -x90_94_jahre,
-x95_99_jahre, -x100_jahre_und_mehr) %>%
pivot_longer(cols = `<40`:`80+`,
names_to = "age", values_to = "pop") %>%
# BfS identified changes
left_join(histcomm, by = c("GMDNR" = "bfs_gde_num_old")) %>%
mutate(GMDNR = if_else(!is.na(bfs_gde_num_new), bfs_gde_num_new, GMDNR),
GMDNAME = if_else(!is.na(gemeindename_new), gemeindename_new, GMDNAME)) %>%
select(-bfs_gde_num_new, -gemeindename_new) %>%
# Essertes merge
mutate(GMDNR = if_else(GMDNAME == "Essertes", 5805, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Essertes", "Oron", GMDNAME)) %>%
# now to reach the state of `2022-01-01`
mutate(GMDNR = if_else(GMDNAME == "Bad Zurzach", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Bad Zurzach", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Baldingen", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Baldingen", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Böbikon", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Böbikon", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Kaiserstuhl", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Kaiserstuhl", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Rekingen (AG)", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rekingen (AG)", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Rietheim", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rietheim", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Rümikon", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rümikon", "Zurzach", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Wislikofen", 4324, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Wislikofen", "Zurzach", GMDNAME)) %>%
# Böztal merge
mutate(GMDNR = if_else(GMDNAME == "Bözen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Bözen", "Böztal", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Elfingen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Elfingen", "Böztal", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Effingen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Effingen", "Böztal", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Hornussen", 4185, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Hornussen", "Böztal", GMDNAME)) %>%
# Blonay - Saint-Légier merge
mutate(GMDNR = if_else(GMDNAME == "Blonay", 5892, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Blonay", "Blonay - Saint-Légier", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Saint-Légier-La Chiésaz", 5892, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Saint-Légier-La Chiésaz", "Blonay - Saint-Légier", GMDNAME)) %>%
# Murten merge
mutate(GMDNR = if_else(GMDNAME == "Galmiz", 2275, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Galmiz", "Murten", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Gempenach", 2275, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Gempenach", "Murten", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Clavaleyres", 2275, GMDNR)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Clavaleyres", "Murten", GMDNAME)) %>%
# Schwende-Rüte merge
mutate(GMDNAME = if_else(GMDNAME == "Schwende", "Schwende-Rüte", GMDNAME)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rüte", "Schwende-Rüte", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Schwende-Rüte", 3112, GMDNR)) %>%
# Val Mara merge
mutate(GMDNAME = if_else(GMDNAME == "Melano", "Val Mara", GMDNAME)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Maroggia", "Val Mara", GMDNAME)) %>%
mutate(GMDNAME = if_else(GMDNAME == "Rovio", "Val Mara", GMDNAME)) %>%
mutate(GMDNR = if_else(GMDNAME == "Val Mara", 5240, GMDNR)) %>%
group_by(year, GMDNR, GMDNAME, sex, age) %>%
summarise(pop = as.integer(sum(pop))) %>%
ungroup() %>%
arrange(year, GMDNR, sex, age)
Yearly totals (on 31st Dec!):
# A tibble: 7 × 2
year pop
<int> <chr>
1 2014 8,237,666
2 2015 8,327,126
3 2016 8,419,550
4 2017 8,484,130
5 2018 8,544,527
6 2019 8,606,033
7 2020 8,670,300
Age (40 plus!) distributions over years
Predicting 2020 population
For each age, sex, nationality stratum, for each municipality separately, Poisson model was fitted using the 2014-2019 data; then prediction was made for 2020 & 2021 years. The gist of this operation was to exclude the effect of pandemic from the pop counts in years affected.
<- pop %>%
pop_ext_poi filter(year != 2020) %>%
# filter(GMDNR == 261 | GMDNR == 389) %>%
group_by(GMDNR, age, sex) %>%
do(glm(pop ~ year, data = ., family = "poisson") %>%
predict(., newdata = tibble(year = as.integer(2020)), type = "response") %>%
tibble(year = as.integer(2020), pop_ext_poi = .)) %>%
ungroup() %>%
mutate(pop_ext_poi = as.integer(round(pop_ext_poi))) %>%
arrange(year, GMDNR, sex, age) %>%
relocate(year)
Summary of values:
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0 42.0 109.0 338.5 283.0 117341.0 2
Note: 2 NAs!
# A tibble: 7 × 6
year GMDNR GMDNAME sex age pop
<int> <dbl> <chr> <chr> <chr> <int>
1 2014 4232 Geltwil Male 80+ 0
2 2015 4232 Geltwil Male 80+ 0
3 2016 4232 Geltwil Male 80+ 0
4 2017 4232 Geltwil Male 80+ 0
5 2018 4232 Geltwil Male 80+ 0
6 2019 4232 Geltwil Male 80+ 1
7 2020 4232 Geltwil Male 80+ 3
# A tibble: 1 × 5
year GMDNR age sex pop_ext_poi
<int> <dbl> <chr> <chr> <int>
1 2020 4232 80+ Male NA
# A tibble: 7 × 6
year GMDNR GMDNAME sex age pop
<int> <dbl> <chr> <chr> <chr> <int>
1 2014 389 Meienried Male 80+ 0
2 2015 389 Meienried Male 80+ 0
3 2016 389 Meienried Male 80+ 0
4 2017 389 Meienried Male 80+ 0
5 2018 389 Meienried Male 80+ 0
6 2019 389 Meienried Male 80+ 1
7 2020 389 Meienried Male 80+ 1
# A tibble: 1 × 5
year GMDNR age sex pop_ext_poi
<int> <dbl> <chr> <chr> <int>
1 2020 389 80+ Male NA
Results in large municipality
Results for large municipality are fine. Predictions are in red.
For small municipality we might end up with sth more wiggly.
Note: lack of prediction for 2020 & 2021 for oldest males!
There are two municipalities with missing predictions for oldest males:
# A tibble: 2 × 4
GMDNR age sex GMDNAME
<dbl> <chr> <chr> <chr>
1 389 80+ Male Meienried
2 4232 80+ Male Geltwil
Population in this strata was replaced by estimates of a simple linear model.
<- pop %>%
pop_ext_lm filter(year != 2020) %>%
group_by(GMDNR, age, sex) %>%
do(lm(pop ~ year, data = .) %>%
predict(., newdata = tibble(year = as.integer(2020))) %>%
tibble(year = as.integer(2020), pop_ext_lm = .)) %>%
ungroup() %>%
mutate(pop_ext_lm = as.integer(round(pop_ext_lm))) %>%
arrange(year, GMDNR, sex, age) %>%
relocate(year)
<- pop %>%
pop_complete
# extrapolated 2020
left_join(pop_ext_poi) %>%
mutate(pop_ext_poi = if_else(year < 2020,
%>%
pop, pop_ext_poi))
# replacing 2 poi missings with lm
left_join(pop_ext_lm) %>%
mutate(pop_ext_poi = if_else(year == 2020 & is.na(pop_ext_poi),
%>%
pop_ext_lm, pop_ext_poi)) select(-pop_ext_lm)
Differences in 2020 pops
Comparing estimates to real values
<- pop %>%
pop_compare
filter(year == 2020) %>%
select(-year) %>%
# poi model with 2 missings
left_join(pop_ext_poi) %>%
# lm model
left_join(pop_ext_lm) %>%
mutate(pop_ext_poi = if_else(is.na(pop_ext_poi),
%>%
pop_ext_lm, pop_ext_poi)) select(-pop_ext_lm) %>%
# diffs
mutate(pop_diff = pop_ext_poi - pop,
perc_diff = (pop_diff / pop_ext_poi) * 100,
alt = pop * 100) %>%
mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>%
select(-alt)
Differences
frq(pop_compare, pop_ext_poi > pop)
pop_ext_poi > pop <lgl>
# total N=25740 valid N=25740 mean=0.50 sd=0.50
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 12931 | 50.24 | 50.24 | 50.24
TRUE | 12809 | 49.76 | 49.76 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
frq(pop_compare, pop_ext_poi < pop)
pop_ext_poi < pop <lgl>
# total N=25740 valid N=25740 mean=0.43 sd=0.50
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 14670 | 56.99 | 56.99 | 56.99
TRUE | 11070 | 43.01 | 43.01 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
frq(pop_compare, pop_ext_poi == pop)
pop_ext_poi == pop <lgl>
# total N=25740 valid N=25740 mean=0.07 sd=0.26
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 23879 | 92.77 | 92.77 | 92.77
TRUE | 1861 | 7.23 | 7.23 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
summary(pop_compare$pop_diff)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-391.000 -4.000 0.000 1.649 5.000 2594.000
summary(pop_compare$perc_diff)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-400.0000 -3.9062 0.0000 -0.1833 4.7059 300.0000
Age group specific diffs
Municipality specific diffs
<- pop_compare %>%
pop_compare_agg filter(age != "<40") %>%
group_by(GMDNR) %>%
summarise(pop = sum(pop),
pop_ext_poi = sum(pop_ext_poi)) %>%
ungroup() %>%
mutate(pop_diff = pop_ext_poi - pop,
perc_diff = (pop_diff / pop_ext_poi) * 100,
alt = pop * 100) %>%
mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>%
select(-alt)
<- read_rds("data/BfS/gg.Rds") %>%
gg_centr select(GMDNR, GMDNAME) %>%
st_centroid() %>%
left_join(pop_compare_agg)
summary(pop_compare_agg$pop_diff)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-214.000 -5.000 5.000 7.896 16.000 548.000
summary(pop_compare_agg$perc_diff)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-15.5556 -0.4819 0.5505 0.7524 1.8628 31.4286
Getting mid-year pops
For each age, sex, nationality stratum, for each municipality separately, simple mean of the two years of data is used to estimate the middle point. In such case we use, for instance data from 31st Dec 2014 and 31st Dec 2015 to estimate pop mid-2015.
for (i in 2014:2019){
<- pop_complete %>%
result filter(year >= i & year <= i + 1) %>%
# filter(GMDNR == 261 | GMDNR == 389) %>%
group_by(GMDNR, age, sex) %>%
summarise(pop_mid_poi = mean(pop_ext_poi)) %>%
ungroup() %>%
mutate(year = as.integer(i + 1),
pop_mid_poi = as.integer(round(pop_mid_poi))) %>%
arrange(GMDNR, sex, age) %>%
relocate(year)
if (i == 2014) {
<- result
pop_mid_poi else {
} <- bind_rows(pop_mid_poi, result)
pop_mid_poi
}
}
Large municipality
Again, results for large municipality are fine:
Small municipality
Again, for small municipality we might end up with sth more wiggly.
Deaths <-> pop link
Prepared pop file is merged to deaths dataset prepared in
02.Rmd
.
We use file based on place of residence.
<- read_rds("data/BfS-closed/monthly_deaths/w_deaths_2015_2020_year.Rds") %>%
w_deaths_2015_2020_year_pop left_join(pop_mid_poi)
Deaths > pop problem
frq(w_deaths_2015_2020_year_pop, observed > pop_mid_poi)
observed > pop_mid_poi <lgl>
# total N=154440 valid N=154440 mean=0.00 sd=0.01
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
FALSE | 154420 | 99.99 | 99.99 | 99.99
TRUE | 20 | 0.01 | 0.01 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
<- w_deaths_2015_2020_year_pop %>%
surplus filter(observed > pop_mid_poi) %>%
arrange(GMDNR, year) %>%
mutate(difference = pop_mid_poi - observed)
There are 20 strata from 19 communities where count of deaths is larger than count of pop. The difference is always 1 in plus over pop 0.
In all these cases pop was increased by 1 to match the count of deaths!
%<>%
w_deaths_2015_2020_year_pop mutate(pop_mid_poi = if_else(observed > pop_mid_poi, observed, pop_mid_poi))
Cartogram
Using 2019 pop counts (ignoring age structure) to derive cartograms to avoid effects of pandemic.
Communities & pops
R solution 1
Using cartogram
package.
# takes a while!
<- carto_data %>%
carto ::cartogram_cont("pop")
cartogram
write_rds(carto, "data/carto/carto.Rds")
<- carto_data %>%
carto_ncont ::cartogram_ncont("pop")
cartogram
write_rds(carto_ncont, "data/carto/carto_ncont.Rds")
<- carto_data %>%
carto_dorling ::cartogram_dorling("pop")
cartogram
write_rds(carto_dorling, "data/carto/carto_dorling.Rds")
<- carto_data %>%
carto_gsm ::cartogramR(count = "pop", method = "gsm")
cartogramR
write_rds(carto_gsm, "data/carto/carto_gsm.Rds")
ScapeToad version
%>%
carto_data st_write("data/carto/in_gg.shp", delete_dsn = TRUE)
%>%
se select(GMDNR, GMDNAME) %>%
st_write("data/carto/in_se.shp", delete_dsn = TRUE)
# java -Xmx1g -jar "C:\Program Files\ScapeToad-v11\ScapeToad.jar"
‘Classic’ cartogram
Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering
Noncontinuous cartogram
Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering
Dorling carotgram
Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering
Swiss-SEP
<- read_rds("../SNC_Swiss-SEP2/FINAL/RDS/ssep3_user_geo.Rds") %>%
sep3 select(gisid, ssep3, ssep3_q) %>%
st_transform(crs = 2056)
Using 1,527,173 n’hoods from version 3.0:
ssep3_q <integer>
# total N=1527173 valid N=1527173 mean=3.00 sd=1.41
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
1 | 305439 | 20 | 20 | 20
2 | 305434 | 20 | 20 | 40
3 | 305432 | 20 | 20 | 60
4 | 305439 | 20 | 20 | 80
5 | 305429 | 20 | 20 | 100
<NA> | 0 | 0 | <NA> | <NA>
This is aggregated to the level of 2,145 communities from swisstopo.
STATPOP
<- read_delim("data-raw/BfS-closed/STATPOP/statpop2019_220098p.zip",
statpop2019 delim = ";", escape_double = FALSE,
col_types = cols(STATYEAR = col_integer(),
SEX = col_integer(),
TYPEOFRESIDENCE = col_integer(),
POPULATIONTYPE = col_integer(),
AGE = col_integer(),
CLASSAGEFIVEYEARS = col_integer(),
NATIONALITYCATEGORY = col_integer(),
MAINRESIDENCECATEGORY = col_integer(),
GEOCOORDE = col_integer(),
GEOCOORDN = col_integer(),
INDIC_EGID = col_integer(),
statdate = col_date(format = "%d/%m/%Y")),
trim_ws = TRUE) %>%
remove_empty(c("rows", "cols")) %>% clean_names()
<- statpop2019 %>%
statpop2019_agg rename(egid = federalbuildingid) %>%
filter(typeofresidence == 1) %>%
filter(populationtype == 1) %>%
filter(mainresidencecategory == 1) %>%
filter(indic_egid == 1) %>%
select(-statyear, -statdate,
-typeofresidence, -populationtype, -mainresidencecategory,
-indic_egid) %>%
group_by(egid, geocoorde, geocoordn) %>%
summarise(pop = n()) %>%
ungroup() %>%
st_as_sf(coords = c("geocoorde", "geocoordn"),
crs = 2056,
remove = TRUE)
Using data from STATPOP
2019.
Dataset consists of 8,812,965 individuals and 1,540,622 buildings. We selected individuals with type of residence Hauptwohnsitz, pop type Ständige Wohnbevölkerung, main residency category Nur ein Hauptwohnsitz and with valid EGID building ID.
The nearest building with SSEP was assigned for each of these building. The point dataset was then overlaid with community boundaries and summary measures of pop based index were created and pop level SEP was aggregated by community.
’nhood based averages for communities
# # or with full version?
# st_gg <- st_read("data-raw/BfS/ag-b-00.03-875-gg/ggg_2021-LV95/shp/g1g21_01072021.shp",
# as_tibble = TRUE)
<-
ssep3_gem st_join(sep3,
st_gg, join = st_intersects)
# tm_shape(gg) +
# tm_polygons() +
# tm_shape(ssep3_gem %>% filter(is.na(GMDNR))) +
# tm_dots()
<- ssep3_gem %>%
ssep3_gem_neigh st_drop_geometry() %>%
group_by(GMDNR) %>%
summarise(n = n(),
ssep3_median = median(ssep3)) %>%
ungroup()
<- inner_join(gg, ssep3_gem_neigh) %>%
ssep3_gem_neigh_geo filter(!is.na(ssep3_median)) %>%
mutate(median_ssep3_q = ntile(ssep3_median, 5)) %>%
mutate(median_ssep3_q = factor(median_ssep3_q,
levels = 1:5,
labels = c("1st - lowest",
"2nd",
"3rd quintile",
"4th",
"5th - highest")))
write_rds(ssep3_gem_neigh_geo, "data/BfS/ssep3_gem_neigh_geo.rds")
Watch out for small n! Here some examples with communities below 20 n’hoods
Final map using median index - bubbles scaled to number of ’nhoods within community.
Population based averages for communities
# join to sep3
<- st_join(statpop2019_agg, sep3, join = st_nearest_feature)
statpop2019_agg_sep
# # fails for unknown and unsolved reasons
# nearest <- st_nearest_feature(statpop2019_agg, sep3)
# statpop2019_agg_sep$dist3 <- st_distance(statpop2019_agg_sep, sep3[nearest, ], by_element = TRUE)
# rm(nearest, statpop2019_agg); gc()
# summary(statpop2019_agg_sep$dist3)
<-
statpop2019_agg_sep_gem st_join(statpop2019_agg_sep, st_gg, join = st_intersects) %>%
relocate(egid, gisid, pop,
ssep3, ssep3_q,
GMDNR)
write_rds(statpop2019_agg_sep_gem, "data/BfS-closed/SEP/statpop2019_agg_sep_gem.Rds")
<- statpop2019_agg_sep_gem %>%
ssep3_gem_pop select(GMDNR, ssep3, pop) %>%
uncount(pop) %>%
group_by(GMDNR) %>%
summarise(pop = n(),
ssep3_median = median(ssep3)) %>%
ungroup()
# left_join(gg, ssep3_gem_pop) %>%
# filter(is.na(ssep3_median))
<-
ssep3_gem_pop_geo left_join(gg, ssep3_gem_pop) %>%
mutate(median_ssep3_q = ntile(ssep3_median, 5)) %>%
mutate(median_ssep3_q = factor(median_ssep3_q,
levels = 1:5,
labels = c("1st - lowest",
"2nd",
"3rd quintile",
"4th",
"5th - highest")))
write_rds(ssep3_gem_pop_geo, "data/BfS/ssep3_gem_pop_geo.rds")
Final map using median index - bubbles scaled to number of people (STATPOP 2020)
Raumgliederungen
Data from the app.
State as of 2021-07-01
to match spatial data nicely.
<- read_xlsx("data-raw/BfS/Raumgliederungen.xlsx",
raum skip = 1) %>%
remove_empty(c("rows", "cols")) %>% clean_names() %>%
filter(! is.na(bfs_gde_nummer)) %>%
rename(GMDNR = bfs_gde_nummer,
GMDNAME = gemeindename,
KTNAME = kanton,
BZNR = bezirks_nummer,
BZNAME = bezirksname) %>%
select(GMDNR, GMDNAME, KTNAME, BZNR, BZNAME,
stadtische_landliche_gebiete, sprachgebiete,
urbanisierungsgrad_2011_degurba_eurostat,%>%
gemeindetypologie_2012_9_typen, gemeindetypologie_2012_25_typen) rename(gemtyp_9 = gemeindetypologie_2012_9_typen,
gemtyp_25 = gemeindetypologie_2012_25_typen) %>%
mutate(gemtyp_9 = as.integer(gemtyp_9),
gemtyp_25 = as.integer(gemtyp_25)) %>%
mutate(
r_urban1 = case_when(
== 1 ~ "Urban",
stadtische_landliche_gebiete == 2 ~ "Periurban",
stadtische_landliche_gebiete == 3 ~ "Rural",
stadtische_landliche_gebiete TRUE ~ ""),
r_urban2 = case_when(
== 1 ~ "Dense",
urbanisierungsgrad_2011_degurba_eurostat == 2 ~ "Medium",
urbanisierungsgrad_2011_degurba_eurostat == 3 ~ "Low",
urbanisierungsgrad_2011_degurba_eurostat TRUE ~ ""),
r_lang = case_when(
== 1 ~ "German",
sprachgebiete == 2 ~ "French",
sprachgebiete == 3 ~ "Italian",
sprachgebiete == 4 ~ "Romansh",
sprachgebiete TRUE ~ "")
%>%
) select(-stadtische_landliche_gebiete, -sprachgebiete,
-urbanisierungsgrad_2011_degurba_eurostat) %>%
left_join(read_xlsx("data-raw/BfS/Raumgliederungen.xlsx",
skip = 1, sheet = "CH1+CL_GDET9+2012.1") %>%
remove_empty(c("rows", "cols")) %>% clean_names() %>%
rename(gemtyp_9 = code,
gemtyp_9_lab = label) %>%
mutate(gemtyp_9 = as.integer(gemtyp_9))) %>%
relocate(gemtyp_9_lab, .after = gemtyp_9) %>%
left_join(read_xlsx("data-raw/BfS/Raumgliederungen.xlsx",
skip = 1, sheet = "CH1+CL_GDET25+2012.1") %>%
remove_empty(c("rows", "cols")) %>% clean_names() %>%
rename(gemtyp_25 = code,
gemtyp_25_lab = label) %>%
mutate(gemtyp_25 = as.integer(gemtyp_25))) %>%
relocate(gemtyp_25_lab, .after = gemtyp_25)
Urbanization 1
x <categorical>
# total N=2145 valid N=2145 mean=2.30 sd=0.81
Value | N | Raw % | Valid % | Cum. %
-------------------------------------------
Urban | 475 | 22.14 | 22.14 | 22.14
Periurban | 560 | 26.11 | 26.11 | 48.25
Rural | 1110 | 51.75 | 51.75 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Urbanization 2
Using DEGURBA.
x <categorical>
# total N=2145 valid N=2145 mean=2.48 sd=0.58
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
Dense | 98 | 4.57 | 4.57 | 4.57
Medium | 911 | 42.47 | 42.47 | 47.04
Low | 1136 | 52.96 | 52.96 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Language region
x <categorical>
# total N=2145 valid N=2145 mean=1.40 sd=0.59
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
German | 1406 | 65.55 | 65.55 | 65.55
French | 618 | 28.81 | 28.81 | 94.36
Italian | 121 | 5.64 | 5.64 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Typology of communities - 9
x <character>
# total N=2145 valid N=2145 mean=4.54 sd=2.47
Value | N | Raw % | Valid % | Cum. %
------------------------------------------------------------------------------------------------------
Ländliche periphere Gemeinde | 257 | 11.98 | 11.98 | 11.98
Ländliche zentral gelegene Gemeinde | 384 | 17.90 | 17.90 | 29.88
Ländliche Zentrumsgemeinde | 84 | 3.92 | 3.92 | 33.80
Periurbane Gemeinde geringer Dichte | 469 | 21.86 | 21.86 | 55.66
Periurbane Gemeinde hoher Dichte | 108 | 5.03 | 5.03 | 60.70
Periurbane Gemeinde mittlerer Dichte | 368 | 17.16 | 17.16 | 77.86
Städtische Gemeinde einer grossen Agglomeration | 160 | 7.46 | 7.46 | 85.31
Städtische Gemeinde einer kleinen oder ausserhalb einer Agglomeration | 122 | 5.69 | 5.69 | 91.00
Städtische Gemeinde einer mittelgrossen Agglomeration | 193 | 9.00 | 9.00 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Typology of communities - 25
x <character>
# total N=2145 valid N=2145 mean=13.18 sd=5.74
Value | N | Raw % | Valid % | Cum. %
---------------------------------------------------------------------------------------------------------------------
Dienstleistungsgemeinde eines ländlichen Zentrums | 20 | 0.93 | 0.93 | 0.93
Industriegemeinde eines ländlichen Zentrums | 43 | 2.00 | 2.00 | 2.94
Kernstadt einer grossen Agglomeration | 5 | 0.23 | 0.23 | 3.17
Kernstadt einer mittelgrossen Agglomeration | 28 | 1.31 | 1.31 | 4.48
Ländliche periphere Agrargemeinde | 65 | 3.03 | 3.03 | 7.51
Ländliche periphere Mischgemeinde | 152 | 7.09 | 7.09 | 14.59
Ländliche periphere Tourismusgemeinde | 40 | 1.86 | 1.86 | 16.46
Ländliche zentral gelegene Agrargemeinde | 119 | 5.55 | 5.55 | 22.00
Ländliche zentral gelegene Dienstleistungsgemeinde | 118 | 5.50 | 5.50 | 27.51
Ländliche zentral gelegene Industriegemeinde | 147 | 6.85 | 6.85 | 34.36
Periurbane Agrargemeinde geringer Dichte | 165 | 7.69 | 7.69 | 42.05
Periurbane Dienstleistungsgemeinde geringer Dichte | 178 | 8.30 | 8.30 | 50.35
Periurbane Dienstleistungsgemeinde hoher Dichte | 53 | 2.47 | 2.47 | 52.82
Periurbane Dienstleistungsgemeinde mittlerer Dichte | 191 | 8.90 | 8.90 | 61.72
Periurbane Industriegemeinde geringer Dichte | 126 | 5.87 | 5.87 | 67.60
Periurbane Industriegemeinde hoher Dichte | 55 | 2.56 | 2.56 | 70.16
Periurbane Industriegemeinde mittlerer Dichte | 177 | 8.25 | 8.25 | 78.41
Städtische Arbeitsplatzgemeinde einer grossen Agglomeration | 63 | 2.94 | 2.94 | 81.35
Städtische Arbeitsplatzgemeinde einer mittelgrossen Agglomeration | 62 | 2.89 | 2.89 | 84.24
Städtische Dienstleistungsgemeinde einer kleinen oder ausserhalb einer Agglomeration | 49 | 2.28 | 2.28 | 86.53
Städtische Industriegemeinde einer kleinen oder ausserhalb einer Agglomeration | 63 | 2.94 | 2.94 | 89.46
Städtische Tourismusgemeinde einer kleinen oder ausserhalb einer Agglomeration | 10 | 0.47 | 0.47 | 89.93
Städtische Wohngemeinde einer grossen Agglomeration | 92 | 4.29 | 4.29 | 94.22
Städtische Wohngemeinde einer mittelgrossen Agglomeration | 103 | 4.80 | 4.80 | 99.02
Tourismusgemeinde eines ländlichen Zentrums | 21 | 0.98 | 0.98 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Voting
Data from swissdd
package.
Linked to municipality boundaries from 2022-01-01
.
<- read_rds("data/swisstopo/st_gg.Rds") st_gg
June vote
Bundesgesetz vom 25.09.2020 über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz)
Data
<- get_nationalvotes(votedates = "2021-06-13",
covid_jun geolevel = "municipality") %>%
filter(str_detect(name, "Covid-19")) %>%
select(-name, -id)
Dataset consists of 2,141 communities. Votes from abroad are excluded.
Results preview
Link to municipalities
Using GMDNR
/ mun_id
for deterministic
linkage.
<- st_gg %>%
covid_jun_gem left_join(covid_jun %>%
select(GMDNR, jaStimmenInProzent)) %>%
relocate(geometry, .after = last_col())
There are few communities that exist on the map but do not exist in the voting dataset:
# A tibble: 4 × 2
GMDNR GMDNAME
<dbl> <chr>
1 389 Meienried
2 408 Hellsau
3 535 Deisswil bei Münchenbuchsee
4 877 Niedermuhlern
November vote
Änderung des Bundesgesetzes über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz) (Härtefälle, Arbeitslosenversicherung, familienergänzende Kinderbetreuung, Kulturschaffende, Veranstaltungen)
Data
<- get_nationalvotes(votedates = "2021-11-28",
covid_nov geolevel = "municipality") %>%
filter(str_detect(name, "Covid-19")) %>%
select(-name, -id)
Dataset consists of 2,141 communities. Votes from abroad are excluded.
Results preview
Link to municipalities
Using GMDNR
/ mun_id
for deterministic
linkage.
<- st_gg %>%
covid_nov_gem left_join(covid_nov %>%
select(GMDNR, jaStimmenInProzent)) %>%
mutate(vote_yes = ntile(jaStimmenInProzent, 5)) %>%
# mutate(vote_yes = Hmisc::cut2(jaStimmenInProzent, g = 5)) %>%
relocate(geometry, .after = last_col())
Again, there are few communities that exist on the map but do not exist in the voting dataset:
# A tibble: 4 × 2
GMDNR GMDNAME
<dbl> <chr>
1 389 Meienried
2 408 Hellsau
3 535 Deisswil bei Münchenbuchsee
4 877 Niedermuhlern
Mobility & restrictions
Google community mobility reports from
COVID19
package.
<- covid19(country = "CHE", level = 2,
gmr start = "2020-01-01",
end = "2020-12-31",
gmr = TRUE, verbose = FALSE) %>%
select(-administrative_area_level,
-administrative_area_level_1,
-administrative_area_level_2,
-administrative_area_level_3,
-iso_numeric, -iso_currency,
- iso_alpha_2, -iso_alpha_3,
- latitude, -longitude, -population,
- key_google_mobility, -key_apple_mobility, -key_jhu_csse, -key_nuts, -key_gadm,
-(confirmed:vent)) %>%
rename(KTNAME = key_local) %>%
relocate(id, date, KTNAME)
Data structure for example of 2020 data:
Name | gmr |
Number of rows | 8135 |
Number of columns | 27 |
_______________________ | |
Column type frequency: | |
character | 2 |
Date | 1 |
numeric | 24 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
id | 0 | 1 | 8 | 8 | 0 | 26 | 0 |
KTNAME | 0 | 1 | 2 | 2 | 0 | 26 | 0 |
Variable type: Date
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
date | 0 | 1 | 2020-02-01 | 2020-12-31 | 2020-07-28 | 335 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
school_closing | 0 | 1.00 | 0.92 | 1.24 | 0.00 | 0.00 | 0.0 | 2.00 | 3 | ▇▁▁▂▂ |
workplace_closing | 0 | 1.00 | 0.11 | 2.03 | -3.00 | -2.00 | 0.0 | 2.00 | 3 | ▇▂▂▁▇ |
cancel_events | 0 | 1.00 | 1.23 | 1.43 | -2.00 | 2.00 | 2.0 | 2.00 | 2 | ▂▁▁▁▇ |
gatherings_restrictions | 0 | 1.00 | 0.95 | 3.21 | -3.00 | -3.00 | 3.0 | 4.00 | 4 | ▆▁▂▁▇ |
transport_closing | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.0 | 0.00 | 0 | ▁▁▇▁▁ |
stay_home_restrictions | 0 | 1.00 | 0.55 | 0.50 | 0.00 | 0.00 | 1.0 | 1.00 | 1 | ▆▁▁▁▇ |
internal_movement_restrictions | 0 | 1.00 | 0.26 | 0.44 | 0.00 | 0.00 | 0.0 | 1.00 | 1 | ▇▁▁▁▃ |
international_movement_restrictions | 0 | 1.00 | 2.79 | 0.73 | 0.00 | 3.00 | 3.0 | 3.00 | 3 | ▁▁▁▁▇ |
information_campaigns | 0 | 1.00 | 1.96 | 0.25 | 0.00 | 2.00 | 2.0 | 2.00 | 2 | ▁▁▁▁▇ |
testing_policy | 0 | 1.00 | 2.10 | 0.72 | 0.00 | 2.00 | 2.0 | 3.00 | 3 | ▁▃▁▇▆ |
contact_tracing | 0 | 1.00 | 1.75 | 0.44 | 0.00 | 2.00 | 2.0 | 2.00 | 2 | ▁▁▂▁▇ |
facial_coverings | 0 | 1.00 | 1.08 | 1.74 | -3.00 | 0.00 | 1.0 | 2.00 | 3 | ▂▁▃▃▇ |
vaccination_policy | 0 | 1.00 | 0.03 | 0.17 | 0.00 | 0.00 | 0.0 | 0.00 | 1 | ▇▁▁▁▁ |
elderly_people_protection | 0 | 1.00 | 1.32 | 1.88 | -3.00 | 2.00 | 2.0 | 2.00 | 3 | ▂▁▁▁▇ |
government_response_index | 0 | 1.00 | -49.55 | 10.42 | -60.42 | -57.81 | -48.7 | -45.05 | 0 | ▇▅▁▁▁ |
stringency_index | 0 | 1.00 | -49.46 | 15.17 | -73.15 | -59.26 | -46.3 | -39.35 | 0 | ▆▅▇▂▁ |
containment_health_index | 0 | 1.00 | -51.14 | 10.07 | -63.33 | -60.12 | -50.3 | -47.92 | 0 | ▇▇▁▁▁ |
economic_support_index | 0 | 1.00 | -38.39 | 18.15 | -62.50 | -37.50 | -37.5 | -37.50 | 0 | ▃▇▁▁▂ |
retail_and_recreation_percent_change_from_baseline | 1960 | 0.76 | -25.80 | 26.68 | -93.00 | -42.00 | -17.0 | -6.00 | 56 | ▂▂▇▂▁ |
grocery_and_pharmacy_percent_change_from_baseline | 2499 | 0.69 | -2.42 | 19.52 | -95.00 | -7.00 | 0.0 | 6.00 | 85 | ▁▁▇▁▁ |
parks_percent_change_from_baseline | 4338 | 0.47 | 25.63 | 58.22 | -77.00 | -17.00 | 12.0 | 58.00 | 325 | ▇▇▂▁▁ |
transit_stations_percent_change_from_baseline | 608 | 0.93 | -21.20 | 21.43 | -84.00 | -35.00 | -21.0 | -8.00 | 78 | ▂▇▇▁▁ |
workplaces_percent_change_from_baseline | 608 | 0.93 | -23.11 | 17.14 | -90.00 | -32.00 | -20.0 | -12.00 | 17 | ▁▁▅▇▂ |
residential_percent_change_from_baseline | 2650 | 0.67 | 7.95 | 6.99 | -6.00 | 3.00 | 7.0 | 11.00 | 37 | ▃▇▂▁▁ |
Note: description of variables is described here.
Example of workplace data for canton BE
Apple
Apple’s (discontinued) Mobility Trend Reports
from covmobility
package.
data(apple_mobility)
= apple_mobility %>%
apple_mobility filter(country == "Switzerland") %>%
filter(date <= ymd("2020-12-31")) %>%
filter(geo_type == "sub-region") %>%
select(-geo_type, -country, - alternative_name, -sub_region)
Simpler dataset with a 0-Inf score for each day and canton across three trasport modes.
transportation_type <character>
# total N=16284 valid N=16284 mean=1.74 sd=0.90
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
driving | 9204 | 56.52 | 56.52 | 56.52
transit | 2124 | 13.04 | 13.04 | 69.57
walking | 4956 | 30.43 | 30.43 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
10.76 84.98 106.12 112.82 134.93 621.56 756
Example of data for canton BE
The downloaded binary packages are in
/var/folders/cw/01mf9_sx1hqc0f2jc48w_dtw0000gn/T//RtmpXBqdl5/downloaded_packages
Note: Appenzell Innerrhoden & Uri have zero data!
Final link & save
<- w_deaths_2015_2020_year_pop %>%
w_deaths_2015_2020_year_fin left_join(ssep3_gem_pop_geo %>%
st_drop_geometry() %>%
select(-pop)) %>%
left_join(gg21_merged %>%
st_drop_geometry() %>%
select(-GMDNAME, -KTNAME, -BZNR, -BZNAME)
)
write_rds(w_deaths_2015_2020_year_fin, "data/BfS-closed/monthly_deaths/w_deaths_2015_2020_year_fin.Rds")