2020 excess mortality & voting patterns in CH

Ancillary data preparation

Population

Data extracted from px-x-0103010000_201:

Ständige und nichtständige Wohnbevölkerung nach institutionellen Gliederungen, Staatsangehörigkeit (Kategorie), Geburtsort, Geschlecht und Altersklasse

Important info from footnotes:

Letzte Änderungen: Neuer Datensatz (Jahr 2020) Stand der Datenbank: Juni 2021 Stichtag: 31. Dezember Raumbezug: Gemeinden / 18.10.2020 Datenquelle: Statistik der Bevölkerung und der Haushalte STATPOP Definition der ständigen Wohnbevölkerung

Community codes

Community mutations data after 2021-01-01 from BfS are integrated into the data and pops are recalculated using these new codes to match mortality.

histcomm <- read_rds("data/BfS/histcomm.Rds") %>% 
  filter(datum_der_aufnahme >= ymd("2021-01-01")) %>% 
  select(bfs_gde_num_old, bfs_gde_num_new, gemeindename_new)

Further corrections of municipalities mutations are needed to bring the data to the state of 2022-01-01.

Data preps

Data aggregated to age groups as in deaths dataset.

pop <- read_xlsx("data-raw/BfS/px-x-0103010000_201.xlsx", 
                 col_types = c("numeric", 
                               "skip", "numeric", "text", "skip", 
                               "skip", "skip", "text", "skip", "skip", 
                               "skip", "text", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric"), 
                 skip = 1) %>%
  remove_empty(c("rows", "cols")) %>% clean_names() %>% 
  rename(year = x1, 
         GMDNR = x2,
         GMDNAME = x3) %>% 
  mutate(sex = if_else(x5 == "Frau", "Female", "Male"),
         nationality = if_else(x4 == "Schweiz", "Swiss", "Foreigner"),
         year = as.integer(year)) %>% 
  select(-x4, -x5) %>% 
  relocate(sex, .after = GMDNAME) %>% 
  relocate(nationality, .after = sex) %>% 
  mutate(GMDNAME = word(GMDNAME, 2, -1)) %>% 
  fill(year, GMDNR, GMDNAME, nationality) %>% 
  mutate(`<40` = x0_4_jahre + x5_9_jahre + x10_14_jahre + x15_19_jahre + 
           x20_24_jahre + x25_29_jahre + x30_34_jahre + x35_39_jahre) %>% 
  select(-x0_4_jahre, -x5_9_jahre, -x10_14_jahre, -x15_19_jahre,  
         -x20_24_jahre, -x25_29_jahre, -x30_34_jahre, -x35_39_jahre) %>% 
  mutate(`40-49` = x40_44_jahre + x45_49_jahre) %>% 
  mutate(`50-59` = x50_54_jahre + x55_59_jahre) %>% 
  mutate(`60-69` = x60_64_jahre + x65_69_jahre) %>% 
  mutate(`70-79` = x70_74_jahre + x75_79_jahre) %>% 
  select(-x40_44_jahre, -x45_49_jahre, -x50_54_jahre, -x55_59_jahre,
         -x60_64_jahre, -x65_69_jahre, -x70_74_jahre, -x75_79_jahre) %>% 
  mutate(`80+` = x80_84_jahre +x85_89_jahre + x90_94_jahre +
           x95_99_jahre + x100_jahre_und_mehr) %>% 
  select(-x80_84_jahre, -x85_89_jahre, -x90_94_jahre,
         -x95_99_jahre, -x100_jahre_und_mehr) %>% 
  pivot_longer(cols = `<40`:`80+`, 
               names_to = "age", values_to = "pop") %>% 
  
  # BfS identified changes
  left_join(histcomm, by = c("GMDNR" = "bfs_gde_num_old")) %>% 
  mutate(GMDNR = if_else(!is.na(bfs_gde_num_new), bfs_gde_num_new, GMDNR),
         GMDNAME = if_else(!is.na(gemeindename_new), gemeindename_new, GMDNAME)) %>% 
  select(-bfs_gde_num_new, -gemeindename_new) %>% 
  
  # Essertes merge
  mutate(GMDNR = if_else(GMDNAME == "Essertes", 5805, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Essertes", "Oron", GMDNAME)) %>% 
  
  # now to reach the state of `2022-01-01`
  mutate(GMDNR = if_else(GMDNAME == "Bad Zurzach", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Bad Zurzach", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Baldingen", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Baldingen", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Böbikon", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Böbikon", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Kaiserstuhl", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Kaiserstuhl", "Zurzach", GMDNAME)) %>%
  mutate(GMDNR = if_else(GMDNAME == "Rekingen (AG)", 4324, GMDNR)) %>%
  mutate(GMDNAME = if_else(GMDNAME == "Rekingen (AG)", "Zurzach", GMDNAME)) %>%
  mutate(GMDNR = if_else(GMDNAME == "Rietheim", 4324, GMDNR)) %>%
  mutate(GMDNAME = if_else(GMDNAME == "Rietheim", "Zurzach", GMDNAME)) %>%
  mutate(GMDNR = if_else(GMDNAME == "Rümikon", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Rümikon", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Wislikofen", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Wislikofen", "Zurzach", GMDNAME)) %>% 
  # Böztal merge
  mutate(GMDNR = if_else(GMDNAME == "Bözen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Bözen", "Böztal", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Elfingen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Elfingen", "Böztal", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Effingen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Effingen", "Böztal", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Hornussen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Hornussen", "Böztal", GMDNAME)) %>% 
  # Blonay - Saint-Légier merge
  mutate(GMDNR = if_else(GMDNAME == "Blonay", 5892, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Blonay", "Blonay - Saint-Légier", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Saint-Légier-La Chiésaz", 5892, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Saint-Légier-La Chiésaz", "Blonay - Saint-Légier", GMDNAME)) %>% 
  # Murten merge
  mutate(GMDNR = if_else(GMDNAME == "Galmiz", 2275, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Galmiz", "Murten", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Gempenach", 2275, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Gempenach", "Murten", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Clavaleyres", 2275, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Clavaleyres", "Murten", GMDNAME)) %>% 
  
  # Schwende-Rüte merge
  mutate(GMDNAME = if_else(GMDNAME == "Schwende", "Schwende-Rüte", GMDNAME)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Rüte", "Schwende-Rüte", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Schwende-Rüte", 3112, GMDNR)) %>% 
  # Val Mara merge
  mutate(GMDNAME = if_else(GMDNAME == "Melano", "Val Mara", GMDNAME)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Maroggia", "Val Mara", GMDNAME)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Rovio", "Val Mara", GMDNAME)) %>%  
  mutate(GMDNR = if_else(GMDNAME == "Val Mara", 5240, GMDNR)) %>%  
  
  group_by(year, GMDNR, GMDNAME, sex, age) %>% 
  summarise(pop = as.integer(sum(pop))) %>% 
  ungroup() %>% 
  arrange(year, GMDNR, sex, age)

Yearly totals (on 31st Dec!):

# A tibble: 7 × 2
   year pop      
  <int> <chr>    
1  2014 8,237,666
2  2015 8,327,126
3  2016 8,419,550
4  2017 8,484,130
5  2018 8,544,527
6  2019 8,606,033
7  2020 8,670,300

Age (40 plus!) distributions over years

Predicting 2020 population

For each age, sex, nationality stratum, for each municipality separately, Poisson model was fitted using the 2014-2019 data; then prediction was made for 2020 & 2021 years. The gist of this operation was to exclude the effect of pandemic from the pop counts in years affected.

pop_ext_poi <- pop %>% 
  filter(year != 2020) %>% 
  # filter(GMDNR == 261 | GMDNR == 389) %>%
  group_by(GMDNR, age, sex) %>% 
  do(glm(pop ~ year, data = ., family = "poisson") %>%
       predict(., newdata = tibble(year = as.integer(2020)), type = "response") %>%
       tibble(year = as.integer(2020), pop_ext_poi = .)) %>% 
  ungroup() %>% 
  mutate(pop_ext_poi = as.integer(round(pop_ext_poi))) %>% 
  arrange(year, GMDNR, sex, age) %>% 
  relocate(year)

Summary of values:

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
     0.0     42.0    109.0    338.5    283.0 117341.0        2

Note: 2 NAs!

# A tibble: 7 × 6
   year GMDNR GMDNAME sex   age     pop
  <int> <dbl> <chr>   <chr> <chr> <int>
1  2014  4232 Geltwil Male  80+       0
2  2015  4232 Geltwil Male  80+       0
3  2016  4232 Geltwil Male  80+       0
4  2017  4232 Geltwil Male  80+       0
5  2018  4232 Geltwil Male  80+       0
6  2019  4232 Geltwil Male  80+       1
7  2020  4232 Geltwil Male  80+       3

# A tibble: 1 × 5
   year GMDNR age   sex   pop_ext_poi
  <int> <dbl> <chr> <chr>       <int>
1  2020  4232 80+   Male           NA

# A tibble: 7 × 6
   year GMDNR GMDNAME   sex   age     pop
  <int> <dbl> <chr>     <chr> <chr> <int>
1  2014   389 Meienried Male  80+       0
2  2015   389 Meienried Male  80+       0
3  2016   389 Meienried Male  80+       0
4  2017   389 Meienried Male  80+       0
5  2018   389 Meienried Male  80+       0
6  2019   389 Meienried Male  80+       1
7  2020   389 Meienried Male  80+       1

# A tibble: 1 × 5
   year GMDNR age   sex   pop_ext_poi
  <int> <dbl> <chr> <chr>       <int>
1  2020   389 80+   Male           NA

Results in large municipality

Results for large municipality are fine. Predictions are in red.

For small municipality we might end up with sth more wiggly.

Note: lack of prediction for 2020 & 2021 for oldest males!

There are two municipalities with missing predictions for oldest males:

# A tibble: 2 × 4
  GMDNR age   sex   GMDNAME  
  <dbl> <chr> <chr> <chr>    
1   389 80+   Male  Meienried
2  4232 80+   Male  Geltwil

Population in this strata was replaced by estimates of a simple linear model.

pop_ext_lm <- pop %>%
  filter(year != 2020) %>%
  group_by(GMDNR, age, sex) %>%
  do(lm(pop ~ year, data = .) %>%
       predict(., newdata = tibble(year = as.integer(2020))) %>%
       tibble(year = as.integer(2020), pop_ext_lm = .)) %>%
  ungroup() %>%
  mutate(pop_ext_lm = as.integer(round(pop_ext_lm))) %>%
  arrange(year, GMDNR, sex, age) %>%
  relocate(year)

pop_complete <- pop %>% 
  
  # extrapolated 2020
  left_join(pop_ext_poi) %>% 
  mutate(pop_ext_poi = if_else(year < 2020, 
                               pop, pop_ext_poi)) %>% 
  
  # replacing 2 poi missings with lm
  left_join(pop_ext_lm) %>% 
  mutate(pop_ext_poi = if_else(year == 2020 & is.na(pop_ext_poi), 
                               pop_ext_lm, pop_ext_poi)) %>% 
  select(-pop_ext_lm)

Differences in 2020 pops

Comparing estimates to real values

pop_compare <- pop %>% 
  
  filter(year == 2020) %>% 
  select(-year) %>% 
  
  # poi model with 2 missings
  left_join(pop_ext_poi) %>% 
  
  # lm model
  left_join(pop_ext_lm) %>% 
  mutate(pop_ext_poi = if_else(is.na(pop_ext_poi), 
                               pop_ext_lm, pop_ext_poi)) %>% 
  select(-pop_ext_lm) %>% 
  
  # diffs
  mutate(pop_diff = pop_ext_poi - pop,
         perc_diff = (pop_diff / pop_ext_poi) * 100,
         alt = pop * 100) %>% 
  mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
         perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>% 
  select(-alt)

Differences

frq(pop_compare, pop_ext_poi > pop)

pop_ext_poi > pop <lgl> 
# total N=25740 valid N=25740 mean=0.50 sd=0.50

Value |     N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 12931 | 50.24 |   50.24 |  50.24
TRUE  | 12809 | 49.76 |   49.76 | 100.00
<NA>  |     0 |  0.00 |    <NA> |   <NA>

frq(pop_compare, pop_ext_poi < pop)

pop_ext_poi < pop <lgl> 
# total N=25740 valid N=25740 mean=0.43 sd=0.50

Value |     N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 14670 | 56.99 |   56.99 |  56.99
TRUE  | 11070 | 43.01 |   43.01 | 100.00
<NA>  |     0 |  0.00 |    <NA> |   <NA>

frq(pop_compare, pop_ext_poi == pop)

pop_ext_poi == pop <lgl> 
# total N=25740 valid N=25740 mean=0.07 sd=0.26

Value |     N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 23879 | 92.77 |   92.77 |  92.77
TRUE  |  1861 |  7.23 |    7.23 | 100.00
<NA>  |     0 |  0.00 |    <NA> |   <NA>

summary(pop_compare$pop_diff)

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-391.000   -4.000    0.000    1.649    5.000 2594.000

summary(pop_compare$perc_diff)

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-400.0000   -3.9062    0.0000   -0.1833    4.7059  300.0000

Age group specific diffs

Municipality specific diffs

pop_compare_agg <- pop_compare %>% 
  filter(age != "<40") %>% 
  group_by(GMDNR) %>% 
  summarise(pop = sum(pop),
            pop_ext_poi = sum(pop_ext_poi)) %>% 
  ungroup() %>% 
  mutate(pop_diff = pop_ext_poi - pop,
         perc_diff = (pop_diff / pop_ext_poi) * 100,
         alt = pop * 100) %>% 
  mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
         perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>% 
  select(-alt)

gg_centr <- read_rds("data/BfS/gg.Rds") %>% 
  select(GMDNR, GMDNAME) %>% 
  st_centroid() %>% 
  left_join(pop_compare_agg)

summary(pop_compare_agg$pop_diff)

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-214.000   -5.000    5.000    7.896   16.000  548.000

summary(pop_compare_agg$perc_diff)

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-15.5556  -0.4819   0.5505   0.7524   1.8628  31.4286

Getting mid-year pops

For each age, sex, nationality stratum, for each municipality separately, simple mean of the two years of data is used to estimate the middle point. In such case we use, for instance data from 31st Dec 2014 and 31st Dec 2015 to estimate pop mid-2015.

for (i in 2014:2019){
  
  result <- pop_complete %>% 
    filter(year >= i & year <= i + 1) %>% 
    # filter(GMDNR == 261 | GMDNR == 389) %>%
    group_by(GMDNR, age, sex) %>% 
    summarise(pop_mid_poi = mean(pop_ext_poi)) %>% 
    ungroup() %>% 
    mutate(year = as.integer(i + 1),
           pop_mid_poi = as.integer(round(pop_mid_poi))) %>% 
    arrange(GMDNR, sex, age) %>% 
    relocate(year)
  
  if (i == 2014) {
    pop_mid_poi <- result
  } else {
    pop_mid_poi <- bind_rows(pop_mid_poi, result)
  }
  
}

Large municipality

Again, results for large municipality are fine:

Small municipality

Again, for small municipality we might end up with sth more wiggly.

Deaths <-> pop link

Prepared pop file is merged to deaths dataset prepared in 02.Rmd.
We use file based on place of residence.

w_deaths_2015_2020_year_pop <- read_rds("data/BfS-closed/monthly_deaths/w_deaths_2015_2020_year.Rds") %>% 
  left_join(pop_mid_poi)

Deaths > pop problem

frq(w_deaths_2015_2020_year_pop, observed > pop_mid_poi)

observed > pop_mid_poi <lgl> 
# total N=154440 valid N=154440 mean=0.00 sd=0.01

Value |      N | Raw % | Valid % | Cum. %
-----------------------------------------
FALSE | 154420 | 99.99 |   99.99 |  99.99
TRUE  |     20 |  0.01 |    0.01 | 100.00
<NA>  |      0 |  0.00 |    <NA> |   <NA>

surplus <- w_deaths_2015_2020_year_pop %>% 
  filter(observed > pop_mid_poi) %>% 
  arrange(GMDNR, year) %>% 
  mutate(difference = pop_mid_poi - observed)

There are 20 strata from 19 communities where count of deaths is larger than count of pop. The difference is always 1 in plus over pop 0.

In all these cases pop was increased by 1 to match the count of deaths!

w_deaths_2015_2020_year_pop %<>% 
  mutate(pop_mid_poi = if_else(observed > pop_mid_poi, observed, pop_mid_poi))

Cartogram

Using 2019 pop counts (ignoring age structure) to derive cartograms to avoid effects of pandemic.

Communities & pops

R solution 1

Using cartogram package.

# takes a while!
carto <- carto_data %>%
  cartogram::cartogram_cont("pop")

write_rds(carto, "data/carto/carto.Rds")

carto_ncont <- carto_data %>% 
  cartogram::cartogram_ncont("pop")

write_rds(carto_ncont, "data/carto/carto_ncont.Rds")

carto_dorling <- carto_data %>% 
  cartogram::cartogram_dorling("pop")

write_rds(carto_dorling, "data/carto/carto_dorling.Rds")

carto_gsm <- carto_data %>%
  cartogramR::cartogramR(count = "pop", method = "gsm")

write_rds(carto_gsm, "data/carto/carto_gsm.Rds")

ScapeToad version

carto_data %>% 
  st_write("data/carto/in_gg.shp", delete_dsn = TRUE)

se %>% 
  select(GMDNR, GMDNAME) %>% 
  st_write("data/carto/in_se.shp", delete_dsn = TRUE)

# java -Xmx1g -jar "C:\Program Files\ScapeToad-v11\ScapeToad.jar"

‘Classic’ cartogram

Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering

Noncontinuous cartogram

Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering

Dorling carotgram

Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering

Swiss-SEP

sep3 <- read_rds("../SNC_Swiss-SEP2/FINAL/RDS/ssep3_user_geo.Rds") %>%
  select(gisid, ssep3, ssep3_q) %>% 
  st_transform(crs = 2056)

Using 1,527,173 n’hoods from version 3.0:

ssep3_q <integer> 
# total N=1527173 valid N=1527173 mean=3.00 sd=1.41

Value |      N | Raw % | Valid % | Cum. %
-----------------------------------------
    1 | 305439 |    20 |      20 |     20
    2 | 305434 |    20 |      20 |     40
    3 | 305432 |    20 |      20 |     60
    4 | 305439 |    20 |      20 |     80
    5 | 305429 |    20 |      20 |    100
 <NA> |      0 |     0 |    <NA> |   <NA>

This is aggregated to the level of 2,145 communities from swisstopo.

STATPOP

statpop2019 <- read_delim("data-raw/BfS-closed/STATPOP/statpop2019_220098p.zip", 
                          delim = ";", escape_double = FALSE, 
                          col_types = cols(STATYEAR = col_integer(), 
                                           SEX = col_integer(), 
                                           TYPEOFRESIDENCE = col_integer(), 
                                           POPULATIONTYPE = col_integer(), 
                                           AGE = col_integer(), 
                                           CLASSAGEFIVEYEARS = col_integer(), 
                                           NATIONALITYCATEGORY = col_integer(), 
                                           MAINRESIDENCECATEGORY = col_integer(), 
                                           GEOCOORDE = col_integer(), 
                                           GEOCOORDN = col_integer(), 
                                           INDIC_EGID = col_integer(), 
                                           statdate = col_date(format = "%d/%m/%Y")), 
                          trim_ws = TRUE) %>% 
  remove_empty(c("rows", "cols")) %>% clean_names()  

statpop2019_agg <- statpop2019 %>% 
  rename(egid = federalbuildingid) %>% 
  filter(typeofresidence == 1) %>% 
  filter(populationtype == 1) %>% 
  filter(mainresidencecategory == 1) %>% 
  filter(indic_egid == 1) %>% 
  select(-statyear, -statdate, 
         -typeofresidence, -populationtype, -mainresidencecategory,
         -indic_egid) %>% 
  group_by(egid, geocoorde, geocoordn) %>% 
  summarise(pop = n()) %>% 
  ungroup() %>% 
  st_as_sf(coords = c("geocoorde", "geocoordn"), 
           crs = 2056,
           remove = TRUE)

Using data from STATPOP 2019.

Dataset consists of 8,812,965 individuals and 1,540,622 buildings. We selected individuals with type of residence Hauptwohnsitz, pop type Ständige Wohnbevölkerung, main residency category Nur ein Hauptwohnsitz and with valid EGID building ID.

The nearest building with SSEP was assigned for each of these building. The point dataset was then overlaid with community boundaries and summary measures of pop based index were created and pop level SEP was aggregated by community.

’nhood based averages for communities

# # or with full version?
# st_gg <- st_read("data-raw/BfS/ag-b-00.03-875-gg/ggg_2021-LV95/shp/g1g21_01072021.shp",
#           as_tibble = TRUE)

ssep3_gem <- 
  st_join(sep3, 
          st_gg, 
          join = st_intersects) 

# tm_shape(gg) +
#   tm_polygons() +
#   tm_shape(ssep3_gem %>% filter(is.na(GMDNR))) +
#   tm_dots()

ssep3_gem_neigh <- ssep3_gem %>% 
  st_drop_geometry() %>% 
  group_by(GMDNR) %>% 
  summarise(n = n(),
            ssep3_median = median(ssep3)) %>% 
  ungroup() 

ssep3_gem_neigh_geo <- inner_join(gg, ssep3_gem_neigh) %>% 
  filter(!is.na(ssep3_median)) %>% 
  mutate(median_ssep3_q = ntile(ssep3_median, 5)) %>% 
  mutate(median_ssep3_q = factor(median_ssep3_q,
                                 levels = 1:5,
                                 labels = c("1st - lowest", 
                                            "2nd",  
                                            "3rd quintile", 
                                            "4th", 
                                            "5th - highest")))

write_rds(ssep3_gem_neigh_geo, "data/BfS/ssep3_gem_neigh_geo.rds")

Watch out for small n! Here some examples with communities below 20 n’hoods

Final map using median index - bubbles scaled to number of ’nhoods within community.

Population based averages for communities

# join to sep3
statpop2019_agg_sep <- st_join(statpop2019_agg, sep3, join = st_nearest_feature)

# # fails for unknown and unsolved reasons
# nearest <- st_nearest_feature(statpop2019_agg, sep3)
# statpop2019_agg_sep$dist3 <- st_distance(statpop2019_agg_sep, sep3[nearest, ], by_element = TRUE)
# rm(nearest, statpop2019_agg); gc()
# summary(statpop2019_agg_sep$dist3)

statpop2019_agg_sep_gem <- 
  st_join(statpop2019_agg_sep, st_gg, join = st_intersects) %>%  
  relocate(egid, gisid, pop,
           ssep3, ssep3_q,
           GMDNR) 

write_rds(statpop2019_agg_sep_gem, "data/BfS-closed/SEP/statpop2019_agg_sep_gem.Rds")

ssep3_gem_pop <- statpop2019_agg_sep_gem %>% 
  select(GMDNR, ssep3, pop) %>% 
  uncount(pop) %>% 
  group_by(GMDNR) %>% 
  summarise(pop = n(),
            ssep3_median = median(ssep3)) %>% 
  ungroup() 

# left_join(gg, ssep3_gem_pop) %>% 
#   filter(is.na(ssep3_median))

ssep3_gem_pop_geo <- 
  left_join(gg, ssep3_gem_pop) %>% 
  mutate(median_ssep3_q = ntile(ssep3_median, 5)) %>% 
  mutate(median_ssep3_q = factor(median_ssep3_q,
                                 levels = 1:5,
                                 labels = c("1st - lowest", 
                                            "2nd",  
                                            "3rd quintile", 
                                            "4th", 
                                            "5th - highest")))

write_rds(ssep3_gem_pop_geo, "data/BfS/ssep3_gem_pop_geo.rds")

Final map using median index - bubbles scaled to number of people (STATPOP 2020)

Raumgliederungen

Data from the app. State as of 2021-07-01 to match spatial data nicely.

raum <- read_xlsx("data-raw/BfS/Raumgliederungen.xlsx", 
                  skip = 1) %>% 
  remove_empty(c("rows", "cols")) %>% clean_names() %>% 
  filter(! is.na(bfs_gde_nummer)) %>% 
  rename(GMDNR = bfs_gde_nummer,
         GMDNAME = gemeindename,
         KTNAME = kanton,
         BZNR = bezirks_nummer,
         BZNAME = bezirksname) %>% 
  select(GMDNR, GMDNAME, KTNAME, BZNR, BZNAME, 
         stadtische_landliche_gebiete, sprachgebiete,
         urbanisierungsgrad_2011_degurba_eurostat,
         gemeindetypologie_2012_9_typen, gemeindetypologie_2012_25_typen) %>% 
  rename(gemtyp_9 = gemeindetypologie_2012_9_typen,
         gemtyp_25 = gemeindetypologie_2012_25_typen) %>% 
  mutate(gemtyp_9 = as.integer(gemtyp_9),
         gemtyp_25 = as.integer(gemtyp_25)) %>% 
  mutate(
    r_urban1 = case_when(
      stadtische_landliche_gebiete == 1 ~  "Urban",
      stadtische_landliche_gebiete == 2 ~  "Periurban",
      stadtische_landliche_gebiete == 3 ~  "Rural",
      TRUE ~  ""),
    r_urban2 = case_when(
      urbanisierungsgrad_2011_degurba_eurostat == 1 ~  "Dense",
      urbanisierungsgrad_2011_degurba_eurostat == 2 ~  "Medium",
      urbanisierungsgrad_2011_degurba_eurostat == 3 ~  "Low",
      TRUE ~  ""),
    r_lang = case_when(
      sprachgebiete == 1 ~  "German",
      sprachgebiete == 2 ~  "French",
      sprachgebiete == 3 ~  "Italian",
      sprachgebiete == 4 ~  "Romansh",
      TRUE ~  "")
  ) %>% 
  select(-stadtische_landliche_gebiete, -sprachgebiete,
         -urbanisierungsgrad_2011_degurba_eurostat) %>% 
  left_join(read_xlsx("data-raw/BfS/Raumgliederungen.xlsx", 
                      skip = 1, sheet = "CH1+CL_GDET9+2012.1") %>% 
              remove_empty(c("rows", "cols")) %>% clean_names() %>% 
              rename(gemtyp_9 = code,
                     gemtyp_9_lab = label) %>% 
              mutate(gemtyp_9 = as.integer(gemtyp_9))) %>% 
  relocate(gemtyp_9_lab, .after = gemtyp_9) %>% 
  left_join(read_xlsx("data-raw/BfS/Raumgliederungen.xlsx", 
                      skip = 1, sheet = "CH1+CL_GDET25+2012.1") %>% 
              remove_empty(c("rows", "cols")) %>% clean_names() %>% 
              rename(gemtyp_25 = code,
                     gemtyp_25_lab = label) %>% 
              mutate(gemtyp_25 = as.integer(gemtyp_25))) %>% 
  relocate(gemtyp_25_lab, .after = gemtyp_25)

Urbanization 1

x <categorical> 
# total N=2145 valid N=2145 mean=2.30 sd=0.81

Value     |    N | Raw % | Valid % | Cum. %
-------------------------------------------
Urban     |  475 | 22.14 |   22.14 |  22.14
Periurban |  560 | 26.11 |   26.11 |  48.25
Rural     | 1110 | 51.75 |   51.75 | 100.00
<NA>      |    0 |  0.00 |    <NA> |   <NA>

Urbanization 2

Using DEGURBA.

x <categorical> 
# total N=2145 valid N=2145 mean=2.48 sd=0.58

Value  |    N | Raw % | Valid % | Cum. %
----------------------------------------
Dense  |   98 |  4.57 |    4.57 |   4.57
Medium |  911 | 42.47 |   42.47 |  47.04
Low    | 1136 | 52.96 |   52.96 | 100.00
<NA>   |    0 |  0.00 |    <NA> |   <NA>

Language region

x <categorical> 
# total N=2145 valid N=2145 mean=1.40 sd=0.59

Value   |    N | Raw % | Valid % | Cum. %
-----------------------------------------
German  | 1406 | 65.55 |   65.55 |  65.55
French  |  618 | 28.81 |   28.81 |  94.36
Italian |  121 |  5.64 |    5.64 | 100.00
<NA>    |    0 |  0.00 |    <NA> |   <NA>

Typology of communities - 9

x <character> 
# total N=2145 valid N=2145 mean=4.54 sd=2.47

Value                                                                 |   N | Raw % | Valid % | Cum. %
------------------------------------------------------------------------------------------------------
Ländliche periphere Gemeinde                                          | 257 | 11.98 |   11.98 |  11.98
Ländliche zentral gelegene Gemeinde                                   | 384 | 17.90 |   17.90 |  29.88
Ländliche Zentrumsgemeinde                                            |  84 |  3.92 |    3.92 |  33.80
Periurbane Gemeinde geringer Dichte                                   | 469 | 21.86 |   21.86 |  55.66
Periurbane Gemeinde hoher Dichte                                      | 108 |  5.03 |    5.03 |  60.70
Periurbane Gemeinde mittlerer Dichte                                  | 368 | 17.16 |   17.16 |  77.86
Städtische Gemeinde einer grossen Agglomeration                       | 160 |  7.46 |    7.46 |  85.31
Städtische Gemeinde einer kleinen oder ausserhalb einer Agglomeration | 122 |  5.69 |    5.69 |  91.00
Städtische Gemeinde einer mittelgrossen Agglomeration                 | 193 |  9.00 |    9.00 | 100.00
<NA>                                                                  |   0 |  0.00 |    <NA> |   <NA>

Typology of communities - 25

x <character> 
# total N=2145 valid N=2145 mean=13.18 sd=5.74

Value                                                                                |   N | Raw % | Valid % | Cum. %
---------------------------------------------------------------------------------------------------------------------
Dienstleistungsgemeinde eines ländlichen Zentrums                                    |  20 |  0.93 |    0.93 |   0.93
Industriegemeinde eines ländlichen Zentrums                                          |  43 |  2.00 |    2.00 |   2.94
Kernstadt einer grossen Agglomeration                                                |   5 |  0.23 |    0.23 |   3.17
Kernstadt einer mittelgrossen Agglomeration                                          |  28 |  1.31 |    1.31 |   4.48
Ländliche periphere Agrargemeinde                                                    |  65 |  3.03 |    3.03 |   7.51
Ländliche periphere Mischgemeinde                                                    | 152 |  7.09 |    7.09 |  14.59
Ländliche periphere Tourismusgemeinde                                                |  40 |  1.86 |    1.86 |  16.46
Ländliche zentral gelegene Agrargemeinde                                             | 119 |  5.55 |    5.55 |  22.00
Ländliche zentral gelegene Dienstleistungsgemeinde                                   | 118 |  5.50 |    5.50 |  27.51
Ländliche zentral gelegene Industriegemeinde                                         | 147 |  6.85 |    6.85 |  34.36
Periurbane Agrargemeinde geringer Dichte                                             | 165 |  7.69 |    7.69 |  42.05
Periurbane Dienstleistungsgemeinde geringer Dichte                                   | 178 |  8.30 |    8.30 |  50.35
Periurbane Dienstleistungsgemeinde hoher Dichte                                      |  53 |  2.47 |    2.47 |  52.82
Periurbane Dienstleistungsgemeinde mittlerer Dichte                                  | 191 |  8.90 |    8.90 |  61.72
Periurbane Industriegemeinde geringer Dichte                                         | 126 |  5.87 |    5.87 |  67.60
Periurbane Industriegemeinde hoher Dichte                                            |  55 |  2.56 |    2.56 |  70.16
Periurbane Industriegemeinde mittlerer Dichte                                        | 177 |  8.25 |    8.25 |  78.41
Städtische Arbeitsplatzgemeinde einer grossen Agglomeration                          |  63 |  2.94 |    2.94 |  81.35
Städtische Arbeitsplatzgemeinde einer mittelgrossen Agglomeration                    |  62 |  2.89 |    2.89 |  84.24
Städtische Dienstleistungsgemeinde einer kleinen oder ausserhalb einer Agglomeration |  49 |  2.28 |    2.28 |  86.53
Städtische Industriegemeinde einer kleinen oder ausserhalb einer Agglomeration       |  63 |  2.94 |    2.94 |  89.46
Städtische Tourismusgemeinde einer kleinen oder ausserhalb einer Agglomeration       |  10 |  0.47 |    0.47 |  89.93
Städtische Wohngemeinde einer grossen Agglomeration                                  |  92 |  4.29 |    4.29 |  94.22
Städtische Wohngemeinde einer mittelgrossen Agglomeration                            | 103 |  4.80 |    4.80 |  99.02
Tourismusgemeinde eines ländlichen Zentrums                                          |  21 |  0.98 |    0.98 | 100.00
<NA>                                                                                 |   0 |  0.00 |    <NA> |   <NA>

Voting

Data from swissdd package.

Linked to municipality boundaries from 2022-01-01.

st_gg <- read_rds("data/swisstopo/st_gg.Rds")

June vote

Bundesgesetz vom 25.09.2020 über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz)

Data

covid_jun <- get_nationalvotes(votedates = "2021-06-13", 
                               geolevel = "municipality") %>% 
  filter(str_detect(name, "Covid-19")) %>% 
  select(-name, -id)

Dataset consists of 2,141 communities. Votes from abroad are excluded.

Results preview

Link to municipalities

Using GMDNR / mun_id for deterministic linkage.

covid_jun_gem <- st_gg %>%  
  left_join(covid_jun %>% 
              select(GMDNR, jaStimmenInProzent)) %>% 
  relocate(geometry, .after = last_col())

There are few communities that exist on the map but do not exist in the voting dataset:

# A tibble: 4 × 2
  GMDNR GMDNAME                    
  <dbl> <chr>                      
1   389 Meienried                  
2   408 Hellsau                    
3   535 Deisswil bei Münchenbuchsee
4   877 Niedermuhlern

November vote

Änderung des Bundesgesetzes über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz) (Härtefälle, Arbeitslosenversicherung, familienergänzende Kinderbetreuung, Kulturschaffende, Veranstaltungen)

Data

covid_nov <- get_nationalvotes(votedates = "2021-11-28", 
                               geolevel = "municipality") %>% 
  filter(str_detect(name, "Covid-19")) %>% 
  select(-name, -id)

Dataset consists of 2,141 communities. Votes from abroad are excluded.

Results preview

Link to municipalities

Using GMDNR / mun_id for deterministic linkage.

covid_nov_gem <- st_gg %>%  
  left_join(covid_nov %>% 
              select(GMDNR, jaStimmenInProzent)) %>% 
  mutate(vote_yes = ntile(jaStimmenInProzent, 5)) %>%
  # mutate(vote_yes = Hmisc::cut2(jaStimmenInProzent, g = 5)) %>% 
  relocate(geometry, .after = last_col())

Again, there are few communities that exist on the map but do not exist in the voting dataset:

# A tibble: 4 × 2
  GMDNR GMDNAME                    
  <dbl> <chr>                      
1   389 Meienried                  
2   408 Hellsau                    
3   535 Deisswil bei Münchenbuchsee
4   877 Niedermuhlern

Mobility & restrictions

Google

Google community mobility reports from COVID19 package.

gmr <- covid19(country = "CHE", level = 2, 
               start = "2020-01-01",
               end = "2020-12-31",
               gmr = TRUE, verbose = FALSE) %>% 
  select(-administrative_area_level, 
         -administrative_area_level_1,
         -administrative_area_level_2,
         -administrative_area_level_3,
         -iso_numeric, -iso_currency,
         - iso_alpha_2, -iso_alpha_3, 
         - latitude, -longitude, -population, 
         - key_google_mobility, -key_apple_mobility, -key_jhu_csse, -key_nuts, -key_gadm, 
         -(confirmed:vent)) %>% 
  rename(KTNAME = key_local) %>% 
  relocate(id, date, KTNAME)

Data structure for example of 2020 data:

Data summary
Name	gmr
Number of rows	8135
Number of columns	27
_______________________
Column type frequency:
character	2
Date	1
numeric	24
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
id	0	1	8	8	0	26	0
KTNAME	0	1	2	2	0	26	0

Variable type: Date

skim_variable	n_missing	complete_rate	min	max	median	n_unique
date	0	1	2020-02-01	2020-12-31	2020-07-28	335

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
school_closing	0	1.00	0.92	1.24	0.00	0.00	0.0	2.00	3	▇▁▁▂▂
workplace_closing	0	1.00	0.11	2.03	-3.00	-2.00	0.0	2.00	3	▇▂▂▁▇
cancel_events	0	1.00	1.23	1.43	-2.00	2.00	2.0	2.00	2	▂▁▁▁▇
gatherings_restrictions	0	1.00	0.95	3.21	-3.00	-3.00	3.0	4.00	4	▆▁▂▁▇
transport_closing	0	1.00	0.00	0.00	0.00	0.00	0.0	0.00	0	▁▁▇▁▁
stay_home_restrictions	0	1.00	0.55	0.50	0.00	0.00	1.0	1.00	1	▆▁▁▁▇
internal_movement_restrictions	0	1.00	0.26	0.44	0.00	0.00	0.0	1.00	1	▇▁▁▁▃
international_movement_restrictions	0	1.00	2.79	0.73	0.00	3.00	3.0	3.00	3	▁▁▁▁▇
information_campaigns	0	1.00	1.96	0.25	0.00	2.00	2.0	2.00	2	▁▁▁▁▇
testing_policy	0	1.00	2.10	0.72	0.00	2.00	2.0	3.00	3	▁▃▁▇▆
contact_tracing	0	1.00	1.75	0.44	0.00	2.00	2.0	2.00	2	▁▁▂▁▇
facial_coverings	0	1.00	1.08	1.74	-3.00	0.00	1.0	2.00	3	▂▁▃▃▇
vaccination_policy	0	1.00	0.03	0.17	0.00	0.00	0.0	0.00	1	▇▁▁▁▁
elderly_people_protection	0	1.00	1.32	1.88	-3.00	2.00	2.0	2.00	3	▂▁▁▁▇
government_response_index	0	1.00	-49.55	10.42	-60.42	-57.81	-48.7	-45.05	0	▇▅▁▁▁
stringency_index	0	1.00	-49.46	15.17	-73.15	-59.26	-46.3	-39.35	0	▆▅▇▂▁
containment_health_index	0	1.00	-51.14	10.07	-63.33	-60.12	-50.3	-47.92	0	▇▇▁▁▁
economic_support_index	0	1.00	-38.39	18.15	-62.50	-37.50	-37.5	-37.50	0	▃▇▁▁▂
retail_and_recreation_percent_change_from_baseline	1960	0.76	-25.80	26.68	-93.00	-42.00	-17.0	-6.00	56	▂▂▇▂▁
grocery_and_pharmacy_percent_change_from_baseline	2499	0.69	-2.42	19.52	-95.00	-7.00	0.0	6.00	85	▁▁▇▁▁
parks_percent_change_from_baseline	4338	0.47	25.63	58.22	-77.00	-17.00	12.0	58.00	325	▇▇▂▁▁
transit_stations_percent_change_from_baseline	608	0.93	-21.20	21.43	-84.00	-35.00	-21.0	-8.00	78	▂▇▇▁▁
workplaces_percent_change_from_baseline	608	0.93	-23.11	17.14	-90.00	-32.00	-20.0	-12.00	17	▁▁▅▇▂
residential_percent_change_from_baseline	2650	0.67	7.95	6.99	-6.00	3.00	7.0	11.00	37	▃▇▂▁▁

Note: description of variables is described here.

Example of workplace data for canton BE

Apple

Apple’s (discontinued) Mobility Trend Reports from covmobility package.

data(apple_mobility)

apple_mobility = apple_mobility %>% 
  filter(country == "Switzerland") %>% 
  filter(date <= ymd("2020-12-31")) %>% 
  filter(geo_type == "sub-region") %>% 
  select(-geo_type, -country, - alternative_name, -sub_region)

Simpler dataset with a 0-Inf score for each day and canton across three trasport modes.

transportation_type <character> 
# total N=16284 valid N=16284 mean=1.74 sd=0.90

Value   |    N | Raw % | Valid % | Cum. %
-----------------------------------------
driving | 9204 | 56.52 |   56.52 |  56.52
transit | 2124 | 13.04 |   13.04 |  69.57
walking | 4956 | 30.43 |   30.43 | 100.00
<NA>    |    0 |  0.00 |    <NA> |   <NA>

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  10.76   84.98  106.12  112.82  134.93  621.56     756

Example of data for canton BE


The downloaded binary packages are in
    /var/folders/cw/01mf9_sx1hqc0f2jc48w_dtw0000gn/T//RtmpXBqdl5/downloaded_packages

Note: Appenzell Innerrhoden & Uri have zero data!

Final link & save

w_deaths_2015_2020_year_fin <- w_deaths_2015_2020_year_pop %>% 
  left_join(ssep3_gem_pop_geo %>% 
              st_drop_geometry() %>% 
              select(-pop)) %>% 
  left_join(gg21_merged %>% 
              st_drop_geometry() %>% 
              select(-GMDNAME, -KTNAME, -BZNR, -BZNAME)
  )

write_rds(w_deaths_2015_2020_year_fin, "data/BfS-closed/monthly_deaths/w_deaths_2015_2020_year_fin.Rds")