2020 excess mortality & voting patterns in CH

Ancillary data preparation

Population

Data extracted from px-x-0103010000_201:

Ständige und nichtständige Wohnbevölkerung nach institutionellen Gliederungen, Staatsangehörigkeit (Kategorie), Geburtsort, Geschlecht und Altersklasse

Important info from footnotes:

Letzte Änderungen: Neuer Datensatz (Jahr 2020) Stand der Datenbank: Juni 2021 Stichtag: 31. Dezember Raumbezug: Gemeinden / 18.10.2020 Datenquelle: Statistik der Bevölkerung und der Haushalte STATPOP Definition der ständigen Wohnbevölkerung

Community codes

Community mutations data after 2021-01-01 from BfS are integrated into the data and pops are recalculated using these new codes to match mortality.

histcomm <- read_rds("data/BfS/histcomm.Rds") %>% 
  filter(datum_der_aufnahme >= ymd("2021-01-01")) %>% 
  select(bfs_gde_num_old, bfs_gde_num_new, gemeindename_new)

Further corrections of municipalities mutations are needed to bring the data to the state of 2022-01-01.

Data preps

Data aggregated to age groups as in deaths dataset.

pop <- read_xlsx("data-raw/BfS/px-x-0103010000_201.xlsx", 
                 col_types = c("numeric", 
                               "skip", "numeric", "text", "skip", 
                               "skip", "skip", "text", "skip", "skip", 
                               "skip", "text", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric", "numeric", "numeric", 
                               "numeric"), 
                 skip = 1) %>%
  remove_empty(c("rows", "cols")) %>% clean_names() %>% 
  rename(year = x1, 
         GMDNR = x2,
         GMDNAME = x3) %>% 
  mutate(sex = if_else(x5 == "Frau", "Female", "Male"),
         nationality = if_else(x4 == "Schweiz", "Swiss", "Foreigner"),
         year = as.integer(year)) %>% 
  select(-x4, -x5) %>% 
  relocate(sex, .after = GMDNAME) %>% 
  relocate(nationality, .after = sex) %>% 
  mutate(GMDNAME = word(GMDNAME, 2, -1)) %>% 
  fill(year, GMDNR, GMDNAME, nationality) %>% 
  mutate(`<40` = x0_4_jahre + x5_9_jahre + x10_14_jahre + x15_19_jahre + 
           x20_24_jahre + x25_29_jahre + x30_34_jahre + x35_39_jahre) %>% 
  select(-x0_4_jahre, -x5_9_jahre, -x10_14_jahre, -x15_19_jahre,  
         -x20_24_jahre, -x25_29_jahre, -x30_34_jahre, -x35_39_jahre) %>% 
  mutate(`40-49` = x40_44_jahre + x45_49_jahre) %>% 
  mutate(`50-59` = x50_54_jahre + x55_59_jahre) %>% 
  mutate(`60-69` = x60_64_jahre + x65_69_jahre) %>% 
  mutate(`70-79` = x70_74_jahre + x75_79_jahre) %>% 
  select(-x40_44_jahre, -x45_49_jahre, -x50_54_jahre, -x55_59_jahre,
         -x60_64_jahre, -x65_69_jahre, -x70_74_jahre, -x75_79_jahre) %>% 
  mutate(`80+` = x80_84_jahre +x85_89_jahre + x90_94_jahre +
           x95_99_jahre + x100_jahre_und_mehr) %>% 
  select(-x80_84_jahre, -x85_89_jahre, -x90_94_jahre,
         -x95_99_jahre, -x100_jahre_und_mehr) %>% 
  pivot_longer(cols = `<40`:`80+`, 
               names_to = "age", values_to = "pop") %>% 
  
  # BfS identified changes
  left_join(histcomm, by = c("GMDNR" = "bfs_gde_num_old")) %>% 
  mutate(GMDNR = if_else(!is.na(bfs_gde_num_new), bfs_gde_num_new, GMDNR),
         GMDNAME = if_else(!is.na(gemeindename_new), gemeindename_new, GMDNAME)) %>% 
  select(-bfs_gde_num_new, -gemeindename_new) %>% 
  
  # Essertes merge
  mutate(GMDNR = if_else(GMDNAME == "Essertes", 5805, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Essertes", "Oron", GMDNAME)) %>% 
  
  # now to reach the state of `2022-01-01`
  mutate(GMDNR = if_else(GMDNAME == "Bad Zurzach", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Bad Zurzach", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Baldingen", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Baldingen", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Böbikon", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Böbikon", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Kaiserstuhl", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Kaiserstuhl", "Zurzach", GMDNAME)) %>%
  mutate(GMDNR = if_else(GMDNAME == "Rekingen (AG)", 4324, GMDNR)) %>%
  mutate(GMDNAME = if_else(GMDNAME == "Rekingen (AG)", "Zurzach", GMDNAME)) %>%
  mutate(GMDNR = if_else(GMDNAME == "Rietheim", 4324, GMDNR)) %>%
  mutate(GMDNAME = if_else(GMDNAME == "Rietheim", "Zurzach", GMDNAME)) %>%
  mutate(GMDNR = if_else(GMDNAME == "Rümikon", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Rümikon", "Zurzach", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Wislikofen", 4324, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Wislikofen", "Zurzach", GMDNAME)) %>% 
  # Böztal merge
  mutate(GMDNR = if_else(GMDNAME == "Bözen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Bözen", "Böztal", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Elfingen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Elfingen", "Böztal", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Effingen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Effingen", "Böztal", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Hornussen", 4185, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Hornussen", "Böztal", GMDNAME)) %>% 
  # Blonay - Saint-Légier merge
  mutate(GMDNR = if_else(GMDNAME == "Blonay", 5892, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Blonay", "Blonay - Saint-Légier", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Saint-Légier-La Chiésaz", 5892, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Saint-Légier-La Chiésaz", "Blonay - Saint-Légier", GMDNAME)) %>% 
  # Murten merge
  mutate(GMDNR = if_else(GMDNAME == "Galmiz", 2275, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Galmiz", "Murten", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Gempenach", 2275, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Gempenach", "Murten", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Clavaleyres", 2275, GMDNR)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Clavaleyres", "Murten", GMDNAME)) %>% 
  
  # Schwende-Rüte merge
  mutate(GMDNAME = if_else(GMDNAME == "Schwende", "Schwende-Rüte", GMDNAME)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Rüte", "Schwende-Rüte", GMDNAME)) %>% 
  mutate(GMDNR = if_else(GMDNAME == "Schwende-Rüte", 3112, GMDNR)) %>% 
  # Val Mara merge
  mutate(GMDNAME = if_else(GMDNAME == "Melano", "Val Mara", GMDNAME)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Maroggia", "Val Mara", GMDNAME)) %>% 
  mutate(GMDNAME = if_else(GMDNAME == "Rovio", "Val Mara", GMDNAME)) %>%  
  mutate(GMDNR = if_else(GMDNAME == "Val Mara", 5240, GMDNR)) %>%  
  
  group_by(year, GMDNR, GMDNAME, sex, age) %>% 
  summarise(pop = as.integer(sum(pop))) %>% 
  ungroup() %>% 
  arrange(year, GMDNR, sex, age)

Yearly totals (on 31st Dec!):

# A tibble: 7 × 2
   year pop      
  <int> <chr>    
1  2014 8,237,666
2  2015 8,327,126
3  2016 8,419,550
4  2017 8,484,130
5  2018 8,544,527
6  2019 8,606,033
7  2020 8,670,300

Age (40 plus!) distributions over years

Predicting 2020 population

For each age, sex, nationality stratum, for each municipality separately, Poisson model was fitted using the 2014-2019 data; then prediction was made for 2020 & 2021 years. The gist of this operation was to exclude the effect of pandemic from the pop counts in years affected.

pop_ext_poi <- pop %>% 
  filter(year != 2020) %>% 
  # filter(GMDNR == 261 | GMDNR == 389) %>%
  group_by(GMDNR, age, sex) %>% 
  do(glm(pop ~ year, data = ., family = "poisson") %>%
       predict(., newdata = tibble(year = as.integer(2020)), type = "response") %>%
       tibble(year = as.integer(2020), pop_ext_poi = .)) %>% 
  ungroup() %>% 
  mutate(pop_ext_poi = as.integer(round(pop_ext_poi))) %>% 
  arrange(year, GMDNR, sex, age) %>% 
  relocate(year)

Summary of values:

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
     0.0     42.0    109.0    338.5    283.0 117341.0        2 

Note: 2 NAs!

# A tibble: 7 × 6
   year GMDNR GMDNAME sex   age     pop
  <int> <dbl> <chr>   <chr> <chr> <int>
1  2014  4232 Geltwil Male  80+       0
2  2015  4232 Geltwil Male  80+       0
3  2016  4232 Geltwil Male  80+       0
4  2017  4232 Geltwil Male  80+       0
5  2018  4232 Geltwil Male  80+       0
6  2019  4232 Geltwil Male  80+       1
7  2020  4232 Geltwil Male  80+       3
# A tibble: 1 × 5
   year GMDNR age   sex   pop_ext_poi
  <int> <dbl> <chr> <chr>       <int>
1  2020  4232 80+   Male           NA
# A tibble: 7 × 6
   year GMDNR GMDNAME   sex   age     pop
  <int> <dbl> <chr>     <chr> <chr> <int>
1  2014   389 Meienried Male  80+       0
2  2015   389 Meienried Male  80+       0
3  2016   389 Meienried Male  80+       0
4  2017   389 Meienried Male  80+       0
5  2018   389 Meienried Male  80+       0
6  2019   389 Meienried Male  80+       1
7  2020   389 Meienried Male  80+       1
# A tibble: 1 × 5
   year GMDNR age   sex   pop_ext_poi
  <int> <dbl> <chr> <chr>       <int>
1  2020   389 80+   Male           NA

Results in large municipality

Results for large municipality are fine. Predictions are in red.

For small municipality we might end up with sth more wiggly.

Note: lack of prediction for 2020 & 2021 for oldest males!

There are two municipalities with missing predictions for oldest males:

# A tibble: 2 × 4
  GMDNR age   sex   GMDNAME  
  <dbl> <chr> <chr> <chr>    
1   389 80+   Male  Meienried
2  4232 80+   Male  Geltwil  

Population in this strata was replaced by estimates of a simple linear model.

pop_ext_lm <- pop %>%
  filter(year != 2020) %>%
  group_by(GMDNR, age, sex) %>%
  do(lm(pop ~ year, data = .) %>%
       predict(., newdata = tibble(year = as.integer(2020))) %>%
       tibble(year = as.integer(2020), pop_ext_lm = .)) %>%
  ungroup() %>%
  mutate(pop_ext_lm = as.integer(round(pop_ext_lm))) %>%
  arrange(year, GMDNR, sex, age) %>%
  relocate(year)
pop_complete <- pop %>% 
  
  # extrapolated 2020
  left_join(pop_ext_poi) %>% 
  mutate(pop_ext_poi = if_else(year < 2020, 
                               pop, pop_ext_poi)) %>% 
  
  # replacing 2 poi missings with lm
  left_join(pop_ext_lm) %>% 
  mutate(pop_ext_poi = if_else(year == 2020 & is.na(pop_ext_poi), 
                               pop_ext_lm, pop_ext_poi)) %>% 
  select(-pop_ext_lm) 

Differences in 2020 pops

Comparing estimates to real values

pop_compare <- pop %>% 
  
  filter(year == 2020) %>% 
  select(-year) %>% 
  
  # poi model with 2 missings
  left_join(pop_ext_poi) %>% 
  
  # lm model
  left_join(pop_ext_lm) %>% 
  mutate(pop_ext_poi = if_else(is.na(pop_ext_poi), 
                               pop_ext_lm, pop_ext_poi)) %>% 
  select(-pop_ext_lm) %>% 
  
  # diffs
  mutate(pop_diff = pop_ext_poi - pop,
         perc_diff = (pop_diff / pop_ext_poi) * 100,
         alt = pop * 100) %>% 
  mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
         perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>% 
  select(-alt)

Differences

frq(pop_compare, pop_ext_poi > pop) 
pop_ext_poi > pop <lgl> 
# total N=25740 valid N=25740 mean=0.50 sd=0.50

Value |     N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 12931 | 50.24 |   50.24 |  50.24
TRUE  | 12809 | 49.76 |   49.76 | 100.00
<NA>  |     0 |  0.00 |    <NA> |   <NA>
frq(pop_compare, pop_ext_poi < pop) 
pop_ext_poi < pop <lgl> 
# total N=25740 valid N=25740 mean=0.43 sd=0.50

Value |     N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 14670 | 56.99 |   56.99 |  56.99
TRUE  | 11070 | 43.01 |   43.01 | 100.00
<NA>  |     0 |  0.00 |    <NA> |   <NA>
frq(pop_compare, pop_ext_poi == pop)
pop_ext_poi == pop <lgl> 
# total N=25740 valid N=25740 mean=0.07 sd=0.26

Value |     N | Raw % | Valid % | Cum. %
----------------------------------------
FALSE | 23879 | 92.77 |   92.77 |  92.77
TRUE  |  1861 |  7.23 |    7.23 | 100.00
<NA>  |     0 |  0.00 |    <NA> |   <NA>
summary(pop_compare$pop_diff)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-391.000   -4.000    0.000    1.649    5.000 2594.000 
summary(pop_compare$perc_diff)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
-400.0000   -3.9062    0.0000   -0.1833    4.7059  300.0000 

Age group specific diffs

Municipality specific diffs

pop_compare_agg <- pop_compare %>% 
  filter(age != "<40") %>% 
  group_by(GMDNR) %>% 
  summarise(pop = sum(pop),
            pop_ext_poi = sum(pop_ext_poi)) %>% 
  ungroup() %>% 
  mutate(pop_diff = pop_ext_poi - pop,
         perc_diff = (pop_diff / pop_ext_poi) * 100,
         alt = pop * 100) %>% 
  mutate(perc_diff = if_else(pop_diff == 0, 0, perc_diff),
         perc_diff = if_else(pop_ext_poi == 0, alt, perc_diff)) %>% 
  select(-alt)

gg_centr <- read_rds("data/BfS/gg.Rds") %>% 
  select(GMDNR, GMDNAME) %>% 
  st_centroid() %>% 
  left_join(pop_compare_agg)
summary(pop_compare_agg$pop_diff)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-214.000   -5.000    5.000    7.896   16.000  548.000 
summary(pop_compare_agg$perc_diff)
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
-15.5556  -0.4819   0.5505   0.7524   1.8628  31.4286 

Getting mid-year pops

For each age, sex, nationality stratum, for each municipality separately, simple mean of the two years of data is used to estimate the middle point. In such case we use, for instance data from 31st Dec 2014 and 31st Dec 2015 to estimate pop mid-2015.

for (i in 2014:2019){
  
  result <- pop_complete %>% 
    filter(year >= i & year <= i + 1) %>% 
    # filter(GMDNR == 261 | GMDNR == 389) %>%
    group_by(GMDNR, age, sex) %>% 
    summarise(pop_mid_poi = mean(pop_ext_poi)) %>% 
    ungroup() %>% 
    mutate(year = as.integer(i + 1),
           pop_mid_poi = as.integer(round(pop_mid_poi))) %>% 
    arrange(GMDNR, sex, age) %>% 
    relocate(year)
  
  if (i == 2014) {
    pop_mid_poi <- result
  } else {
    pop_mid_poi <- bind_rows(pop_mid_poi, result)
  }
  
}

Large municipality

Again, results for large municipality are fine:

Small municipality

Again, for small municipality we might end up with sth more wiggly.

Deaths > pop problem

frq(w_deaths_2015_2020_year_pop, observed > pop_mid_poi)
observed > pop_mid_poi <lgl> 
# total N=154440 valid N=154440 mean=0.00 sd=0.01

Value |      N | Raw % | Valid % | Cum. %
-----------------------------------------
FALSE | 154420 | 99.99 |   99.99 |  99.99
TRUE  |     20 |  0.01 |    0.01 | 100.00
<NA>  |      0 |  0.00 |    <NA> |   <NA>
surplus <- w_deaths_2015_2020_year_pop %>% 
  filter(observed > pop_mid_poi) %>% 
  arrange(GMDNR, year) %>% 
  mutate(difference = pop_mid_poi - observed)

There are 20 strata from 19 communities where count of deaths is larger than count of pop. The difference is always 1 in plus over pop 0.

In all these cases pop was increased by 1 to match the count of deaths!

w_deaths_2015_2020_year_pop %<>% 
  mutate(pop_mid_poi = if_else(observed > pop_mid_poi, observed, pop_mid_poi))

Cartogram

Using 2019 pop counts (ignoring age structure) to derive cartograms to avoid effects of pandemic.

Communities & pops

R solution 1

Using cartogram package.

# takes a while!
carto <- carto_data %>%
  cartogram::cartogram_cont("pop")

write_rds(carto, "data/carto/carto.Rds")

carto_ncont <- carto_data %>% 
  cartogram::cartogram_ncont("pop")

write_rds(carto_ncont, "data/carto/carto_ncont.Rds")

carto_dorling <- carto_data %>% 
  cartogram::cartogram_dorling("pop")

write_rds(carto_dorling, "data/carto/carto_dorling.Rds")
carto_gsm <- carto_data %>%
  cartogramR::cartogramR(count = "pop", method = "gsm")

write_rds(carto_gsm, "data/carto/carto_gsm.Rds")

ScapeToad version

carto_data %>% 
  st_write("data/carto/in_gg.shp", delete_dsn = TRUE)

se %>% 
  select(GMDNR, GMDNAME) %>% 
  st_write("data/carto/in_se.shp", delete_dsn = TRUE)

# java -Xmx1g -jar "C:\Program Files\ScapeToad-v11\ScapeToad.jar"

‘Classic’ cartogram

Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering

Noncontinuous cartogram

Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering

Dorling carotgram

Committee Member: 1(1) 2(1) 3(1) 4(1) 5(1) 6(1) 7(1) 8(1) 9(1) 10(1)
Computing Hierarchical Clustering

Swiss-SEP

sep3 <- read_rds("../SNC_Swiss-SEP2/FINAL/RDS/ssep3_user_geo.Rds") %>%
  select(gisid, ssep3, ssep3_q) %>% 
  st_transform(crs = 2056)

Using 1,527,173 n’hoods from version 3.0:

ssep3_q <integer> 
# total N=1527173 valid N=1527173 mean=3.00 sd=1.41

Value |      N | Raw % | Valid % | Cum. %
-----------------------------------------
    1 | 305439 |    20 |      20 |     20
    2 | 305434 |    20 |      20 |     40
    3 | 305432 |    20 |      20 |     60
    4 | 305439 |    20 |      20 |     80
    5 | 305429 |    20 |      20 |    100
 <NA> |      0 |     0 |    <NA> |   <NA>

This is aggregated to the level of 2,145 communities from swisstopo.

STATPOP

statpop2019 <- read_delim("data-raw/BfS-closed/STATPOP/statpop2019_220098p.zip", 
                          delim = ";", escape_double = FALSE, 
                          col_types = cols(STATYEAR = col_integer(), 
                                           SEX = col_integer(), 
                                           TYPEOFRESIDENCE = col_integer(), 
                                           POPULATIONTYPE = col_integer(), 
                                           AGE = col_integer(), 
                                           CLASSAGEFIVEYEARS = col_integer(), 
                                           NATIONALITYCATEGORY = col_integer(), 
                                           MAINRESIDENCECATEGORY = col_integer(), 
                                           GEOCOORDE = col_integer(), 
                                           GEOCOORDN = col_integer(), 
                                           INDIC_EGID = col_integer(), 
                                           statdate = col_date(format = "%d/%m/%Y")), 
                          trim_ws = TRUE) %>% 
  remove_empty(c("rows", "cols")) %>% clean_names()  

statpop2019_agg <- statpop2019 %>% 
  rename(egid = federalbuildingid) %>% 
  filter(typeofresidence == 1) %>% 
  filter(populationtype == 1) %>% 
  filter(mainresidencecategory == 1) %>% 
  filter(indic_egid == 1) %>% 
  select(-statyear, -statdate, 
         -typeofresidence, -populationtype, -mainresidencecategory,
         -indic_egid) %>% 
  group_by(egid, geocoorde, geocoordn) %>% 
  summarise(pop = n()) %>% 
  ungroup() %>% 
  st_as_sf(coords = c("geocoorde", "geocoordn"), 
           crs = 2056,
           remove = TRUE)

Using data from STATPOP 2019.

Dataset consists of 8,812,965 individuals and 1,540,622 buildings. We selected individuals with type of residence Hauptwohnsitz, pop type Ständige Wohnbevölkerung, main residency category Nur ein Hauptwohnsitz and with valid EGID building ID.

The nearest building with SSEP was assigned for each of these building. The point dataset was then overlaid with community boundaries and summary measures of pop based index were created and pop level SEP was aggregated by community.

’nhood based averages for communities

# # or with full version?
# st_gg <- st_read("data-raw/BfS/ag-b-00.03-875-gg/ggg_2021-LV95/shp/g1g21_01072021.shp",
#           as_tibble = TRUE)

ssep3_gem <- 
  st_join(sep3, 
          st_gg, 
          join = st_intersects) 

# tm_shape(gg) +
#   tm_polygons() +
#   tm_shape(ssep3_gem %>% filter(is.na(GMDNR))) +
#   tm_dots()

ssep3_gem_neigh <- ssep3_gem %>% 
  st_drop_geometry() %>% 
  group_by(GMDNR) %>% 
  summarise(n = n(),
            ssep3_median = median(ssep3)) %>% 
  ungroup() 

ssep3_gem_neigh_geo <- inner_join(gg, ssep3_gem_neigh) %>% 
  filter(!is.na(ssep3_median)) %>% 
  mutate(median_ssep3_q = ntile(ssep3_median, 5)) %>% 
  mutate(median_ssep3_q = factor(median_ssep3_q,
                                 levels = 1:5,
                                 labels = c("1st - lowest", 
                                            "2nd",  
                                            "3rd quintile", 
                                            "4th", 
                                            "5th - highest")))

write_rds(ssep3_gem_neigh_geo, "data/BfS/ssep3_gem_neigh_geo.rds")

Watch out for small n! Here some examples with communities below 20 n’hoods

Final map using median index - bubbles scaled to number of ’nhoods within community.

Population based averages for communities

# join to sep3
statpop2019_agg_sep <- st_join(statpop2019_agg, sep3, join = st_nearest_feature)

# # fails for unknown and unsolved reasons
# nearest <- st_nearest_feature(statpop2019_agg, sep3)
# statpop2019_agg_sep$dist3 <- st_distance(statpop2019_agg_sep, sep3[nearest, ], by_element = TRUE)
# rm(nearest, statpop2019_agg); gc()
# summary(statpop2019_agg_sep$dist3)

statpop2019_agg_sep_gem <- 
  st_join(statpop2019_agg_sep, st_gg, join = st_intersects) %>%  
  relocate(egid, gisid, pop,
           ssep3, ssep3_q,
           GMDNR) 

write_rds(statpop2019_agg_sep_gem, "data/BfS-closed/SEP/statpop2019_agg_sep_gem.Rds")
ssep3_gem_pop <- statpop2019_agg_sep_gem %>% 
  select(GMDNR, ssep3, pop) %>% 
  uncount(pop) %>% 
  group_by(GMDNR) %>% 
  summarise(pop = n(),
            ssep3_median = median(ssep3)) %>% 
  ungroup() 

# left_join(gg, ssep3_gem_pop) %>% 
#   filter(is.na(ssep3_median))

ssep3_gem_pop_geo <- 
  left_join(gg, ssep3_gem_pop) %>% 
  mutate(median_ssep3_q = ntile(ssep3_median, 5)) %>% 
  mutate(median_ssep3_q = factor(median_ssep3_q,
                                 levels = 1:5,
                                 labels = c("1st - lowest", 
                                            "2nd",  
                                            "3rd quintile", 
                                            "4th", 
                                            "5th - highest")))

write_rds(ssep3_gem_pop_geo, "data/BfS/ssep3_gem_pop_geo.rds")

Final map using median index - bubbles scaled to number of people (STATPOP 2020)

Raumgliederungen

Data from the app. State as of 2021-07-01 to match spatial data nicely.

raum <- read_xlsx("data-raw/BfS/Raumgliederungen.xlsx", 
                  skip = 1) %>% 
  remove_empty(c("rows", "cols")) %>% clean_names() %>% 
  filter(! is.na(bfs_gde_nummer)) %>% 
  rename(GMDNR = bfs_gde_nummer,
         GMDNAME = gemeindename,
         KTNAME = kanton,
         BZNR = bezirks_nummer,
         BZNAME = bezirksname) %>% 
  select(GMDNR, GMDNAME, KTNAME, BZNR, BZNAME, 
         stadtische_landliche_gebiete, sprachgebiete,
         urbanisierungsgrad_2011_degurba_eurostat,
         gemeindetypologie_2012_9_typen, gemeindetypologie_2012_25_typen) %>% 
  rename(gemtyp_9 = gemeindetypologie_2012_9_typen,
         gemtyp_25 = gemeindetypologie_2012_25_typen) %>% 
  mutate(gemtyp_9 = as.integer(gemtyp_9),
         gemtyp_25 = as.integer(gemtyp_25)) %>% 
  mutate(
    r_urban1 = case_when(
      stadtische_landliche_gebiete == 1 ~  "Urban",
      stadtische_landliche_gebiete == 2 ~  "Periurban",
      stadtische_landliche_gebiete == 3 ~  "Rural",
      TRUE ~  ""),
    r_urban2 = case_when(
      urbanisierungsgrad_2011_degurba_eurostat == 1 ~  "Dense",
      urbanisierungsgrad_2011_degurba_eurostat == 2 ~  "Medium",
      urbanisierungsgrad_2011_degurba_eurostat == 3 ~  "Low",
      TRUE ~  ""),
    r_lang = case_when(
      sprachgebiete == 1 ~  "German",
      sprachgebiete == 2 ~  "French",
      sprachgebiete == 3 ~  "Italian",
      sprachgebiete == 4 ~  "Romansh",
      TRUE ~  "")
  ) %>% 
  select(-stadtische_landliche_gebiete, -sprachgebiete,
         -urbanisierungsgrad_2011_degurba_eurostat) %>% 
  left_join(read_xlsx("data-raw/BfS/Raumgliederungen.xlsx", 
                      skip = 1, sheet = "CH1+CL_GDET9+2012.1") %>% 
              remove_empty(c("rows", "cols")) %>% clean_names() %>% 
              rename(gemtyp_9 = code,
                     gemtyp_9_lab = label) %>% 
              mutate(gemtyp_9 = as.integer(gemtyp_9))) %>% 
  relocate(gemtyp_9_lab, .after = gemtyp_9) %>% 
  left_join(read_xlsx("data-raw/BfS/Raumgliederungen.xlsx", 
                      skip = 1, sheet = "CH1+CL_GDET25+2012.1") %>% 
              remove_empty(c("rows", "cols")) %>% clean_names() %>% 
              rename(gemtyp_25 = code,
                     gemtyp_25_lab = label) %>% 
              mutate(gemtyp_25 = as.integer(gemtyp_25))) %>% 
  relocate(gemtyp_25_lab, .after = gemtyp_25) 

Urbanization 1

x <categorical> 
# total N=2145 valid N=2145 mean=2.30 sd=0.81

Value     |    N | Raw % | Valid % | Cum. %
-------------------------------------------
Urban     |  475 | 22.14 |   22.14 |  22.14
Periurban |  560 | 26.11 |   26.11 |  48.25
Rural     | 1110 | 51.75 |   51.75 | 100.00
<NA>      |    0 |  0.00 |    <NA> |   <NA>

Urbanization 2

Using DEGURBA.

x <categorical> 
# total N=2145 valid N=2145 mean=2.48 sd=0.58

Value  |    N | Raw % | Valid % | Cum. %
----------------------------------------
Dense  |   98 |  4.57 |    4.57 |   4.57
Medium |  911 | 42.47 |   42.47 |  47.04
Low    | 1136 | 52.96 |   52.96 | 100.00
<NA>   |    0 |  0.00 |    <NA> |   <NA>

Language region

x <categorical> 
# total N=2145 valid N=2145 mean=1.40 sd=0.59

Value   |    N | Raw % | Valid % | Cum. %
-----------------------------------------
German  | 1406 | 65.55 |   65.55 |  65.55
French  |  618 | 28.81 |   28.81 |  94.36
Italian |  121 |  5.64 |    5.64 | 100.00
<NA>    |    0 |  0.00 |    <NA> |   <NA>

Typology of communities - 9

x <character> 
# total N=2145 valid N=2145 mean=4.54 sd=2.47

Value                                                                 |   N | Raw % | Valid % | Cum. %
------------------------------------------------------------------------------------------------------
Ländliche periphere Gemeinde                                          | 257 | 11.98 |   11.98 |  11.98
Ländliche zentral gelegene Gemeinde                                   | 384 | 17.90 |   17.90 |  29.88
Ländliche Zentrumsgemeinde                                            |  84 |  3.92 |    3.92 |  33.80
Periurbane Gemeinde geringer Dichte                                   | 469 | 21.86 |   21.86 |  55.66
Periurbane Gemeinde hoher Dichte                                      | 108 |  5.03 |    5.03 |  60.70
Periurbane Gemeinde mittlerer Dichte                                  | 368 | 17.16 |   17.16 |  77.86
Städtische Gemeinde einer grossen Agglomeration                       | 160 |  7.46 |    7.46 |  85.31
Städtische Gemeinde einer kleinen oder ausserhalb einer Agglomeration | 122 |  5.69 |    5.69 |  91.00
Städtische Gemeinde einer mittelgrossen Agglomeration                 | 193 |  9.00 |    9.00 | 100.00
<NA>                                                                  |   0 |  0.00 |    <NA> |   <NA>

Typology of communities - 25

x <character> 
# total N=2145 valid N=2145 mean=13.18 sd=5.74

Value                                                                                |   N | Raw % | Valid % | Cum. %
---------------------------------------------------------------------------------------------------------------------
Dienstleistungsgemeinde eines ländlichen Zentrums                                    |  20 |  0.93 |    0.93 |   0.93
Industriegemeinde eines ländlichen Zentrums                                          |  43 |  2.00 |    2.00 |   2.94
Kernstadt einer grossen Agglomeration                                                |   5 |  0.23 |    0.23 |   3.17
Kernstadt einer mittelgrossen Agglomeration                                          |  28 |  1.31 |    1.31 |   4.48
Ländliche periphere Agrargemeinde                                                    |  65 |  3.03 |    3.03 |   7.51
Ländliche periphere Mischgemeinde                                                    | 152 |  7.09 |    7.09 |  14.59
Ländliche periphere Tourismusgemeinde                                                |  40 |  1.86 |    1.86 |  16.46
Ländliche zentral gelegene Agrargemeinde                                             | 119 |  5.55 |    5.55 |  22.00
Ländliche zentral gelegene Dienstleistungsgemeinde                                   | 118 |  5.50 |    5.50 |  27.51
Ländliche zentral gelegene Industriegemeinde                                         | 147 |  6.85 |    6.85 |  34.36
Periurbane Agrargemeinde geringer Dichte                                             | 165 |  7.69 |    7.69 |  42.05
Periurbane Dienstleistungsgemeinde geringer Dichte                                   | 178 |  8.30 |    8.30 |  50.35
Periurbane Dienstleistungsgemeinde hoher Dichte                                      |  53 |  2.47 |    2.47 |  52.82
Periurbane Dienstleistungsgemeinde mittlerer Dichte                                  | 191 |  8.90 |    8.90 |  61.72
Periurbane Industriegemeinde geringer Dichte                                         | 126 |  5.87 |    5.87 |  67.60
Periurbane Industriegemeinde hoher Dichte                                            |  55 |  2.56 |    2.56 |  70.16
Periurbane Industriegemeinde mittlerer Dichte                                        | 177 |  8.25 |    8.25 |  78.41
Städtische Arbeitsplatzgemeinde einer grossen Agglomeration                          |  63 |  2.94 |    2.94 |  81.35
Städtische Arbeitsplatzgemeinde einer mittelgrossen Agglomeration                    |  62 |  2.89 |    2.89 |  84.24
Städtische Dienstleistungsgemeinde einer kleinen oder ausserhalb einer Agglomeration |  49 |  2.28 |    2.28 |  86.53
Städtische Industriegemeinde einer kleinen oder ausserhalb einer Agglomeration       |  63 |  2.94 |    2.94 |  89.46
Städtische Tourismusgemeinde einer kleinen oder ausserhalb einer Agglomeration       |  10 |  0.47 |    0.47 |  89.93
Städtische Wohngemeinde einer grossen Agglomeration                                  |  92 |  4.29 |    4.29 |  94.22
Städtische Wohngemeinde einer mittelgrossen Agglomeration                            | 103 |  4.80 |    4.80 |  99.02
Tourismusgemeinde eines ländlichen Zentrums                                          |  21 |  0.98 |    0.98 | 100.00
<NA>                                                                                 |   0 |  0.00 |    <NA> |   <NA>

Voting

Data from swissdd package.

Linked to municipality boundaries from 2022-01-01.

st_gg <- read_rds("data/swisstopo/st_gg.Rds")

June vote

Bundesgesetz vom 25.09.2020 über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz)

Data

covid_jun <- get_nationalvotes(votedates = "2021-06-13", 
                               geolevel = "municipality") %>% 
  filter(str_detect(name, "Covid-19")) %>% 
  select(-name, -id)

Dataset consists of 2,141 communities. Votes from abroad are excluded.

Results preview

November vote

Änderung des Bundesgesetzes über die gesetzlichen Grundlagen für Verordnungen des Bundesrates zur Bewältigung der Covid-19-Epidemie (Covid-19-Gesetz) (Härtefälle, Arbeitslosenversicherung, familienergänzende Kinderbetreuung, Kulturschaffende, Veranstaltungen)

Data

covid_nov <- get_nationalvotes(votedates = "2021-11-28", 
                               geolevel = "municipality") %>% 
  filter(str_detect(name, "Covid-19")) %>% 
  select(-name, -id)

Dataset consists of 2,141 communities. Votes from abroad are excluded.

Results preview

Mobility & restrictions

Google

Google community mobility reports from COVID19 package.

gmr <- covid19(country = "CHE", level = 2, 
               start = "2020-01-01",
               end = "2020-12-31",
               gmr = TRUE, verbose = FALSE) %>% 
  select(-administrative_area_level, 
         -administrative_area_level_1,
         -administrative_area_level_2,
         -administrative_area_level_3,
         -iso_numeric, -iso_currency,
         - iso_alpha_2, -iso_alpha_3, 
         - latitude, -longitude, -population, 
         - key_google_mobility, -key_apple_mobility, -key_jhu_csse, -key_nuts, -key_gadm, 
         -(confirmed:vent)) %>% 
  rename(KTNAME = key_local) %>% 
  relocate(id, date, KTNAME)

Data structure for example of 2020 data:

Data summary
Name gmr
Number of rows 8135
Number of columns 27
_______________________
Column type frequency:
character 2
Date 1
numeric 24
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id 0 1 8 8 0 26 0
KTNAME 0 1 2 2 0 26 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 0 1 2020-02-01 2020-12-31 2020-07-28 335

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
school_closing 0 1.00 0.92 1.24 0.00 0.00 0.0 2.00 3 ▇▁▁▂▂
workplace_closing 0 1.00 0.11 2.03 -3.00 -2.00 0.0 2.00 3 ▇▂▂▁▇
cancel_events 0 1.00 1.23 1.43 -2.00 2.00 2.0 2.00 2 ▂▁▁▁▇
gatherings_restrictions 0 1.00 0.95 3.21 -3.00 -3.00 3.0 4.00 4 ▆▁▂▁▇
transport_closing 0 1.00 0.00 0.00 0.00 0.00 0.0 0.00 0 ▁▁▇▁▁
stay_home_restrictions 0 1.00 0.55 0.50 0.00 0.00 1.0 1.00 1 ▆▁▁▁▇
internal_movement_restrictions 0 1.00 0.26 0.44 0.00 0.00 0.0 1.00 1 ▇▁▁▁▃
international_movement_restrictions 0 1.00 2.79 0.73 0.00 3.00 3.0 3.00 3 ▁▁▁▁▇
information_campaigns 0 1.00 1.96 0.25 0.00 2.00 2.0 2.00 2 ▁▁▁▁▇
testing_policy 0 1.00 2.10 0.72 0.00 2.00 2.0 3.00 3 ▁▃▁▇▆
contact_tracing 0 1.00 1.75 0.44 0.00 2.00 2.0 2.00 2 ▁▁▂▁▇
facial_coverings 0 1.00 1.08 1.74 -3.00 0.00 1.0 2.00 3 ▂▁▃▃▇
vaccination_policy 0 1.00 0.03 0.17 0.00 0.00 0.0 0.00 1 ▇▁▁▁▁
elderly_people_protection 0 1.00 1.32 1.88 -3.00 2.00 2.0 2.00 3 ▂▁▁▁▇
government_response_index 0 1.00 -49.55 10.42 -60.42 -57.81 -48.7 -45.05 0 ▇▅▁▁▁
stringency_index 0 1.00 -49.46 15.17 -73.15 -59.26 -46.3 -39.35 0 ▆▅▇▂▁
containment_health_index 0 1.00 -51.14 10.07 -63.33 -60.12 -50.3 -47.92 0 ▇▇▁▁▁
economic_support_index 0 1.00 -38.39 18.15 -62.50 -37.50 -37.5 -37.50 0 ▃▇▁▁▂
retail_and_recreation_percent_change_from_baseline 1960 0.76 -25.80 26.68 -93.00 -42.00 -17.0 -6.00 56 ▂▂▇▂▁
grocery_and_pharmacy_percent_change_from_baseline 2499 0.69 -2.42 19.52 -95.00 -7.00 0.0 6.00 85 ▁▁▇▁▁
parks_percent_change_from_baseline 4338 0.47 25.63 58.22 -77.00 -17.00 12.0 58.00 325 ▇▇▂▁▁
transit_stations_percent_change_from_baseline 608 0.93 -21.20 21.43 -84.00 -35.00 -21.0 -8.00 78 ▂▇▇▁▁
workplaces_percent_change_from_baseline 608 0.93 -23.11 17.14 -90.00 -32.00 -20.0 -12.00 17 ▁▁▅▇▂
residential_percent_change_from_baseline 2650 0.67 7.95 6.99 -6.00 3.00 7.0 11.00 37 ▃▇▂▁▁

Note: description of variables is described here.

Example of workplace data for canton BE

Apple

Apple’s (discontinued) Mobility Trend Reports from covmobility package.

data(apple_mobility)

apple_mobility = apple_mobility %>% 
  filter(country == "Switzerland") %>% 
  filter(date <= ymd("2020-12-31")) %>% 
  filter(geo_type == "sub-region") %>% 
  select(-geo_type, -country, - alternative_name, -sub_region)

Simpler dataset with a 0-Inf score for each day and canton across three trasport modes.

transportation_type <character> 
# total N=16284 valid N=16284 mean=1.74 sd=0.90

Value   |    N | Raw % | Valid % | Cum. %
-----------------------------------------
driving | 9204 | 56.52 |   56.52 |  56.52
transit | 2124 | 13.04 |   13.04 |  69.57
walking | 4956 | 30.43 |   30.43 | 100.00
<NA>    |    0 |  0.00 |    <NA> |   <NA>
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  10.76   84.98  106.12  112.82  134.93  621.56     756 

Example of data for canton BE


The downloaded binary packages are in
    /var/folders/cw/01mf9_sx1hqc0f2jc48w_dtw0000gn/T//RtmpXBqdl5/downloaded_packages

Note: Appenzell Innerrhoden & Uri have zero data!