Preparations in file 00.Rmd
# Load the per-dog ownership cost data and drop the
# `cost_compared_to_other_breeds` column.
dog_ownership_cost <- read_rds("data/dog_ownership_cost.Rds") %>%
  select(-cost_compared_to_other_breeds)
[1] 183
[1] 182
<- read_rds("data/geo/SSC.Rds")
[1] 183
# Load the two wide-format breed tables.
# NOTE(review): the file names suggest counts (`_n`) and proportions (`_p`);
# confirm against the preparation script.
wide_cost_n <- read_rds("data/wide_cost_n.Rds")
wide_cost_p <- read_rds("data/wide_cost_p.Rds")
Summarizing all dogs, and expensive only.
Excluding areas with no dogs.
# Aggregate to one row per SSC_NAME16, summing the `expensive` column
# (presumably a 0/1 or logical flag, so the sum is a count -- TODO confirm).
dog_ownership_agg <- dog_ownership_cost %>%
  group_by(SSC_NAME16) %>%
  summarise(dogs_exp = sum(expensive))
# Attach the aggregated expensive-dog figures to SSC, derive the share of
# expensive dogs, and keep the geometry column last.
# NOTE(review): `dogs_total` is not created in this section; it is assumed
# to already be a column of SSC -- confirm.
SSC <- SSC %>%
  left_join(dog_ownership_agg) %>%
  mutate(dogs_exp_prop = dogs_exp / dogs_total) %>%
  relocate(geometry, .after = last_col())
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000 0.1178 0.1366 0.1403 0.1650 0.2636
<- function (seifa_index) {
<- enquo(seifa_index)
SSC st_drop_geometry() %>%
group_by(!!myenc) %>%
summarize(mean = mean(dogs_exp_prop, na.rm = TRUE),
sd = sd(dogs_exp_prop, na.rm = TRUE),
p25 = quantile(dogs_exp_prop, c(0.25), na.rm = TRUE),
p50 = quantile(dogs_exp_prop, c(0.50), na.rm = TRUE),
p75 = quantile(dogs_exp_prop, c(0.75), na.rm = TRUE)) %>%
<- function (seifa_index) {
<- enquo(seifa_index)
SSC st_drop_geometry() %>%
select(!!myenc, dogs_exp_prop) %>%
mutate_if(is.factor, as.numeric) %>%
correlation(method = "kendall")
<- function (seifa_index) {
<- eval(substitute(lm(dogs_exp_prop ~ seifa_index,
model data = SSC, na.action = na.omit)))
<- estimate_means(model)
<- enquo(seifa_index)
aes(x = !!myenc,
y = dogs_exp_prop,
fill = !!myenc)) +
geom_violin(alpha = 0.66) +
geom_jitter2(width = 0.05, alpha = 0.5) +
geom_line(data = means, aes(y = Mean, group = 1), size = 1) +
geom_pointrange(data = means,
aes(y = Mean, ymin = CI_low, ymax = CI_high),
size = 1,
color = "white") +
scale_fill_brewer(palette = "BrBG") +
ylab("Proportion of expensive dogs") +
# A tibble: 10 x 6
IRSD_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.161 0.0443 0.146 0.159 0.184
2 2 0.130 0.0833 0.0913 0.155 0.170
3 3 0.147 0.0258 0.129 0.140 0.160
4 4 0.147 0.0411 0.121 0.144 0.173
5 5 0.145 0.0305 0.130 0.137 0.155
6 6 0.142 0.0350 0.115 0.136 0.156
7 7 0.127 0.0213 0.113 0.128 0.136
8 8 0.148 0.0268 0.125 0.148 0.163
9 9 0.126 0.0351 0.112 0.122 0.141
10 10 0.127 0.0336 0.104 0.114 0.152
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IRSD_d | dogs_exp_prop | -0.19 | [-0.28, -0.10] | -3.69 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
# A tibble: 10 x 6
IRSD_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.185 0.0179 0.179 0.193 0.195
2 2 0.164 0.0605 0.138 0.161 0.179
3 3 0.142 0.0540 0.125 0.159 0.180
4 4 0.161 0.0163 0.151 0.158 0.159
5 5 0.150 0.0654 0.122 0.160 0.178
6 6 0.122 0.0813 0.0962 0.136 0.166
7 7 0.143 0.0346 0.124 0.142 0.166
8 8 0.150 0.0312 0.130 0.140 0.157
9 9 0.136 0.0321 0.112 0.131 0.149
10 10 0.134 0.0316 0.113 0.127 0.156
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IRSD_d_orig | dogs_exp_prop | -0.18 | [-0.27, -0.08] | -3.31 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
# A tibble: 10 x 6
IRSAD_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.156 0.0641 0.138 0.162 0.186
2 2 0.140 0.0689 0.130 0.156 0.173
3 3 0.143 0.0232 0.127 0.144 0.157
4 4 0.157 0.0408 0.135 0.158 0.185
5 5 0.139 0.0311 0.128 0.135 0.146
6 6 0.137 0.0262 0.115 0.129 0.154
7 7 0.136 0.0354 0.115 0.129 0.146
8 8 0.141 0.0260 0.123 0.132 0.155
9 9 0.132 0.0370 0.114 0.124 0.154
10 10 0.121 0.0315 0.101 0.112 0.127
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IRSAD_d | dogs_exp_prop | -0.23 | [-0.32, -0.13] | -4.35 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
# A tibble: 10 x 6
IRSAD_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.185 0.0179 0.179 0.193 0.195
2 2 0.145 0.0955 0.0751 0.131 0.201
3 3 0.161 0.0248 0.143 0.161 0.179
4 4 0.138 0.0183 0.131 0.138 0.144
5 5 0.153 0.0777 0.151 0.158 0.176
6 6 0.126 0.0875 0.0641 0.159 0.177
7 7 0.157 0.0258 0.136 0.156 0.173
8 8 0.147 0.0297 0.127 0.144 0.155
9 9 0.144 0.0352 0.129 0.143 0.162
10 10 0.133 0.0319 0.112 0.127 0.149
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IRSAD_d_orig | dogs_exp_prop | -0.22 | [-0.31, -0.13] | -3.98 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
# A tibble: 10 x 6
IER_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.162 0.0452 0.136 0.165 0.187
2 2 0.137 0.0331 0.126 0.134 0.154
3 3 0.138 0.0210 0.118 0.137 0.152
4 4 0.144 0.0410 0.115 0.129 0.152
5 5 0.125 0.0702 0.110 0.138 0.165
6 6 0.152 0.0288 0.129 0.156 0.172
7 7 0.132 0.0424 0.113 0.139 0.150
8 8 0.132 0.0256 0.115 0.129 0.153
9 9 0.137 0.0425 0.114 0.122 0.146
10 10 0.144 0.0439 0.114 0.153 0.171
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IER_d | dogs_exp_prop | -0.08 | [-0.18, 0.02] | -1.56 | 0.118
p-value adjustment method: Holm (1979)
Observations: 183
# A tibble: 10 x 6
IER_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.161 0.0440 0.140 0.162 0.183
2 2 0.137 0.0316 0.124 0.132 0.154
3 3 0.145 0.0387 0.117 0.137 0.163
4 4 0.134 0.0263 0.114 0.128 0.144
5 5 0.130 0.0693 0.114 0.143 0.173
6 6 0.145 0.0297 0.127 0.138 0.161
7 7 0.144 0.0340 0.128 0.141 0.154
8 8 0.129 0.0465 0.127 0.130 0.152
9 9 0.139 0.0369 0.115 0.132 0.164
10 10 0.138 0.0412 0.111 0.127 0.164
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IER_d_orig | dogs_exp_prop | -0.08 | [-0.18, 0.01] | -1.57 | 0.116
p-value adjustment method: Holm (1979)
Observations: 183
# A tibble: 10 x 6
IEO_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.156 0.0646 0.133 0.165 0.188
2 2 0.166 0.0265 0.152 0.159 0.178
3 3 0.132 0.0683 0.123 0.144 0.162
4 4 0.146 0.0292 0.123 0.145 0.165
5 5 0.146 0.0385 0.130 0.147 0.162
6 6 0.153 0.0306 0.131 0.152 0.171
7 7 0.133 0.0276 0.117 0.129 0.136
8 8 0.124 0.0304 0.115 0.125 0.132
9 9 0.127 0.0209 0.113 0.127 0.132
10 10 0.119 0.0302 0.106 0.112 0.119
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IEO_d | dogs_exp_prop | -0.30 | [-0.39, -0.21] | -5.84 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
# A tibble: 10 x 6
IEO_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.153 0.0665 0.138 0.179 0.194
2 2 0.175 0.0911 0.131 0.180 0.222
3 3 0.0625 0.0884 0.0312 0.0625 0.0938
4 4 0.172 0.0393 0.145 0.165 0.188
5 5 0.168 0.0221 0.151 0.171 0.175
6 6 0.166 0.0260 0.161 0.176 0.182
7 7 0.136 0.0733 0.127 0.154 0.159
8 8 0.150 0.0320 0.132 0.148 0.162
9 9 0.147 0.0331 0.130 0.147 0.165
10 10 0.130 0.0300 0.112 0.127 0.144
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
IEO_d_orig | dogs_exp_prop | -0.27 | [-0.35, -0.17] | -4.82 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
data # wide_cost_n %>%
wide_cost_p st_drop_geometry() %>%
select(akita:last_col()) %>%
# View(cov(data))
# Principal component analysis of the breed columns held in `data`.
# `standardize = FALSE` analyses the variables on their original scale;
# `n = "auto"` leaves the number of retained components to the package.
pca <- principal_components(data,
                            standardize = FALSE,
                            n = "auto")
# Loadings from Principal Component Analysis (no rotation)
Variable | PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 | PC13 | PC14 | PC15 | PC16 | PC17 | PC18 | Complexity
akita | -3.34e-03 | 4.88e-04 | -2.13e-04 | 1.37e-03 | -8.24e-05 | 5.68e-04 | -6.24e-04 | 1.73e-04 | -3.23e-04 | 2.15e-03 | -2.67e-05 | 1.32e-03 | -2.15e-04 | 5.46e-03 | -3.37e-03 | -2.55e-03 | -4.01e-05 | -1.49e-04 | 3.84
british_bulldog | -0.04 | 1.16e-03 | -2.24e-03 | -0.01 | -6.08e-03 | -0.03 | 4.39e-03 | -1.67e-04 | -2.62e-03 | 4.79e-04 | 8.09e-04 | -2.79e-04 | -8.18e-04 | 1.11e-04 | 6.19e-05 | -1.73e-04 | 7.54e-05 | 1.62e-05 | 2.30
dogue_de_bordeaux | -9.89e-03 | -5.17e-04 | 2.47e-04 | 9.15e-04 | 7.80e-04 | -4.56e-03 | 4.28e-03 | -1.62e-03 | 0.02 | 2.89e-04 | -6.32e-04 | 4.60e-04 | 2.79e-04 | -6.63e-05 | -3.35e-04 | 9.68e-06 | -9.85e-05 | -1.80e-05 | 2.13
french_bulldog | -0.06 | -0.02 | 3.52e-03 | -0.06 | -6.62e-04 | 6.41e-03 | 6.69e-05 | 1.14e-03 | 7.85e-04 | 9.30e-05 | -2.35e-04 | 2.05e-04 | 7.57e-05 | 7.95e-05 | -4.49e-05 | -6.80e-05 | -4.45e-05 | -4.36e-06 | 2.20
german_shepherd | -0.20 | 0.15 | 0.02 | -2.30e-03 | -2.71e-04 | 9.16e-04 | 1.69e-04 | -3.81e-05 | 1.16e-05 | -3.43e-04 | 1.59e-05 | 6.94e-05 | 3.05e-05 | 2.25e-05 | 1.22e-05 | 6.03e-06 | 8.36e-06 | -7.78e-07 | 1.88
irish_wolfhound | -0.02 | 0.01 | -0.02 | -3.62e-03 | 0.04 | -3.73e-03 | -9.09e-04 | 1.54e-04 | -4.72e-04 | -4.01e-05 | -1.78e-04 | 9.06e-05 | -1.62e-05 | 2.79e-05 | 2.52e-06 | 5.67e-06 | 6.17e-05 | 4.31e-06 | 2.61
maltese | -0.54 | -0.06 | 3.43e-03 | 7.75e-03 | 9.18e-04 | 8.51e-04 | 3.82e-04 | 2.13e-04 | -2.81e-04 | -3.01e-05 | -7.79e-05 | -6.56e-05 | -3.69e-05 | -2.82e-05 | 1.83e-05 | 4.05e-05 | 2.38e-05 | -1.73e-06 | 1.02
rottweiler | -0.08 | 0.03 | -0.07 | -1.57e-03 | -0.01 | 2.84e-03 | -4.65e-04 | -4.86e-05 | 4.33e-04 | 1.88e-04 | 1.62e-04 | -2.87e-05 | -1.96e-04 | -8.98e-05 | -1.68e-05 | 1.23e-05 | -5.54e-05 | 1.38e-06 | 2.40
samoyed | -0.01 | -1.74e-04 | 1.77e-03 | -8.18e-04 | -2.12e-03 | -5.91e-03 | -0.02 | 5.43e-03 | 2.57e-03 | 5.12e-04 | 2.96e-04 | -8.06e-05 | 4.46e-04 | -2.48e-04 | 2.07e-06 | -1.82e-05 | -9.77e-05 | 1.32e-05 | 1.96
yorkshire_terrier | -0.01 | -2.35e-03 | 9.14e-04 | -3.53e-03 | -3.39e-04 | -5.43e-04 | -7.62e-03 | -0.02 | -4.84e-04 | -1.00e-03 | -2.43e-04 | 2.85e-04 | -1.60e-03 | -3.40e-04 | -5.95e-04 | -1.93e-05 | 3.39e-04 | -1.34e-05 | 2.32
chinese_crested_dog | -3.91e-03 | 2.53e-04 | -1.72e-03 | -6.27e-04 | -1.33e-03 | -2.88e-03 | -7.20e-04 | -2.52e-03 | -1.04e-03 | -9.14e-04 | -6.32e-03 | 3.73e-04 | 5.29e-03 | 2.07e-03 | 2.13e-03 | 5.85e-04 | -1.56e-03 | -3.53e-05 | 4.95
chow_chow | -4.87e-03 | -9.81e-04 | -2.39e-04 | 5.99e-05 | -6.66e-05 | -4.58e-04 | 2.52e-04 | -1.35e-03 | -5.98e-04 | 1.49e-03 | 4.36e-03 | 8.12e-03 | 3.57e-03 | -7.17e-04 | 2.54e-04 | 3.92e-04 | 1.40e-06 | 2.78e-05 | 3.00
lowchen | -4.43e-03 | -7.13e-04 | -3.60e-04 | -1.29e-03 | 7.87e-04 | 4.87e-04 | 5.05e-05 | -2.65e-03 | 6.87e-04 | -7.12e-04 | 6.47e-03 | -5.61e-03 | 4.64e-03 | 1.18e-03 | 4.03e-04 | -1.29e-04 | -3.22e-04 | -2.64e-06 | 4.44
saluki | -1.70e-03 | -1.85e-04 | 1.60e-04 | 5.64e-04 | 3.59e-04 | 6.20e-04 | 5.22e-05 | -5.46e-04 | 2.53e-04 | -1.14e-04 | 1.51e-04 | 2.77e-04 | -2.34e-04 | -1.73e-03 | 2.64e-03 | -5.49e-03 | -9.83e-04 | 1.84e-05 | 2.11
pharaoh_hound | -1.17e-03 | 1.03e-03 | -8.24e-04 | -3.59e-04 | -7.39e-04 | -1.17e-03 | 6.53e-04 | 9.25e-05 | -8.86e-04 | 7.20e-04 | -2.83e-03 | -1.08e-03 | 4.17e-03 | -3.01e-03 | -3.22e-03 | -1.41e-03 | 2.83e-03 | -8.12e-06 | 6.25
st_bernard | -3.52e-03 | 3.64e-03 | 1.52e-03 | 2.41e-04 | 6.54e-04 | 1.26e-03 | 6.03e-05 | -1.92e-03 | -2.81e-04 | 0.01 | -5.33e-04 | -1.50e-03 | -1.13e-04 | -4.82e-04 | 1.03e-03 | 4.96e-04 | -2.00e-04 | 4.08e-05 | 1.61
tibetan_mastiff | -1.52e-03 | 2.72e-04 | 5.26e-04 | 2.60e-05 | 4.69e-04 | -4.21e-04 | 5.78e-04 | -7.54e-05 | -8.27e-04 | 2.31e-04 | -6.56e-05 | -4.26e-04 | 3.87e-04 | -2.48e-03 | -3.60e-03 | 1.23e-04 | -4.28e-03 | 2.67e-05 | 3.21
canadian_eskimo_dog | -1.88e-04 | -5.72e-05 | 7.13e-05 | 1.57e-05 | 2.74e-06 | 1.02e-04 | 2.73e-05 | 7.69e-05 | 4.26e-05 | -5.43e-05 | -1.47e-04 | 4.45e-07 | 2.18e-05 | 7.49e-05 | 1.94e-05 | 4.83e-05 | -3.04e-05 | -1.12e-04 | 5.72
black_russian_terrier | -4.47e-04 | -1.11e-04 | 3.79e-05 | 5.02e-05 | -5.27e-05 | 1.90e-04 | 3.37e-05 | -1.12e-04 | 1.08e-04 | -1.11e-04 | -1.66e-04 | 1.98e-05 | 4.09e-05 | 4.36e-04 | -2.02e-04 | -1.28e-04 | 4.35e-05 | 2.31e-03 | 1.22
The 18 principal components accounted for 100.00% of the total variance of the original data (PC1 = 89.51%, PC2 = 7.12%, PC3 = 1.33%, PC4 = 0.86%, PC5 = 0.45%, PC6 = 0.24%, PC7 = 0.16%, PC8 = 0.09%, PC9 = 0.07%, PC10 = 0.04%, PC11 = 0.03%, PC12 = 0.03%, PC13 = 0.02%, PC14 = 0.01%, PC15 = 0.01%, PC16 = 0.01%, PC17 = 0.01%, PC18 = 0.00%).
# (Explained) Variance of Components
Parameter | PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 | PC13 | PC14 | PC15 | PC16 | PC17 | PC18
Eigenvalues | 0.344 | 0.027 | 0.005 | 0.003 | 0.002 | 9.350e-04 | 6.293e-04 | 3.515e-04 | 2.646e-04 | 1.454e-04 | 1.105e-04 | 1.034e-04 | 8.350e-05 | 5.492e-05 | 4.793e-05 | 3.945e-05 | 2.997e-05 | 5.374e-06
Variance Explained | 0.895 | 0.071 | 0.013 | 0.009 | 0.005 | 0.002 | 0.002 | 9.139e-04 | 6.880e-04 | 3.780e-04 | 2.872e-04 | 2.689e-04 | 2.171e-04 | 1.428e-04 | 1.246e-04 | 1.026e-04 | 7.791e-05 | 1.397e-05
Variance Explained (Cumulative) | 0.895 | 0.966 | 0.980 | 0.988 | 0.993 | 0.995 | 0.997 | 0.998 | 0.998 | 0.999 | 0.999 | 0.999 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000 | 1.000
Variance Explained (Proportion) | 0.895 | 0.071 | 0.013 | 0.009 | 0.005 | 0.002 | 0.002 | 9.139e-04 | 6.880e-04 | 3.780e-04 | 2.872e-04 | 2.689e-04 | 2.171e-04 | 1.428e-04 | 1.246e-04 | 1.026e-04 | 7.791e-05 | 1.397e-05
# Combine the area-level columns (SSC_CODE16 through caution) with the score
# on the first principal component, and bin that score into ten rank-based
# groups with ntile() so it can be compared against the SEIFA deciles.
pca_results <- wide_cost_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  select(SSC_CODE16:caution) %>%
  mutate(pca_raw = predict(pca)$Component_1,
         pca = ntile(pca_raw, 10))
ggplot(pca_results, aes(x = IRSD, y = pca_raw)) +
pca == IRSD_d <lgl>
# total N=179 valid N=179 mean=0.11 sd=0.31
Value | N | Raw % | Valid % | Cum. %
FALSE | 160 | 89.39 | 89.39 | 89.39
TRUE | 19 | 10.61 | 10.61 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
ggplot(pca_results, aes(x = IRSAD, y = pca_raw)) +
pca == IRSAD_d <lgl>
# total N=179 valid N=179 mean=0.12 sd=0.32
Value | N | Raw % | Valid % | Cum. %
FALSE | 158 | 88.27 | 88.27 | 88.27
TRUE | 21 | 11.73 | 11.73 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
ggplot(pca_results, aes(x = IER, y = pca_raw)) +
pca == IER_d <lgl>
# total N=179 valid N=179 mean=0.14 sd=0.35
Value | N | Raw % | Valid % | Cum. %
FALSE | 154 | 86.03 | 86.03 | 86.03
TRUE | 25 | 13.97 | 13.97 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
ggplot(pca_results, aes(x = IEO, y = pca_raw)) +
pca == IEO_d <lgl>
# total N=179 valid N=179 mean=0.08 sd=0.28
Value | N | Raw % | Valid % | Cum. %
FALSE | 164 | 91.62 | 91.62 | 91.62
TRUE | 15 | 8.38 | 8.38 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
data %>%
wide_cost_p st_drop_geometry() %>%
select(akita:last_col()) %>%
# Estimate the number of clusters by consensus across the methods offered by
# the listed packages, then print the result.
n <- n_clusters(data, package = c("easystats", "NbClust", "mclust"))
n
# Method Agreement Procedure:
The choice of 2 clusters is supported by 7 (33.33%) methods out of 21 (Elbow, Silhouette, Duda, Pseudot2, Beale, Mcclain, Dunn).
# k-means clustering of `data` into two clusters.
rez_kmeans <- cluster_analysis(data, n = 2, method = "kmeans")
# Clustering Solution
The 2 clusters accounted for 5.68% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
1 | 44 | 1123.56 | -0.36 | 0.87 | 0.06 | 1.00 | -0.69 | -0.26 | 0.12 | -0.33 | 0.30 | 0.46 | 0.21 | 0.20 | 0.52 | -0.14 | 0.06 | -0.15 | 0.11 | 0.13 | -0.07
2 | 135 | 2066.27 | 0.12 | -0.28 | -0.02 | -0.33 | 0.22 | 0.08 | -0.04 | 0.11 | -0.10 | -0.15 | -0.07 | -0.07 | -0.17 | 0.04 | -0.02 | 0.05 | -0.03 | -0.04 | 0.02
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 192.167 | 3189.833 | 0.057
# You can access the predicted clusters via 'predict()'.
# Attach the k-means cluster assignment of each row to the area-level data.
cluster_results <- wide_cost_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  mutate(cluster = predict(rez_kmeans))
aggregate(data = cluster_results, german_shepherd ~ cluster, mean)
cluster german_shepherd
1 1 0.1226264
2 2 0.2410961
aggregate(data = cluster_results, french_bulldog ~ cluster, mean)
cluster french_bulldog
1 1 0.11382257
2 2 0.03761286
# aggregate(data = cluster_results, maltese ~ cluster, mean)
# aggregate(data = cluster_results, rottweiler ~ cluster, mean)
# Hierarchical clustering of `data`, cut into two clusters.
rez_hclust <- cluster_analysis(data, n = 2, method = "hclust")
# Clustering Solution
The 2 clusters accounted for 4.56% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
1 | 177 | 3126.78 | 4.95e-03 | -0.02 | 6.57e-03 | 5.91e-03 | 0.01 | 6.44e-03 | 6.71e-03 | -0.01 | -0.02 | -0.04 | -0.07 | 5.31e-03 | 4.91e-03 | 2.82e-03 | -0.05 | 3.56e-03 | 2.76e-03 | 1.31e-03 | 1.99e-03
2 | 2 | 101.09 | -0.44 | 2.02 | -0.58 | -0.52 | -1.16 | -0.57 | -0.59 | 0.90 | 1.60 | 3.51 | 5.80 | -0.47 | -0.43 | -0.25 | 4.40 | -0.32 | -0.24 | -0.12 | -0.18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 154.132 | 3227.868 | 0.046
# You can access the predicted clusters via 'predict()'.
# Hierarchical k-means clustering of `data` into two clusters.
rez_hkmeans <- cluster_analysis(data, n = 2, method = "hkmeans")
# Clustering Solution
The 2 clusters accounted for 5.82% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
1 | 177 | 3115.75 | 4.95e-03 | -0.01 | 6.57e-03 | 4.74e-03 | -8.54e-03 | 2.42e-03 | 0.02 | -0.01 | 6.21e-03 | 6.39e-03 | -0.04 | 5.31e-03 | 4.91e-03 | 2.82e-03 | -0.10 | -0.01 | -0.03 | 1.31e-03 | 1.99e-03
2 | 2 | 69.34 | -0.44 | 1.15 | -0.58 | -0.42 | 0.76 | -0.21 | -1.78 | 1.14 | -0.55 | -0.57 | 3.16 | -0.47 | -0.43 | -0.25 | 8.53 | 0.97 | 2.32 | -0.12 | -0.18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 196.908 | 3185.092 | 0.058
# You can access the predicted clusters via 'predict()'.
# Partitioning around medoids (PAM) clustering of `data` into two clusters.
rez_pam <- cluster_analysis(data, n = 2, method = "pam")
# Clustering Solution
The 2 clusters accounted for 4.02% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
1 | 97 | 1570.11 | -0.01 | -0.51 | -0.10 | -0.25 | 0.25 | -0.09 | 0.04 | 0.05 | 0.03 | -4.79e-03 | -0.14 | -0.25 | -0.36 | 0.08 | -0.03 | 0.10 | 0.03 | -0.01 | -0.06
2 | 82 | 1676.06 | 0.01 | 0.60 | 0.12 | 0.30 | -0.30 | 0.11 | -0.05 | -0.06 | -0.03 | 5.66e-03 | 0.17 | 0.29 | 0.43 | -0.09 | 0.03 | -0.12 | -0.03 | 0.02 | 0.08
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 135.832 | 3246.168 | 0.040
# You can access the predicted clusters via 'predict()'.
# Hierarchical clustering without a fixed number of clusters (`n = NULL`).
# `iterations` and `ci` are passed on to cluster_analysis(); see its
# documentation for how they select clusters for method = "hclust".
rez_hclust2 <- cluster_analysis(data,
                                n = NULL,
                                method = "hclust",
                                iterations = 500,
                                ci = 0.90)
# Clustering Solution
The 7 clusters accounted for 5.25% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
0 | 167 | 3196.03 | -0.08 | 5.33e-03 | 0.01 | 0.03 | 0.02 | 0.03 | -0.04 | -2.35e-03 | -0.01 | 0.02 | -3.47e-03 | 8.27e-03 | 0.02 | 0.02 | 0.01 | 0.02 | 3.17e-03 | 8.33e-03 | -0.07
1 | 2 | 0.22 | -0.44 | 0.52 | -0.58 | -0.12 | -0.41 | -0.57 | 0.87 | -0.07 | -0.42 | -0.05 | -0.42 | -0.47 | -0.43 | -0.25 | -0.18 | -0.32 | -0.24 | -0.12 | -0.18
2 | 2 | 0.32 | -0.28 | -0.34 | 0.26 | -0.46 | -0.04 | -0.28 | 0.42 | 0.18 | -0.45 | -0.19 | 0.18 | 0.23 | 0.14 | -0.25 | -0.18 | -0.32 | -0.24 | -0.12 | -0.18
3 | 2 | 0.77 | 0.56 | -0.21 | -0.39 | -0.83 | -0.46 | -0.38 | 1.10 | -0.22 | -0.11 | 0.04 | -0.10 | 0.27 | -0.43 | -0.25 | -0.18 | -0.32 | 0.96 | -0.12 | -0.18
4 | 2 | 1.12 | 0.97 | 0.83 | 1.02 | 0.02 | -0.46 | 9.78e-03 | -0.05 | 0.46 | -0.11 | -0.44 | 1.48 | -0.47 | -0.43 | -0.25 | -0.18 | -0.32 | -0.24 | -0.12 | -0.18
5 | 2 | 1.58 | 3.58 | -0.50 | -0.58 | -0.23 | 0.37 | -0.57 | -0.23 | -0.09 | 2.45 | -0.57 | -0.42 | -0.47 | -0.43 | -0.25 | -0.18 | -0.32 | -0.24 | -0.12 | -0.18
6 | 2 | 4.46 | 2.58 | -0.75 | -0.58 | -0.74 | -0.27 | -0.41 | 0.86 | -0.07 | -0.19 | -0.20 | -0.42 | 0.22 | -0.43 | -0.25 | -0.18 | -0.32 | -0.24 | -0.12 | 6.43
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 177.508 | 8.460 | 0.052
# You can access the predicted clusters via 'predict()'.
<- n_clusters_dbscan(data, min_size = 0.01)
The DBSCAN method, based on the total clusters sum of squares, suggests that the optimal eps = 10.1164031203656 (with min. cluster size set to 2), which corresponds to 0 clusters.
# DBSCAN clustering of `data` with a manually chosen eps of 5.
rez_dbscan <- cluster_analysis(data, method = "dbscan", dbscan_eps = 5)
# Clustering Solution
The 2 clusters accounted for 2.82% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
0 | 38 | 2133.66 | 0.34 | 0.11 | 0.09 | 0.20 | 0.34 | 0.11 | -0.72 | 0.05 | 0.25 | 0.41 | 0.27 | 0.01 | -9.08e-03 | 0.34 | 0.27 | 0.42 | 0.33 | 0.37 | 0.49
1 | 141 | 1153.02 | -0.09 | -0.03 | -0.02 | -0.05 | -0.09 | -0.03 | 0.19 | -0.01 | -0.07 | -0.11 | -0.07 | -3.76e-03 | 2.45e-03 | -0.09 | -0.07 | -0.11 | -0.09 | -0.10 | -0.13
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 95.311 | 1153.024 | 0.028
# You can access the predicted clusters via 'predict()'.
# HDBSCAN clustering of `data` with default settings.
rez_hdbscan <- cluster_analysis(data, method = "hdbscan")
# Clustering Solution
The unique cluster accounted for 0.00% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
0 | 179 | 3382 | -4.02e-17 | -6.48e-18 | 4.23e-17 | 2.98e-17 | 7.79e-17 | 4.27e-17 | 1.15e-16 | -8.45e-17 | 5.32e-17 | -1.50e-17 | 2.05e-17 | 3.35e-17 | 2.87e-17 | 9.80e-19 | 8.95e-18 | 1.30e-17 | -1.72e-17 | 0 | 1.30e-17
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 0.000 | 0.000 | 0.000
# You can access the predicted clusters via 'predict()'.
# plot(rez_hdbscan)
<- cluster_analysis(data, method = "pamk")
# Clustering Solution
The 10 clusters accounted for 39.07% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
1 | 62 | 484.15 | 0.07 | -0.43 | -0.06 | -0.31 | 0.06 | -0.12 | 0.30 | 0.03 | -0.11 | -0.12 | -0.09 | -0.38 | -0.33 | -0.16 | -0.07 | -0.02 | -9.44e-03 | -0.12 | -0.18
10 | 1 | 0.00 | -0.44 | 3.51 | -0.58 | -0.98 | -1.08 | -0.57 | -1.19 | 3.05 | -0.55 | -0.57 | 6.74 | -0.47 | -0.43 | -0.25 | 8.98 | -0.32 | -0.24 | -0.12 | -0.18
2 | 11 | 119.31 | -0.34 | 0.19 | -0.33 | 2.66 | -1.13 | -0.51 | 0.57 | -0.90 | -0.20 | 0.58 | 0.01 | -0.47 | 0.10 | -0.13 | -0.18 | -0.22 | -0.24 | -0.12 | 0.58
3 | 46 | 495.06 | 0.07 | 0.49 | 0.35 | 2.64e-03 | -0.21 | 0.13 | -8.46e-03 | 0.08 | -0.24 | -0.12 | 0.16 | -0.21 | 0.79 | -0.07 | -0.03 | -0.15 | -0.14 | -0.12 | -0.12
4 | 12 | 357.94 | -0.06 | -0.47 | -0.42 | -0.76 | 2.44 | 1.37 | -2.65 | 1.25 | -0.44 | -0.57 | -0.25 | -0.30 | -0.43 | -0.25 | -0.18 | 1.09 | -0.24 | -0.12 | -0.18
5 | 34 | 456.17 | -0.14 | 0.27 | -0.10 | 0.04 | -0.31 | -0.22 | 0.18 | -0.25 | 0.85 | 0.43 | -0.10 | 1.27 | -0.25 | -0.07 | -0.14 | -0.09 | -0.21 | -0.05 | -0.18
6 | 4 | 99.14 | -0.44 | 0.42 | -0.20 | 0.14 | 0.46 | 0.17 | -0.59 | -0.51 | -0.55 | -0.13 | -0.42 | 0.16 | -0.12 | -0.25 | 1.88 | 0.59 | 5.63 | -0.12 | -0.18
7 | 4 | 17.60 | -0.44 | -0.78 | 0.13 | -0.19 | -0.07 | -0.06 | 0.37 | -0.39 | -0.01 | -0.06 | -0.42 | -0.04 | -0.43 | 5.64 | -0.18 | -0.05 | -0.24 | -0.12 | -0.18
8 | 3 | 20.39 | 1.58 | -0.90 | 0.09 | -0.52 | -0.15 | -0.34 | 0.52 | -0.12 | -0.08 | 0.27 | 0.15 | -0.01 | 0.12 | -0.25 | -0.18 | -0.32 | -0.24 | -0.12 | 6.63
9 | 2 | 10.87 | 0.30 | -0.69 | 0.20 | 0.11 | -0.19 | -0.40 | 0.73 | -0.49 | -0.23 | -0.57 | 0.36 | -0.47 | -0.43 | -0.25 | -0.18 | -0.32 | -0.24 | 9.11 | -0.18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 1321.372 | 2060.628 | 0.391
# You can access the predicted clusters via 'predict()'.
# Mixture-model clustering of `data` with default settings.
rez_mixture <- cluster_analysis(data, method = "mixture")
# Clustering Solution
The unique cluster accounted for 0.00% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
1 | 179 | 3382 | -4.02e-17 | -6.48e-18 | 4.23e-17 | 2.98e-17 | 7.79e-17 | 4.27e-17 | 1.15e-16 | -8.45e-17 | 5.32e-17 | -1.50e-17 | 2.05e-17 | 3.35e-17 | 2.87e-17 | 9.80e-19 | 8.95e-18 | 1.30e-17 | -1.72e-17 | 0 | 1.30e-17
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
3382.000 | 0.000 | 3382.000 | 0.000
# You can access the predicted clusters via 'predict()'.
# Collect the clustering solutions and pass them to cluster_meta(), which
# (per its documentation) returns a matrix of pairwise probabilities that two
# observations are clustered together across the supplied solutions.
list_of_results <- list(rez_kmeans, rez_hclust, rez_hkmeans, rez_pam,
                        rez_hclust2, rez_dbscan, rez_hdbscan, rez_mixture)
probability_matrix <- cluster_meta(list_of_results)

# Draw the matrix unscaled so that cell colours map directly to its values.
heatmap(probability_matrix, scale = "none",
        col = grDevices::hcl.colors(256, palette = "inferno"))
data %>%
wide_cost_p st_drop_geometry() %>%
select(akita:last_col()) %>%
# BIC for Gaussian mixture models with 2 to 9 components; the result is
# later handed to Mclust() through its `x` argument.
BIC <- mclustBIC(data, G = seq(from = 2, to = 9))
Best BIC values:
BIC 19781.85 19297.7036 18847.5907
BIC diff 0.00 -484.1458 -934.2586
# Fit the mixture model using the precomputed BIC object, then print the
# fitted model.
mod1 <- Mclust(data, x = BIC)
mod1
'Mclust' model object: (VEI,2)
Available components:
[1] "call" "data" "modelName" "n"
[5] "d" "G" "BIC" "loglik"
[9] "df" "bic" "icl" "hypvol"
[13] "parameters" "z" "classification" "uncertainty"
summary(mod1, parameters = TRUE)
Gaussian finite mixture model fitted by EM algorithm
Mclust VEI (diagonal, equal shape) model with 2 components:
log-likelihood n df BIC ICL
10043.95 179 59 19781.85 19777.99
Clustering table:
1 2
117 62
Mixing probabilities:
1 2
0.6541546 0.3458454
[,1] [,2]
akita 0.002505341815638923494 0.0048484110
british_bulldog 0.037505394220490657908 0.0349970021
dogue_de_bordeaux 0.009531525587163920676 0.0106058945
french_bulldog 0.056033093753594345743 0.0569377788
german_shepherd 0.193475508352204395024 0.2469663463
irish_wolfhound 0.021633373196265383298 0.0337355101
maltese 0.564473150499892883936 0.4513020939
rottweiler 0.078914243307080883527 0.0923734268
samoyed 0.011079551634817269914 0.0180179368
yorkshire_terrier 0.009257182415792304317 0.0141727888
chinese_crested_dog 0.003165296957289343883 0.0062020689
chow_chow 0.004529585726722043688 0.0053345070
lowchen 0.003865480531525179904 0.0056947053
saluki 0.000608239446469556206 0.0035649829
pharaoh_hound 0.000457795006272484367 0.0032889170
st_bernard 0.002101000367237471772 0.0074501985
tibetan_mastiff 0.000864236363207894313 0.0028020386
canadian_eskimo_dog 0.000000000818330443212 0.0004857234
black_russian_terrier 0.000000000000004364326 0.0012196693
akita british_bulldog dogue_de_bordeaux
akita 0.00001782492 0.0000000000 0.00000000000
british_bulldog 0.00000000000 0.0004073227 0.00000000000
dogue_de_bordeaux 0.00000000000 0.0000000000 0.00009781482
french_bulldog german_shepherd irish_wolfhound
akita 0.000000000 0.000000000 0.0000000000
british_bulldog 0.000000000 0.000000000 0.0000000000
dogue_de_bordeaux 0.000000000 0.000000000 0.0000000000
maltese rottweiler samoyed yorkshire_terrier
akita 0.000000000 0.000000000 0.0000000000 0.0000000000
british_bulldog 0.000000000 0.000000000 0.0000000000 0.0000000000
dogue_de_bordeaux 0.000000000 0.000000000 0.0000000000 0.0000000000
chinese_crested_dog chow_chow lowchen
akita 0.00000000000 0.00000000000 0.00000000000
british_bulldog 0.00000000000 0.00000000000 0.00000000000
dogue_de_bordeaux 0.00000000000 0.00000000000 0.00000000000
saluki pharaoh_hound st_bernard tibetan_mastiff
akita 0.000000000000 0.000000000000 0.000000000 0.000000000000
british_bulldog 0.000000000000 0.000000000000 0.000000000 0.000000000000
dogue_de_bordeaux 0.000000000000 0.000000000000 0.000000000 0.000000000000
canadian_eskimo_dog black_russian_terrier
akita 0.0000000000000 0.0000000000000
british_bulldog 0.0000000000000 0.0000000000000
dogue_de_bordeaux 0.0000000000000 0.0000000000000
[ reached getOption("max.print") -- omitted 16 rows ]
akita british_bulldog dogue_de_bordeaux
akita 0.0001895734 0.000000 0.00000000
british_bulldog 0.0000000000 0.004332 0.00000000
dogue_de_bordeaux 0.0000000000 0.000000 0.00104029
french_bulldog german_shepherd irish_wolfhound maltese
akita 0.00000000 0.00000000 0.000000000 0.00000000
british_bulldog 0.00000000 0.00000000 0.000000000 0.00000000
dogue_de_bordeaux 0.00000000 0.00000000 0.000000000 0.00000000
rottweiler samoyed yorkshire_terrier
akita 0.0000000 0.000000000 0.000000000
british_bulldog 0.0000000 0.000000000 0.000000000
dogue_de_bordeaux 0.0000000 0.000000000 0.000000000
chinese_crested_dog chow_chow lowchen
akita 0.0000000000 0.0000000000 0.0000000000
british_bulldog 0.0000000000 0.0000000000 0.0000000000
dogue_de_bordeaux 0.0000000000 0.0000000000 0.0000000000
saluki pharaoh_hound st_bernard tibetan_mastiff
akita 0.00000000000 0.00000000000 0.0000000000 0.00000000000
british_bulldog 0.00000000000 0.00000000000 0.0000000000 0.00000000000
dogue_de_bordeaux 0.00000000000 0.00000000000 0.0000000000 0.00000000000
canadian_eskimo_dog black_russian_terrier
akita 0.000000000000 0.000000000000
british_bulldog 0.000000000000 0.000000000000
dogue_de_bordeaux 0.000000000000 0.000000000000
[ reached getOption("max.print") -- omitted 16 rows ]
x <numeric>
# total N=179 valid N=179 mean=1.35 sd=0.48
Value | N | Raw % | Valid % | Cum. %
1 | 117 | 65.36 | 65.36 | 65.36
2 | 62 | 34.64 | 34.64 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
plot(mod1, what = "classification")
# ICL <- mclustICL(data, G = seq(from = 2, to = 9))
# summary(ICL)
# plot(ICL)
# LRT <- mclustBootstrapLRT(data, modelName = "VEI")
# summary(LRT)
# plot(LRT)
# Attach the mixture-model classification of each row, as a factor, to the
# area-level data.
cluster_results <- wide_cost_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  mutate(cluster = factor(mod1$classification))
aggregate(data = cluster_results, german_shepherd ~ cluster, mean)
cluster german_shepherd
1 1 0.1924585
2 2 0.2488047
aggregate(data = cluster_results, french_bulldog ~ cluster, mean)
cluster french_bulldog
1 1 0.05633092
2 2 0.05637439
# aggregate(data = cluster_results, maltese ~ cluster, mean)
# aggregate(data = cluster_results, rottweiler ~ cluster, mean)
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)

Matrix products: default

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
 [1] mclust_5.4.9 parameters_0.16.0 modelbased_0.9.0 see_0.6.8
 [5] correlation_0.7.1 tmap_3.3-2 sf_1.0-5 DT_0.20
 [9] sjPlot_2.8.10 sjmisc_2.8.9 scales_1.1.1 magrittr_2.0.2
[13] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
[17] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5
[21] tidyverse_1.3.1 pacman_0.5.1

To cite R in publications use:
R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria.
To cite the ggplot2 package in publications use: Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4,