1 Data

Preparations in file 00.Rmd

dog_ownership_cost <- read_rds("data/dog_ownership_cost.Rds") %>% 
  select(-cost_compared_to_other_breeds)

length(unique(dog_ownership_cost$SSC_NAME16))

[1] 183

length(unique(dog_ownership_cost$dog_breed))

[1] 182

SSC <- read_rds("data/geo/SSC.Rds")

length(unique(SSC$SSC_NAME16))

[1] 183

wide_cost_n <- read_rds("data/wide_cost_n.Rds")
wide_cost_p <- read_rds("data/wide_cost_p.Rds")

1.1 All Brisbane dogs combined

1.2 Dog counts

Summarizing all dogs, and expensive only.
Excluding areas with no dogs.

# Per-suburb (SSC_NAME16) total of the `expensive` column
# (presumably a 0/1 flag per dog, so the sum is a count -- TODO confirm).
dog_ownership_agg <- summarise(
  group_by(dog_ownership_cost, SSC_NAME16),
  dogs_exp = sum(expensive)
)

# Attach the per-suburb `dogs_exp` totals to the suburb layer and derive the
# share of expensive dogs.
# - The join key is spelled out so the join cannot silently pick up any other
#   column the two tables happen to share.
# - `dogs_total` is not created in this chunk; it is expected to already be a
#   column of SSC.
# - Suburbs without a match in `dog_ownership_agg` get NA for `dogs_exp` and
#   therefore NA for `dogs_exp_prop`.
SSC <- SSC %>%
  left_join(dog_ownership_agg, by = "SSC_NAME16") %>%
  mutate(dogs_exp_prop = dogs_exp / dogs_total) %>%
  # keep the geometry column as the last column
  relocate(geometry, .after = last_col())

1.3 Proportion of expensive dogs

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.1178  0.1366  0.1403  0.1650  0.2636

1.3.1 Ranking

1.3.2 Map

2 Association with SEIFA

2.1 Functions

# Descriptive statistics of `dogs_exp_prop` within each level of one column
# of the global `SSC` object.
#
# seifa_index: unquoted column name in `SSC` to group by.
# Returns an ungrouped table with one row per level and the columns
# mean, sd, p25, p50 and p75 of `dogs_exp_prop` (missing values ignored).
seifa_means <- function (seifa_index) {
  by_level <- group_by(st_drop_geometry(SSC), {{ seifa_index }})

  level_stats <- summarize(
    by_level,
    mean = mean(dogs_exp_prop, na.rm = TRUE),
    sd = sd(dogs_exp_prop, na.rm = TRUE),
    p25 = quantile(dogs_exp_prop, probs = 0.25, na.rm = TRUE),
    p50 = quantile(dogs_exp_prop, probs = 0.50, na.rm = TRUE),
    p75 = quantile(dogs_exp_prop, probs = 0.75, na.rm = TRUE)
  )

  ungroup(level_stats)
}

# Kendall rank correlation between one column of the global `SSC` object and
# `dogs_exp_prop`.
#
# seifa_index: unquoted column name in `SSC`.
# Returns whatever `correlation()` returns for the two selected columns.
seifa_cor <- function (seifa_index) {
  SSC %>%
    st_drop_geometry() %>%
    select({{ seifa_index }}, dogs_exp_prop) %>%
    # Factors are converted to their integer level codes (not their labels).
    # A rank correlation only needs the ordering, so this is fine as long as
    # the factor levels are stored in ascending order -- TODO confirm.
    mutate(across(where(is.factor), as.numeric)) %>%
    correlation(method = "kendall")
}

# Violin plot of `dogs_exp_prop` by level of one column of the global `SSC`
# object, overlaid with the jittered observations and with means and
# intervals taken from a linear model of `dogs_exp_prop` on that column.
#
# seifa_index: unquoted column name in `SSC`; used for the x axis, the fill
#              and as the single predictor of the model.
# Returns the ggplot object.
seifa_plot <- function (seifa_index) {
  
  # substitute() swaps `seifa_index` for the expression the caller typed, so
  # the model is fitted (and its call recorded) with the real column name
  # rather than the argument name.
  model <- eval(substitute(lm(dogs_exp_prop ~ seifa_index, 
                              data = SSC, na.action = na.omit)))
  # The layers below read the Mean, CI_low and CI_high columns of this table
  # (presumably one row per level of the predictor -- TODO confirm).
  means <- estimate_means(model)
  
  myenc <- enquo(seifa_index)
  
  ggplot(SSC,
         aes(x = !!myenc,
             y = dogs_exp_prop,
             fill = !!myenc)) +
    geom_violin(alpha = 0.66) +
    geom_jitter2(width = 0.05, alpha = 0.5) +
    # group = 1 joins the per-level means with a single line
    geom_line(data = means, aes(y = Mean, group = 1), size = 1) +
    geom_pointrange(data = means,
                    aes(y = Mean, ymin = CI_low, ymax = CI_high),
                    size = 1,
                    color = "white") + 
    scale_fill_brewer(palette = "BrBG") +
    ylab("Proportion of expensive dogs") +
    theme_modern()
  
}

2.2 IRSD

2.2.1 Recalculated

seifa_means(IRSD_d)

# A tibble: 10 x 6
   IRSD_d  mean     sd    p25   p50   p75
   <fct>  <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1      0.161 0.0443 0.146  0.159 0.184
 2 2      0.130 0.0833 0.0913 0.155 0.170
 3 3      0.147 0.0258 0.129  0.140 0.160
 4 4      0.147 0.0411 0.121  0.144 0.173
 5 5      0.145 0.0305 0.130  0.137 0.155
 6 6      0.142 0.0350 0.115  0.136 0.156
 7 7      0.127 0.0213 0.113  0.128 0.136
 8 8      0.148 0.0268 0.125  0.148 0.163
 9 9      0.126 0.0351 0.112  0.122 0.141
10 10     0.127 0.0336 0.104  0.114 0.152

seifa_cor(IRSD_d)

# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSD_d     | dogs_exp_prop | -0.19 | [-0.28, -0.10] | -3.69 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IRSD_d)

2.2.2 Original

seifa_means(IRSD_d_orig)

# A tibble: 10 x 6
   IRSD_d_orig  mean     sd    p25   p50   p75
   <fct>       <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1           0.185 0.0179 0.179  0.193 0.195
 2 2           0.164 0.0605 0.138  0.161 0.179
 3 3           0.142 0.0540 0.125  0.159 0.180
 4 4           0.161 0.0163 0.151  0.158 0.159
 5 5           0.150 0.0654 0.122  0.160 0.178
 6 6           0.122 0.0813 0.0962 0.136 0.166
 7 7           0.143 0.0346 0.124  0.142 0.166
 8 8           0.150 0.0312 0.130  0.140 0.157
 9 9           0.136 0.0321 0.112  0.131 0.149
10 10          0.134 0.0316 0.113  0.127 0.156

seifa_cor(IRSD_d_orig)

# Correlation Matrix (kendall-method)

Parameter1  |    Parameter2 |   tau |         95% CI |     z |         p
------------------------------------------------------------------------
IRSD_d_orig | dogs_exp_prop | -0.18 | [-0.27, -0.08] | -3.31 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IRSD_d_orig)

2.3 IRSAD

2.3.1 Recalculated

seifa_means(IRSAD_d)

# A tibble: 10 x 6
   IRSAD_d  mean     sd   p25   p50   p75
   <fct>   <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1       0.156 0.0641 0.138 0.162 0.186
 2 2       0.140 0.0689 0.130 0.156 0.173
 3 3       0.143 0.0232 0.127 0.144 0.157
 4 4       0.157 0.0408 0.135 0.158 0.185
 5 5       0.139 0.0311 0.128 0.135 0.146
 6 6       0.137 0.0262 0.115 0.129 0.154
 7 7       0.136 0.0354 0.115 0.129 0.146
 8 8       0.141 0.0260 0.123 0.132 0.155
 9 9       0.132 0.0370 0.114 0.124 0.154
10 10      0.121 0.0315 0.101 0.112 0.127

seifa_cor(IRSAD_d)

# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSAD_d    | dogs_exp_prop | -0.23 | [-0.32, -0.13] | -4.35 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IRSAD_d)

2.3.2 Original

seifa_means(IRSAD_d_orig)

# A tibble: 10 x 6
   IRSAD_d_orig  mean     sd    p25   p50   p75
   <fct>        <dbl>  <dbl>  <dbl> <dbl> <dbl>
 1 1            0.185 0.0179 0.179  0.193 0.195
 2 2            0.145 0.0955 0.0751 0.131 0.201
 3 3            0.161 0.0248 0.143  0.161 0.179
 4 4            0.138 0.0183 0.131  0.138 0.144
 5 5            0.153 0.0777 0.151  0.158 0.176
 6 6            0.126 0.0875 0.0641 0.159 0.177
 7 7            0.157 0.0258 0.136  0.156 0.173
 8 8            0.147 0.0297 0.127  0.144 0.155
 9 9            0.144 0.0352 0.129  0.143 0.162
10 10           0.133 0.0319 0.112  0.127 0.149

seifa_cor(IRSAD_d_orig)

# Correlation Matrix (kendall-method)

Parameter1   |    Parameter2 |   tau |         95% CI |     z |         p
-------------------------------------------------------------------------
IRSAD_d_orig | dogs_exp_prop | -0.22 | [-0.31, -0.13] | -3.98 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IRSAD_d_orig)

2.4 IER

2.4.1 Recalculated

seifa_means(IER_d)

# A tibble: 10 x 6
   IER_d  mean     sd   p25   p50   p75
   <fct> <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1     0.162 0.0452 0.136 0.165 0.187
 2 2     0.137 0.0331 0.126 0.134 0.154
 3 3     0.138 0.0210 0.118 0.137 0.152
 4 4     0.144 0.0410 0.115 0.129 0.152
 5 5     0.125 0.0702 0.110 0.138 0.165
 6 6     0.152 0.0288 0.129 0.156 0.172
 7 7     0.132 0.0424 0.113 0.139 0.150
 8 8     0.132 0.0256 0.115 0.129 0.153
 9 9     0.137 0.0425 0.114 0.122 0.146
10 10    0.144 0.0439 0.114 0.153 0.171

seifa_cor(IER_d)

# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |        95% CI |     z |     p
------------------------------------------------------------------
IER_d      | dogs_exp_prop | -0.08 | [-0.18, 0.02] | -1.56 | 0.118

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IER_d)

2.4.2 Original

seifa_means(IER_d_orig)

# A tibble: 10 x 6
   IER_d_orig  mean     sd   p25   p50   p75
   <fct>      <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1          0.161 0.0440 0.140 0.162 0.183
 2 2          0.137 0.0316 0.124 0.132 0.154
 3 3          0.145 0.0387 0.117 0.137 0.163
 4 4          0.134 0.0263 0.114 0.128 0.144
 5 5          0.130 0.0693 0.114 0.143 0.173
 6 6          0.145 0.0297 0.127 0.138 0.161
 7 7          0.144 0.0340 0.128 0.141 0.154
 8 8          0.129 0.0465 0.127 0.130 0.152
 9 9          0.139 0.0369 0.115 0.132 0.164
10 10         0.138 0.0412 0.111 0.127 0.164

seifa_cor(IER_d_orig)

# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |        95% CI |     z |     p
------------------------------------------------------------------
IER_d_orig | dogs_exp_prop | -0.08 | [-0.18, 0.01] | -1.57 | 0.116

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IER_d_orig)

2.5 IEO

2.5.1 Recalculated

seifa_means(IEO_d)

# A tibble: 10 x 6
   IEO_d  mean     sd   p25   p50   p75
   <fct> <dbl>  <dbl> <dbl> <dbl> <dbl>
 1 1     0.156 0.0646 0.133 0.165 0.188
 2 2     0.166 0.0265 0.152 0.159 0.178
 3 3     0.132 0.0683 0.123 0.144 0.162
 4 4     0.146 0.0292 0.123 0.145 0.165
 5 5     0.146 0.0385 0.130 0.147 0.162
 6 6     0.153 0.0306 0.131 0.152 0.171
 7 7     0.133 0.0276 0.117 0.129 0.136
 8 8     0.124 0.0304 0.115 0.125 0.132
 9 9     0.127 0.0209 0.113 0.127 0.132
10 10    0.119 0.0302 0.106 0.112 0.119

seifa_cor(IEO_d)

# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d      | dogs_exp_prop | -0.30 | [-0.39, -0.21] | -5.84 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IEO_d)

2.5.2 Original

seifa_means(IEO_d_orig)

# A tibble: 10 x 6
   IEO_d_orig   mean     sd    p25    p50    p75
   <fct>       <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1          0.153  0.0665 0.138  0.179  0.194 
 2 2          0.175  0.0911 0.131  0.180  0.222 
 3 3          0.0625 0.0884 0.0312 0.0625 0.0938
 4 4          0.172  0.0393 0.145  0.165  0.188 
 5 5          0.168  0.0221 0.151  0.171  0.175 
 6 6          0.166  0.0260 0.161  0.176  0.182 
 7 7          0.136  0.0733 0.127  0.154  0.159 
 8 8          0.150  0.0320 0.132  0.148  0.162 
 9 9          0.147  0.0331 0.130  0.147  0.165 
10 10         0.130  0.0300 0.112  0.127  0.144

seifa_cor(IEO_d_orig)

# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d_orig | dogs_exp_prop | -0.27 | [-0.35, -0.17] | -4.82 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183

seifa_plot(IEO_d_orig)

3 PCA

# PCA input: the columns from `akita` through the last column of
# `wide_cost_p`, with the geometry dropped.
# (`wide_cost_n` is the alternative source that was toggled by hand here.)
data <- as_tibble(
  select(st_drop_geometry(wide_cost_p), akita:last_col())
)

# View(cov(data))

pca <- principal_components(data, 
                            standardize = FALSE,
                            n = "auto")
pca

# Loadings from Principal Component Analysis (no rotation)

Variable              |    PC1    |    PC2    |    PC3    |    PC4    |    PC5    |    PC6    |    PC7    |    PC8    |    PC9    |   PC10    |   PC11    |   PC12    |   PC13    |   PC14    |   PC15    |   PC16    |   PC17    |   PC18    | Complexity
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
akita                 | -3.34e-03 | 4.88e-04  | -2.13e-04 | 1.37e-03  | -8.24e-05 | 5.68e-04  | -6.24e-04 | 1.73e-04  | -3.23e-04 | 2.15e-03  | -2.67e-05 | 1.32e-03  | -2.15e-04 | 5.46e-03  | -3.37e-03 | -2.55e-03 | -4.01e-05 | -1.49e-04 |    3.84   
british_bulldog       |   -0.04   | 1.16e-03  | -2.24e-03 |   -0.01   | -6.08e-03 |   -0.03   | 4.39e-03  | -1.67e-04 | -2.62e-03 | 4.79e-04  | 8.09e-04  | -2.79e-04 | -8.18e-04 | 1.11e-04  | 6.19e-05  | -1.73e-04 | 7.54e-05  | 1.62e-05  |    2.30   
dogue_de_bordeaux     | -9.89e-03 | -5.17e-04 | 2.47e-04  | 9.15e-04  | 7.80e-04  | -4.56e-03 | 4.28e-03  | -1.62e-03 |   0.02    | 2.89e-04  | -6.32e-04 | 4.60e-04  | 2.79e-04  | -6.63e-05 | -3.35e-04 | 9.68e-06  | -9.85e-05 | -1.80e-05 |    2.13   
french_bulldog        |   -0.06   |   -0.02   | 3.52e-03  |   -0.06   | -6.62e-04 | 6.41e-03  | 6.69e-05  | 1.14e-03  | 7.85e-04  | 9.30e-05  | -2.35e-04 | 2.05e-04  | 7.57e-05  | 7.95e-05  | -4.49e-05 | -6.80e-05 | -4.45e-05 | -4.36e-06 |    2.20   
german_shepherd       |   -0.20   |   0.15    |   0.02    | -2.30e-03 | -2.71e-04 | 9.16e-04  | 1.69e-04  | -3.81e-05 | 1.16e-05  | -3.43e-04 | 1.59e-05  | 6.94e-05  | 3.05e-05  | 2.25e-05  | 1.22e-05  | 6.03e-06  | 8.36e-06  | -7.78e-07 |    1.88   
irish_wolfhound       |   -0.02   |   0.01    |   -0.02   | -3.62e-03 |   0.04    | -3.73e-03 | -9.09e-04 | 1.54e-04  | -4.72e-04 | -4.01e-05 | -1.78e-04 | 9.06e-05  | -1.62e-05 | 2.79e-05  | 2.52e-06  | 5.67e-06  | 6.17e-05  | 4.31e-06  |    2.61   
maltese               |   -0.54   |   -0.06   | 3.43e-03  | 7.75e-03  | 9.18e-04  | 8.51e-04  | 3.82e-04  | 2.13e-04  | -2.81e-04 | -3.01e-05 | -7.79e-05 | -6.56e-05 | -3.69e-05 | -2.82e-05 | 1.83e-05  | 4.05e-05  | 2.38e-05  | -1.73e-06 |    1.02   
rottweiler            |   -0.08   |   0.03    |   -0.07   | -1.57e-03 |   -0.01   | 2.84e-03  | -4.65e-04 | -4.86e-05 | 4.33e-04  | 1.88e-04  | 1.62e-04  | -2.87e-05 | -1.96e-04 | -8.98e-05 | -1.68e-05 | 1.23e-05  | -5.54e-05 | 1.38e-06  |    2.40   
samoyed               |   -0.01   | -1.74e-04 | 1.77e-03  | -8.18e-04 | -2.12e-03 | -5.91e-03 |   -0.02   | 5.43e-03  | 2.57e-03  | 5.12e-04  | 2.96e-04  | -8.06e-05 | 4.46e-04  | -2.48e-04 | 2.07e-06  | -1.82e-05 | -9.77e-05 | 1.32e-05  |    1.96   
yorkshire_terrier     |   -0.01   | -2.35e-03 | 9.14e-04  | -3.53e-03 | -3.39e-04 | -5.43e-04 | -7.62e-03 |   -0.02   | -4.84e-04 | -1.00e-03 | -2.43e-04 | 2.85e-04  | -1.60e-03 | -3.40e-04 | -5.95e-04 | -1.93e-05 | 3.39e-04  | -1.34e-05 |    2.32   
chinese_crested_dog   | -3.91e-03 | 2.53e-04  | -1.72e-03 | -6.27e-04 | -1.33e-03 | -2.88e-03 | -7.20e-04 | -2.52e-03 | -1.04e-03 | -9.14e-04 | -6.32e-03 | 3.73e-04  | 5.29e-03  | 2.07e-03  | 2.13e-03  | 5.85e-04  | -1.56e-03 | -3.53e-05 |    4.95   
chow_chow             | -4.87e-03 | -9.81e-04 | -2.39e-04 | 5.99e-05  | -6.66e-05 | -4.58e-04 | 2.52e-04  | -1.35e-03 | -5.98e-04 | 1.49e-03  | 4.36e-03  | 8.12e-03  | 3.57e-03  | -7.17e-04 | 2.54e-04  | 3.92e-04  | 1.40e-06  | 2.78e-05  |    3.00   
lowchen               | -4.43e-03 | -7.13e-04 | -3.60e-04 | -1.29e-03 | 7.87e-04  | 4.87e-04  | 5.05e-05  | -2.65e-03 | 6.87e-04  | -7.12e-04 | 6.47e-03  | -5.61e-03 | 4.64e-03  | 1.18e-03  | 4.03e-04  | -1.29e-04 | -3.22e-04 | -2.64e-06 |    4.44   
saluki                | -1.70e-03 | -1.85e-04 | 1.60e-04  | 5.64e-04  | 3.59e-04  | 6.20e-04  | 5.22e-05  | -5.46e-04 | 2.53e-04  | -1.14e-04 | 1.51e-04  | 2.77e-04  | -2.34e-04 | -1.73e-03 | 2.64e-03  | -5.49e-03 | -9.83e-04 | 1.84e-05  |    2.11   
pharaoh_hound         | -1.17e-03 | 1.03e-03  | -8.24e-04 | -3.59e-04 | -7.39e-04 | -1.17e-03 | 6.53e-04  | 9.25e-05  | -8.86e-04 | 7.20e-04  | -2.83e-03 | -1.08e-03 | 4.17e-03  | -3.01e-03 | -3.22e-03 | -1.41e-03 | 2.83e-03  | -8.12e-06 |    6.25   
st_bernard            | -3.52e-03 | 3.64e-03  | 1.52e-03  | 2.41e-04  | 6.54e-04  | 1.26e-03  | 6.03e-05  | -1.92e-03 | -2.81e-04 |   0.01    | -5.33e-04 | -1.50e-03 | -1.13e-04 | -4.82e-04 | 1.03e-03  | 4.96e-04  | -2.00e-04 | 4.08e-05  |    1.61   
tibetan_mastiff       | -1.52e-03 | 2.72e-04  | 5.26e-04  | 2.60e-05  | 4.69e-04  | -4.21e-04 | 5.78e-04  | -7.54e-05 | -8.27e-04 | 2.31e-04  | -6.56e-05 | -4.26e-04 | 3.87e-04  | -2.48e-03 | -3.60e-03 | 1.23e-04  | -4.28e-03 | 2.67e-05  |    3.21   
canadian_eskimo_dog   | -1.88e-04 | -5.72e-05 | 7.13e-05  | 1.57e-05  | 2.74e-06  | 1.02e-04  | 2.73e-05  | 7.69e-05  | 4.26e-05  | -5.43e-05 | -1.47e-04 | 4.45e-07  | 2.18e-05  | 7.49e-05  | 1.94e-05  | 4.83e-05  | -3.04e-05 | -1.12e-04 |    5.72   
black_russian_terrier | -4.47e-04 | -1.11e-04 | 3.79e-05  | 5.02e-05  | -5.27e-05 | 1.90e-04  | 3.37e-05  | -1.12e-04 | 1.08e-04  | -1.11e-04 | -1.66e-04 | 1.98e-05  | 4.09e-05  | 4.36e-04  | -2.02e-04 | -1.28e-04 | 4.35e-05  | 2.31e-03  |    1.22   

The 18 principal components accounted for 100.00% of the total variance of the original data (PC1 = 89.51%, PC2 = 7.12%, PC3 = 1.33%, PC4 = 0.86%, PC5 = 0.45%, PC6 = 0.24%, PC7 = 0.16%, PC8 = 0.09%, PC9 = 0.07%, PC10 = 0.04%, PC11 = 0.03%, PC12 = 0.03%, PC13 = 0.02%, PC14 = 0.01%, PC15 = 0.01%, PC16 = 0.01%, PC17 = 0.01%, PC18 = 0.00%).

summary(pca)

# (Explained) Variance of Components

Parameter                       |   PC1 |   PC2 |   PC3 |   PC4 |   PC5 |       PC6 |       PC7 |       PC8 |       PC9 |      PC10 |      PC11 |      PC12 |      PC13 |      PC14 |      PC15 |      PC16 |      PC17 |      PC18
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Eigenvalues                     | 0.344 | 0.027 | 0.005 | 0.003 | 0.002 | 9.350e-04 | 6.293e-04 | 3.515e-04 | 2.646e-04 | 1.454e-04 | 1.105e-04 | 1.034e-04 | 8.350e-05 | 5.492e-05 | 4.793e-05 | 3.945e-05 | 2.997e-05 | 5.374e-06
Variance Explained              | 0.895 | 0.071 | 0.013 | 0.009 | 0.005 |     0.002 |     0.002 | 9.139e-04 | 6.880e-04 | 3.780e-04 | 2.872e-04 | 2.689e-04 | 2.171e-04 | 1.428e-04 | 1.246e-04 | 1.026e-04 | 7.791e-05 | 1.397e-05
Variance Explained (Cumulative) | 0.895 | 0.966 | 0.980 | 0.988 | 0.993 |     0.995 |     0.997 |     0.998 |     0.998 |     0.999 |     0.999 |     0.999 |     1.000 |     1.000 |     1.000 |     1.000 |     1.000 |     1.000
Variance Explained (Proportion) | 0.895 | 0.071 | 0.013 | 0.009 | 0.005 |     0.002 |     0.002 | 9.139e-04 | 6.880e-04 | 3.780e-04 | 2.872e-04 | 2.689e-04 | 2.171e-04 | 1.428e-04 | 1.246e-04 | 1.026e-04 | 7.791e-05 | 1.397e-05

plot(pca)

# Suburb-level table (columns SSC_CODE16 through caution) with the score on
# the first principal component and its decile.
# Inside mutate(), `pca` in predict(pca) still refers to the PCA object
# created above, because the `pca` column is only created by the following
# argument; the order of the two arguments therefore matters.
# Scores are attached by row position, so this relies on `wide_cost_p` having
# the same row order as the `data` table the PCA was fitted on.
# NOTE(review): the sign of a principal component is arbitrary, so whether a
# high `pca` decile corresponds to a high or a low SEIFA decile should be
# checked before the two are compared for equality.
pca_results <- wide_cost_p %>%
  st_drop_geometry() %>% 
  as_tibble() %>% 
  select(SSC_CODE16:caution) %>% 
  mutate(pca_raw = predict(pca)$Component_1, 
         pca = ntile(pca_raw, 10))

3.1 `IRSD_d`

ggplot(pca_results, aes(x = IRSD, y = pca_raw)) + 
  geom_point()

pca == IRSD_d <lgl> 
# total N=179 valid N=179 mean=0.11 sd=0.31

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 160 | 89.39 |   89.39 |  89.39
TRUE  |  19 | 10.61 |   10.61 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

3.2 `IRSAD_d`

ggplot(pca_results, aes(x = IRSAD, y = pca_raw)) + 
  geom_point()

pca == IRSAD_d <lgl> 
# total N=179 valid N=179 mean=0.12 sd=0.32

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 158 | 88.27 |   88.27 |  88.27
TRUE  |  21 | 11.73 |   11.73 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

3.3 `IER_d`

ggplot(pca_results, aes(x = IER, y = pca_raw)) + 
  geom_point()

pca == IER_d <lgl> 
# total N=179 valid N=179 mean=0.14 sd=0.35

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 154 | 86.03 |   86.03 |  86.03
TRUE  |  25 | 13.97 |   13.97 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

3.4 `IEO_d`

ggplot(pca_results, aes(x = IEO, y = pca_raw)) + 
  geom_point()

pca == IEO_d <lgl> 
# total N=179 valid N=179 mean=0.08 sd=0.28

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 164 | 91.62 |   91.62 |  91.62
TRUE  |  15 |  8.38 |    8.38 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

4 Clustering I

# Clustering input: the columns from `akita` through the last column of
# `wide_cost_p`, with the geometry dropped (the same expression that builds
# the PCA input in section 3).
data <- as_tibble(
  select(st_drop_geometry(wide_cost_p), akita:last_col())
)

4.1 No of clusters

n <- n_clusters(data, package = c("easystats", "NbClust", "mclust"))
n

# Method Agreement Procedure:

The choice of 2 clusters is supported by 7 (33.33%) methods out of 21 (Elbow, Silhouette, Duda, Pseudot2, Beale, Mcclain, Dunn).

plot(n)

4.2 K-Means

# Two-cluster k-means solution; n = 2 follows the method-agreement result
# printed in section 4.1.
# NOTE(review): no set.seed() call is visible before this line. k-means
# normally starts from random centres, so cluster labels and sizes may change
# between runs unless a seed is set elsewhere -- TODO confirm.
rez_kmeans <- cluster_analysis(data, n = 2, method = "kmeans")

rez_kmeans

# Clustering Solution

The 2 clusters accounted for 5.68% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    44 |     1123.56 | -0.36 |            0.87 |              0.06 |           1.00 |           -0.69 |           -0.26 |    0.12 |      -0.33 |    0.30 |              0.46 |                0.21 |      0.20 |    0.52 |  -0.14 |          0.06 |      -0.15 |            0.11 |                0.13 |                 -0.07
2       |   135 |     2066.27 |  0.12 |           -0.28 |             -0.02 |          -0.33 |            0.22 |            0.08 |   -0.04 |       0.11 |   -0.10 |             -0.15 |               -0.07 |     -0.07 |   -0.17 |   0.04 |         -0.02 |       0.05 |           -0.03 |               -0.04 |                  0.02

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |             192.167 |           3189.833 | 0.057

# You can access the predicted clusters via 'predict()'.

plot(rez_kmeans)

plot(summary(rez_kmeans))

# Suburb table (geometry dropped) with the k-means cluster label of each row
# added as `cluster`.
cluster_results <- as_tibble(st_drop_geometry(wide_cost_p))
cluster_results <- mutate(cluster_results, cluster = predict(rez_kmeans))

aggregate(data = cluster_results, german_shepherd ~ cluster, mean)

  cluster german_shepherd
1       1       0.1226264
2       2       0.2410961

aggregate(data = cluster_results, french_bulldog ~ cluster, mean)

  cluster french_bulldog
1       1     0.11382257
2       2     0.03761286

# aggregate(data = cluster_results, maltese ~ cluster, mean)
# aggregate(data = cluster_results, rottweiler ~ cluster, mean)

4.3 Hierarchical Clustering

rez_hclust <- cluster_analysis(data, n = 2, method = "hclust")

rez_hclust

# Clustering Solution

The 2 clusters accounted for 4.56% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |    akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |  maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |  lowchen |   saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   177 |     3126.78 | 4.95e-03 |           -0.02 |          6.57e-03 |       5.91e-03 |            0.01 |        6.44e-03 | 6.71e-03 |      -0.01 |   -0.02 |             -0.04 |               -0.07 |  5.31e-03 | 4.91e-03 | 2.82e-03 |         -0.05 |   3.56e-03 |        2.76e-03 |            1.31e-03 |              1.99e-03
2       |     2 |      101.09 |    -0.44 |            2.02 |             -0.58 |          -0.52 |           -1.16 |           -0.57 |    -0.59 |       0.90 |    1.60 |              3.51 |                5.80 |     -0.47 |    -0.43 |    -0.25 |          4.40 |      -0.32 |           -0.24 |               -0.12 |                 -0.18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |             154.132 |           3227.868 | 0.046

# You can access the predicted clusters via 'predict()'.

plot(rez_hclust)

4.4 Hierarchical K-Means

rez_hkmeans <- cluster_analysis(data, n = 2, method = "hkmeans")

rez_hkmeans

# Clustering Solution

The 2 clusters accounted for 5.82% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |    akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler |  samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |  lowchen |   saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   177 |     3115.75 | 4.95e-03 |           -0.01 |          6.57e-03 |       4.74e-03 |       -8.54e-03 |        2.42e-03 |    0.02 |      -0.01 | 6.21e-03 |          6.39e-03 |               -0.04 |  5.31e-03 | 4.91e-03 | 2.82e-03 |         -0.10 |      -0.01 |           -0.03 |            1.31e-03 |              1.99e-03
2       |     2 |       69.34 |    -0.44 |            1.15 |             -0.58 |          -0.42 |            0.76 |           -0.21 |   -1.78 |       1.14 |    -0.55 |             -0.57 |                3.16 |     -0.47 |    -0.43 |    -0.25 |          8.53 |       0.97 |            2.32 |               -0.12 |                 -0.18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |             196.908 |           3185.092 | 0.058

# You can access the predicted clusters via 'predict()'.

plot(rez_hkmeans)

4.5 K-Medoids (PAM)

rez_pam <- cluster_analysis(data, n = 2, method = "pam")

rez_pam

# Clustering Solution

The 2 clusters accounted for 4.02% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    97 |     1570.11 | -0.01 |           -0.51 |             -0.10 |          -0.25 |            0.25 |           -0.09 |    0.04 |       0.05 |    0.03 |         -4.79e-03 |               -0.14 |     -0.25 |   -0.36 |   0.08 |         -0.03 |       0.10 |            0.03 |               -0.01 |                 -0.06
2       |    82 |     1676.06 |  0.01 |            0.60 |              0.12 |           0.30 |           -0.30 |            0.11 |   -0.05 |      -0.06 |   -0.03 |          5.66e-03 |                0.17 |      0.29 |    0.43 |  -0.09 |          0.03 |      -0.12 |           -0.03 |                0.02 |                  0.08

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |             135.832 |           3246.168 | 0.040

# You can access the predicted clusters via 'predict()'.

plot(rez_pam)

4.6 Bootstrapped Hierarchical Clustering

rez_hclust2 <- cluster_analysis(data, 
                                n = NULL, 
                                method = "hclust", 
                                iterations = 500,
                                ci = 0.90)

rez_hclust2

# Clustering Solution

The 7 clusters accounted for 5.25% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |   167 |     3196.03 | -0.08 |        5.33e-03 |              0.01 |           0.03 |            0.02 |            0.03 |   -0.04 |  -2.35e-03 |   -0.01 |              0.02 |           -3.47e-03 |  8.27e-03 |    0.02 |   0.02 |          0.01 |       0.02 |        3.17e-03 |            8.33e-03 |                 -0.07
1       |     2 |        0.22 | -0.44 |            0.52 |             -0.58 |          -0.12 |           -0.41 |           -0.57 |    0.87 |      -0.07 |   -0.42 |             -0.05 |               -0.42 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.32 |           -0.24 |               -0.12 |                 -0.18
2       |     2 |        0.32 | -0.28 |           -0.34 |              0.26 |          -0.46 |           -0.04 |           -0.28 |    0.42 |       0.18 |   -0.45 |             -0.19 |                0.18 |      0.23 |    0.14 |  -0.25 |         -0.18 |      -0.32 |           -0.24 |               -0.12 |                 -0.18
3       |     2 |        0.77 |  0.56 |           -0.21 |             -0.39 |          -0.83 |           -0.46 |           -0.38 |    1.10 |      -0.22 |   -0.11 |              0.04 |               -0.10 |      0.27 |   -0.43 |  -0.25 |         -0.18 |      -0.32 |            0.96 |               -0.12 |                 -0.18
4       |     2 |        1.12 |  0.97 |            0.83 |              1.02 |           0.02 |           -0.46 |        9.78e-03 |   -0.05 |       0.46 |   -0.11 |             -0.44 |                1.48 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.32 |           -0.24 |               -0.12 |                 -0.18
5       |     2 |        1.58 |  3.58 |           -0.50 |             -0.58 |          -0.23 |            0.37 |           -0.57 |   -0.23 |      -0.09 |    2.45 |             -0.57 |               -0.42 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.32 |           -0.24 |               -0.12 |                 -0.18
6       |     2 |        4.46 |  2.58 |           -0.75 |             -0.58 |          -0.74 |           -0.27 |           -0.41 |    0.86 |      -0.07 |   -0.19 |             -0.20 |               -0.42 |      0.22 |   -0.43 |  -0.25 |         -0.18 |      -0.32 |           -0.24 |               -0.12 |                  6.43

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |             177.508 |              8.460 | 0.052

# You can access the predicted clusters via 'predict()'.

plot(rez_hclust2)

4.7 DBSCAN

eps <- n_clusters_dbscan(data, min_size = 0.01) 

eps

The DBSCAN method, based on the total clusters sum of squares, suggests that the optimal eps = 10.1164031203656 (with min. cluster size set to 2), which corresponds to 0 clusters.

plot(eps)

# `dbscan_eps` is set by hand to 5 and is not taken from the `eps` object
# computed above: the value suggested there (about 10.12) is reported as
# corresponding to 0 clusters.
rez_dbscan <- cluster_analysis(data, method = "dbscan", dbscan_eps = 5)

rez_dbscan

# Clustering Solution

The 2 clusters accounted for 2.82% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |   lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |    38 |     2133.66 |  0.34 |            0.11 |              0.09 |           0.20 |            0.34 |            0.11 |   -0.72 |       0.05 |    0.25 |              0.41 |                0.27 |      0.01 | -9.08e-03 |   0.34 |          0.27 |       0.42 |            0.33 |                0.37 |                  0.49
1       |   141 |     1153.02 | -0.09 |           -0.03 |             -0.02 |          -0.05 |           -0.09 |           -0.03 |    0.19 |      -0.01 |   -0.07 |             -0.11 |               -0.07 | -3.76e-03 |  2.45e-03 |  -0.09 |         -0.07 |      -0.11 |           -0.09 |               -0.10 |                 -0.13

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |              95.311 |           1153.024 | 0.028

# You can access the predicted clusters via 'predict()'.

# Plot the DBSCAN clustering solution.
plot(rez_dbscan)

4.8 Hierarchical DBSCAN (HDBSCAN)

# Cluster `data` with the "hdbscan" method of cluster_analysis().
rez_hdbscan <- cluster_analysis(data, method = "hdbscan")

# Print the clustering summary.
rez_hdbscan

# Clustering Solution

The unique cluster accounted for 0.00% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |     akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |  maltese | rottweiler |  samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |  lowchen |   saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |   179 |        3382 | -4.02e-17 |       -6.48e-18 |          4.23e-17 |       2.98e-17 |        7.79e-17 |        4.27e-17 | 1.15e-16 |  -8.45e-17 | 5.32e-17 |         -1.50e-17 |            2.05e-17 |  3.35e-17 | 2.87e-17 | 9.80e-19 |      8.95e-18 |   1.30e-17 |       -1.72e-17 |                   0 |              1.30e-17

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |               0.000 |              0.000 | 0.000

# You can access the predicted clusters via 'predict()'.

# plot(rez_hdbscan)

4.9 K-Medoids with estimation of number of clusters (pamk)

# Cluster `data` with the "pamk" method of cluster_analysis(); no number of
# clusters is passed here, so it is left to the method.
rez_pamk <- cluster_analysis(data, method = "pamk")

# Print the clustering summary.
rez_pamk

# Clustering Solution

The 10 clusters accounted for 39.07% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |   maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    62 |      484.15 |  0.07 |           -0.43 |             -0.06 |          -0.31 |            0.06 |           -0.12 |      0.30 |       0.03 |   -0.11 |             -0.12 |               -0.09 |     -0.38 |   -0.33 |  -0.16 |         -0.07 |      -0.02 |       -9.44e-03 |               -0.12 |                 -0.18
10      |     1 |        0.00 | -0.44 |            3.51 |             -0.58 |          -0.98 |           -1.08 |           -0.57 |     -1.19 |       3.05 |   -0.55 |             -0.57 |                6.74 |     -0.47 |   -0.43 |  -0.25 |          8.98 |      -0.32 |           -0.24 |               -0.12 |                 -0.18
2       |    11 |      119.31 | -0.34 |            0.19 |             -0.33 |           2.66 |           -1.13 |           -0.51 |      0.57 |      -0.90 |   -0.20 |              0.58 |                0.01 |     -0.47 |    0.10 |  -0.13 |         -0.18 |      -0.22 |           -0.24 |               -0.12 |                  0.58
3       |    46 |      495.06 |  0.07 |            0.49 |              0.35 |       2.64e-03 |           -0.21 |            0.13 | -8.46e-03 |       0.08 |   -0.24 |             -0.12 |                0.16 |     -0.21 |    0.79 |  -0.07 |         -0.03 |      -0.15 |           -0.14 |               -0.12 |                 -0.12
4       |    12 |      357.94 | -0.06 |           -0.47 |             -0.42 |          -0.76 |            2.44 |            1.37 |     -2.65 |       1.25 |   -0.44 |             -0.57 |               -0.25 |     -0.30 |   -0.43 |  -0.25 |         -0.18 |       1.09 |           -0.24 |               -0.12 |                 -0.18
5       |    34 |      456.17 | -0.14 |            0.27 |             -0.10 |           0.04 |           -0.31 |           -0.22 |      0.18 |      -0.25 |    0.85 |              0.43 |               -0.10 |      1.27 |   -0.25 |  -0.07 |         -0.14 |      -0.09 |           -0.21 |               -0.05 |                 -0.18
6       |     4 |       99.14 | -0.44 |            0.42 |             -0.20 |           0.14 |            0.46 |            0.17 |     -0.59 |      -0.51 |   -0.55 |             -0.13 |               -0.42 |      0.16 |   -0.12 |  -0.25 |          1.88 |       0.59 |            5.63 |               -0.12 |                 -0.18
7       |     4 |       17.60 | -0.44 |           -0.78 |              0.13 |          -0.19 |           -0.07 |           -0.06 |      0.37 |      -0.39 |   -0.01 |             -0.06 |               -0.42 |     -0.04 |   -0.43 |   5.64 |         -0.18 |      -0.05 |           -0.24 |               -0.12 |                 -0.18
8       |     3 |       20.39 |  1.58 |           -0.90 |              0.09 |          -0.52 |           -0.15 |           -0.34 |      0.52 |      -0.12 |   -0.08 |              0.27 |                0.15 |     -0.01 |    0.12 |  -0.25 |         -0.18 |      -0.32 |           -0.24 |               -0.12 |                  6.63
9       |     2 |       10.87 |  0.30 |           -0.69 |              0.20 |           0.11 |           -0.19 |           -0.40 |      0.73 |      -0.49 |   -0.23 |             -0.57 |                0.36 |     -0.47 |   -0.43 |  -0.25 |         -0.18 |      -0.32 |           -0.24 |                9.11 |                 -0.18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |            1321.372 |           2060.628 | 0.391

# You can access the predicted clusters via 'predict()'.

# Plot the pamk clustering solution.
plot(rez_pamk)

4.10 Mixture

# Load mclust with p_load() before fitting the mixture model
# (presumably required by method = "mixture" - TODO confirm).
p_load(mclust)

# Cluster `data` with the "mixture" method of cluster_analysis().
rez_mixture <- cluster_analysis(data, method = "mixture")

# Print the clustering summary.
rez_mixture

# Clustering Solution

The unique cluster accounted for 0.00% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares |     akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound |  maltese | rottweiler |  samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow |  lowchen |   saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   179 |        3382 | -4.02e-17 |       -6.48e-18 |          4.23e-17 |       2.98e-17 |        7.79e-17 |        4.27e-17 | 1.15e-16 |  -8.45e-17 | 5.32e-17 |         -1.50e-17 |            2.05e-17 |  3.35e-17 | 2.87e-17 | 9.80e-19 |      8.95e-18 |   1.30e-17 |       -1.72e-17 |                   0 |              1.30e-17

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
3382.000          |               0.000 |           3382.000 | 0.000

# You can access the predicted clusters via 'predict()'.

# Plot the mixture clustering solution.
plot(rez_mixture)

4.11 Metaclustering

# Collect the clustering solutions to be combined by cluster_meta().
# NOTE(review): rez_pamk is not part of this list, while rez_hdbscan and
# rez_mixture (each printed above as a single "unique cluster") are - confirm
# that this selection is intentional.
list_of_results <- list(
  rez_kmeans, rez_hclust, rez_hkmeans, rez_pam,
  rez_hclust2, rez_dbscan, rez_hdbscan, rez_mixture
)

# Combine the solutions into one matrix.
probability_matrix <- cluster_meta(list_of_results)

# Draw the matrix as a heatmap without rescaling, using a 256-step
# "inferno" palette.
heatmap(
  probability_matrix,
  scale = "none",
  col = grDevices::hcl.colors(n = 256, palette = "inferno")
)

5 Clustering II

# Clustering input: drop the geometry from wide_cost_p, keep the columns from
# `akita` through the last one, and return the result as a tibble.
data <- st_drop_geometry(wide_cost_p) %>%
  select(akita:last_col()) %>%
  as_tibble()

# Make sure mclust is loaded.
p_load(mclust)

# Compute BIC for mixture models with 2 to 9 components.
BIC <- mclustBIC(data, G = 2:9)

# Plot the BIC values.
plot(BIC)

# BIC
# Show the best BIC values.
summary(BIC)

Best BIC values:
            VEI,2      EEI,7      EEI,6
BIC      19781.85 19297.7036 18847.5907
BIC diff     0.00  -484.1458  -934.2586

# Fit the mixture model, passing the BIC object computed above as `x`.
mod1 <- Mclust(data, x = BIC)
# Print the fitted model object (shows the selected model and its components).
mod1

'Mclust' model object: (VEI,2) 

Available components: 
 [1] "call"           "data"           "modelName"      "n"             
 [5] "d"              "G"              "BIC"            "loglik"        
 [9] "df"             "bic"            "icl"            "hypvol"        
[13] "parameters"     "z"              "classification" "uncertainty"

# Summarise the fitted model, including the estimated parameters
# (mixing probabilities, means and variances).
summary(mod1, parameters = TRUE)

---------------------------------------------------- 
Gaussian finite mixture model fitted by EM algorithm 
---------------------------------------------------- 

Mclust VEI (diagonal, equal shape) model with 2 components: 

 log-likelihood   n df      BIC      ICL
       10043.95 179 59 19781.85 19777.99

Clustering table:
  1   2 
117  62 

Mixing probabilities:
        1         2 
0.6541546 0.3458454 

Means:
                                         [,1]         [,2]
akita                 0.002505341815638923494 0.0048484110
british_bulldog       0.037505394220490657908 0.0349970021
dogue_de_bordeaux     0.009531525587163920676 0.0106058945
french_bulldog        0.056033093753594345743 0.0569377788
german_shepherd       0.193475508352204395024 0.2469663463
irish_wolfhound       0.021633373196265383298 0.0337355101
maltese               0.564473150499892883936 0.4513020939
rottweiler            0.078914243307080883527 0.0923734268
samoyed               0.011079551634817269914 0.0180179368
yorkshire_terrier     0.009257182415792304317 0.0141727888
chinese_crested_dog   0.003165296957289343883 0.0062020689
chow_chow             0.004529585726722043688 0.0053345070
lowchen               0.003865480531525179904 0.0056947053
saluki                0.000608239446469556206 0.0035649829
pharaoh_hound         0.000457795006272484367 0.0032889170
st_bernard            0.002101000367237471772 0.0074501985
tibetan_mastiff       0.000864236363207894313 0.0028020386
canadian_eskimo_dog   0.000000000818330443212 0.0004857234
black_russian_terrier 0.000000000000004364326 0.0012196693

Variances:
[,,1]
                              akita british_bulldog dogue_de_bordeaux
akita                 0.00001782492    0.0000000000     0.00000000000
british_bulldog       0.00000000000    0.0004073227     0.00000000000
dogue_de_bordeaux     0.00000000000    0.0000000000     0.00009781482
                      french_bulldog german_shepherd irish_wolfhound
akita                    0.000000000     0.000000000    0.0000000000
british_bulldog          0.000000000     0.000000000    0.0000000000
dogue_de_bordeaux        0.000000000     0.000000000    0.0000000000
                          maltese  rottweiler      samoyed yorkshire_terrier
akita                 0.000000000 0.000000000 0.0000000000      0.0000000000
british_bulldog       0.000000000 0.000000000 0.0000000000      0.0000000000
dogue_de_bordeaux     0.000000000 0.000000000 0.0000000000      0.0000000000
                      chinese_crested_dog     chow_chow       lowchen
akita                       0.00000000000 0.00000000000 0.00000000000
british_bulldog             0.00000000000 0.00000000000 0.00000000000
dogue_de_bordeaux           0.00000000000 0.00000000000 0.00000000000
                              saluki  pharaoh_hound  st_bernard tibetan_mastiff
akita                 0.000000000000 0.000000000000 0.000000000  0.000000000000
british_bulldog       0.000000000000 0.000000000000 0.000000000  0.000000000000
dogue_de_bordeaux     0.000000000000 0.000000000000 0.000000000  0.000000000000
                      canadian_eskimo_dog black_russian_terrier
akita                     0.0000000000000       0.0000000000000
british_bulldog           0.0000000000000       0.0000000000000
dogue_de_bordeaux         0.0000000000000       0.0000000000000
 [ reached getOption("max.print") -- omitted 16 rows ]
[,,2]
                             akita british_bulldog dogue_de_bordeaux
akita                 0.0001895734        0.000000        0.00000000
british_bulldog       0.0000000000        0.004332        0.00000000
dogue_de_bordeaux     0.0000000000        0.000000        0.00104029
                      french_bulldog german_shepherd irish_wolfhound    maltese
akita                     0.00000000      0.00000000     0.000000000 0.00000000
british_bulldog           0.00000000      0.00000000     0.000000000 0.00000000
dogue_de_bordeaux         0.00000000      0.00000000     0.000000000 0.00000000
                      rottweiler     samoyed yorkshire_terrier
akita                  0.0000000 0.000000000       0.000000000
british_bulldog        0.0000000 0.000000000       0.000000000
dogue_de_bordeaux      0.0000000 0.000000000       0.000000000
                      chinese_crested_dog    chow_chow      lowchen
akita                        0.0000000000 0.0000000000 0.0000000000
british_bulldog              0.0000000000 0.0000000000 0.0000000000
dogue_de_bordeaux            0.0000000000 0.0000000000 0.0000000000
                             saluki pharaoh_hound   st_bernard tibetan_mastiff
akita                 0.00000000000 0.00000000000 0.0000000000   0.00000000000
british_bulldog       0.00000000000 0.00000000000 0.0000000000   0.00000000000
dogue_de_bordeaux     0.00000000000 0.00000000000 0.0000000000   0.00000000000
                      canadian_eskimo_dog black_russian_terrier
akita                      0.000000000000        0.000000000000
british_bulldog            0.000000000000        0.000000000000
dogue_de_bordeaux          0.000000000000        0.000000000000
 [ reached getOption("max.print") -- omitted 16 rows ]

# Frequency table of the cluster assignments stored in mod1$classification.
frq(mod1$classification)

x <numeric> 
# total N=179 valid N=179 mean=1.35 sd=0.48

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
    1 | 117 | 65.36 |   65.36 |  65.36
    2 |  62 | 34.64 |   34.64 | 100.00
 <NA> |   0 |  0.00 |    <NA> |   <NA>

# Plot the classification produced by the fitted model.
plot(mod1, what = "classification")

# ICL <- mclustICL(data, G = seq(from = 2, to = 9))
# summary(ICL)
# plot(ICL)

# LRT <- mclustBootstrapLRT(data, modelName = "VEI")
# summary(LRT)
# plot(LRT)

# Attach the cluster assignment from mod1 to the non-spatial version of
# wide_cost_p, stored as a factor column named `cluster`.
cluster_results <- st_drop_geometry(wide_cost_p) %>%
  as_tibble() %>%
  mutate(cluster = factor(mod1$classification))

# Mean of german_shepherd within each cluster.
aggregate(german_shepherd ~ cluster, data = cluster_results, FUN = mean)

  cluster german_shepherd
1       1       0.1924585
2       2       0.2488047

# Mean of french_bulldog within each cluster.
aggregate(french_bulldog ~ cluster, data = cluster_results, FUN = mean)

  cluster french_bulldog
1       1     0.05633092
2       2     0.05637439

# aggregate(data = cluster_results, maltese ~ cluster, mean)
# aggregate(data = cluster_results, rottweiler ~ cluster, mean)

6 Computing Environment

 R version 4.1.2 (2021-11-01)
 Platform: x86_64-w64-mingw32/x64 (64-bit)
 Running under: Windows 10 x64 (build 18363)
 
 Matrix products: default
 
 attached base packages:
 [1] stats     graphics  grDevices utils     datasets  methods   base     
 
 other attached packages:
  [1] mclust_5.4.9      parameters_0.16.0 modelbased_0.9.0  see_0.6.8        
  [5] correlation_0.7.1 tmap_3.3-2        sf_1.0-5          DT_0.20          
  [9] sjPlot_2.8.10     sjmisc_2.8.9      scales_1.1.1      magrittr_2.0.2   
 [13] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.7       purrr_0.3.4      
 [17] readr_2.1.2       tidyr_1.2.0       tibble_3.1.6      ggplot2_3.3.5    
 [21] tidyverse_1.3.1   pacman_0.5.1

To cite R in publications use:

R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.

To cite the ggplot2 package in publications use:

Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.

Dog-SEP

Dog cost

Radoslaw Panczak

04 February, 2022