1 Data

Preparations in file 00.Rmd

# Load the dog-level cost data and rebuild the binary `expensive` flag:
# 1 when the breed's cost is "Significantly above average", 0 otherwise, and
# NA when the cost category itself is missing.
# Alternative coding kept for reference (everything except "Below average"
# counts as expensive):
#   expensive = ifelse(cost_compared_to_other_breeds == "Below average", 0, 1)
dog_ownership_cost <- read_rds("data/dog_ownership_cost.Rds") %>%
  select(-expensive) %>%
  mutate(
    expensive = as.numeric(
      cost_compared_to_other_breeds == "Significantly above average"
    )
  ) %>%
  select(-cost_compared_to_other_breeds)

length(unique(dog_ownership_cost$SSC_NAME16))
[1] 183
length(unique(dog_ownership_cost$dog_breed))
[1] 182
SSC <- read_rds("data/geo/SSC.Rds")

length(unique(SSC$SSC_NAME16))
[1] 183
wide_insurance_n <- read_rds("data/wide_insurance_n.Rds")
wide_insurance_p <- read_rds("data/wide_insurance_p.Rds")

Important to remember: `expensive` is missing for a small number of records (198 of 106,007, i.e. 0.19%).

is.na(expensive) <lgl> 
# total N=106007 valid N=106007 mean=0.00 sd=0.04

Value |      N | Raw % | Valid % | Cum. %
-----------------------------------------
FALSE | 105809 | 99.81 |   99.81 |  99.81
TRUE  |    198 |  0.19 |    0.19 | 100.00
<NA>  |      0 |  0.00 |    <NA> |   <NA>

Breeds:

dog_breed <character> 
# total N=198 valid N=198 mean=5.87 sd=3.15

Value                      |   N | Raw % | Valid % | Cum. %
-----------------------------------------------------------
Pointer                    | 128 | 64.65 |   64.65 |  64.65
Akita                      |  55 | 27.78 |   27.78 |  92.42
Glen of Imaal Terrier      |   4 |  2.02 |    2.02 |  94.44
Canadian Eskimo Dog        |   3 |  1.52 |    1.52 |  95.96
Canaan Dog                 |   2 |  1.01 |    1.01 |  96.97
Central Asian Shepherd Dog |   2 |  1.01 |    1.01 |  97.98
Australian Staghound       |   1 |  0.51 |    0.51 |  98.48
Eurasier                   |   1 |  0.51 |    0.51 |  98.99
Portuguese Podengo         |   1 |  0.51 |    0.51 |  99.49
Swedish Lapphund           |   1 |  0.51 |    0.51 | 100.00
<NA>                       |   0 |  0.00 |    <NA> |   <NA>

1.1 All Brisbane dogs combined

1.2 Dog counts

Counting the expensive dogs in each suburb and expressing them as a share of all dogs in that suburb.

# Number of expensive dogs per suburb. Records whose `expensive` flag is
# missing are not counted (na.rm = TRUE).
dog_ownership_agg <- dog_ownership_cost %>%
  group_by(SSC_NAME16) %>%
  summarise(dogs_exp = sum(expensive, na.rm = TRUE))

# Attach the counts to the suburb layer and derive the expensive-dog share.
# The join key is spelled out so the join cannot silently pick up any other
# column the two tables happen to share; suburbs with no row in
# `dog_ownership_agg` get NA.
# `dogs_total` is not created in this file — presumably prepared in 00.Rmd.
SSC <- SSC %>%
  left_join(dog_ownership_agg, by = "SSC_NAME16") %>%
  mutate(dogs_exp_prop = dogs_exp / dogs_total) %>%
  relocate(geometry, .after = last_col())

1.3 Proportion of expensive dogs

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.00000 0.06166 0.07618 0.07914 0.08865 0.33333 

1.3.1 Ranking

1.3.2 Map

2 Association with SEIFA

2.1 Functions

# Summarise the suburb-level share of expensive dogs within each level of a
# SEIFA grouping column.
#
# seifa_index: unquoted name of a column of the global `SSC` object.
# Returns an ungrouped table with one row per level holding the mean, the
# standard deviation and the 25th/50th/75th percentiles of `dogs_exp_prop`
# (missing values ignored).
seifa_means <- function(seifa_index) {
  suburbs <- st_drop_geometry(SSC)
  by_level <- group_by(suburbs, {{ seifa_index }})

  ungroup(
    summarize(
      by_level,
      mean = mean(dogs_exp_prop, na.rm = TRUE),
      sd = sd(dogs_exp_prop, na.rm = TRUE),
      p25 = quantile(dogs_exp_prop, probs = 0.25, na.rm = TRUE),
      p50 = quantile(dogs_exp_prop, probs = 0.50, na.rm = TRUE),
      p75 = quantile(dogs_exp_prop, probs = 0.75, na.rm = TRUE)
    )
  )
}

# Kendall rank correlation between a SEIFA column and the suburb-level share
# of expensive dogs.
#
# seifa_index: unquoted name of a column of the global `SSC` object.
# Returns the result of correlation() on the two selected columns.
seifa_cor <- function(seifa_index) {
  SSC %>%
    st_drop_geometry() %>%
    select({{ seifa_index }}, dogs_exp_prop) %>%
    # Factors are replaced by their integer level codes, so the result relies
    # on the levels being stored in increasing order — TODO confirm for any
    # new column passed in. across(where()) replaces the superseded
    # mutate_if().
    mutate(across(where(is.factor), as.numeric)) %>%
    correlation(method = "kendall")
}

# Violin plot of the expensive-dog share by level of a SEIFA column, with the
# estimated group means and their CI_low/CI_high interval overlaid.
#
# seifa_index: unquoted name of a column of the global `SSC` object.
# Returns a ggplot object.
seifa_plot <- function(seifa_index) {
  # substitute() puts the caller's column name into the formula before the
  # call is evaluated, so the model is fitted as dogs_exp_prop ~ <column>.
  level_fit <- eval(substitute(
    lm(dogs_exp_prop ~ seifa_index, data = SSC, na.action = na.omit)
  ))
  level_means <- estimate_means(level_fit)

  # Raw distribution: one violin per level plus the jittered suburbs.
  violins <- ggplot(
    SSC,
    aes(x = {{ seifa_index }}, y = dogs_exp_prop, fill = {{ seifa_index }})
  ) +
    geom_violin(alpha = 0.66) +
    geom_jitter2(width = 0.05, alpha = 0.5)

  # Overlay of the estimated means; `group = 1` joins them in a single line.
  violins +
    geom_line(data = level_means, aes(y = Mean, group = 1), size = 1) +
    geom_pointrange(
      data = level_means,
      aes(y = Mean, ymin = CI_low, ymax = CI_high),
      size = 1,
      color = "white"
    ) +
    scale_fill_brewer(palette = "BrBG") +
    ylab("Proportion of expensive dogs") +
    theme_modern()
}

2.2 IRSD

2.2.1 Recalculated

seifa_means(IRSD_d)
# A tibble: 10 x 6
   IRSD_d   mean     sd    p25    p50    p75
   <fct>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1      0.110  0.0380 0.0894 0.113  0.132 
 2 2      0.0845 0.0856 0.0508 0.0759 0.0870
 3 3      0.0760 0.0148 0.0672 0.0802 0.0863
 4 4      0.0745 0.0170 0.0617 0.0723 0.0880
 5 5      0.0781 0.0248 0.0701 0.0810 0.0880
 6 6      0.0730 0.0118 0.0646 0.0728 0.0812
 7 7      0.0746 0.0173 0.0618 0.0734 0.0845
 8 8      0.0748 0.0121 0.0679 0.0749 0.0809
 9 9      0.0652 0.0150 0.0584 0.0628 0.0684
10 10     0.0789 0.0258 0.0564 0.0699 0.0985
seifa_cor(IRSD_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSD_d     | dogs_exp_prop | -0.20 | [-0.29, -0.10] | -3.76 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d)

2.2.2 Original

seifa_means(IRSD_d_orig)
# A tibble: 10 x 6
   IRSD_d_orig   mean     sd    p25    p50    p75
   <fct>        <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1           0.125  0.0107 0.120  0.125  0.131 
 2 2           0.100  0.0480 0.0810 0.107  0.135 
 3 3           0.126  0.0511 0.0977 0.113  0.159 
 4 4           0.0985 0.0125 0.0927 0.0954 0.105 
 5 5           0.0837 0.0609 0.0660 0.0759 0.0845
 6 6           0.0817 0.0882 0.0304 0.0821 0.0901
 7 7           0.0758 0.0157 0.0602 0.0780 0.0867
 8 8           0.0743 0.0251 0.0643 0.0756 0.0835
 9 9           0.0732 0.0131 0.0640 0.0736 0.0857
10 10          0.0742 0.0192 0.0591 0.0699 0.0868
seifa_cor(IRSD_d_orig)
# Correlation Matrix (kendall-method)

Parameter1  |    Parameter2 |   tau |         95% CI |     z |         p
------------------------------------------------------------------------
IRSD_d_orig | dogs_exp_prop | -0.18 | [-0.27, -0.09] | -3.39 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d_orig)

2.3 IRSAD

2.3.1 Recalculated

seifa_means(IRSAD_d)
# A tibble: 10 x 6
   IRSAD_d   mean     sd    p25    p50    p75
   <fct>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1       0.117  0.0582 0.0900 0.115  0.138 
 2 2       0.0843 0.0687 0.0753 0.0818 0.0901
 3 3       0.0724 0.0225 0.0585 0.0776 0.0863
 4 4       0.0766 0.0113 0.0647 0.08   0.0835
 5 5       0.0720 0.0170 0.0604 0.0677 0.0788
 6 6       0.0794 0.0195 0.0676 0.0769 0.0896
 7 7       0.0743 0.0145 0.0634 0.0749 0.0807
 8 8       0.0699 0.0132 0.0599 0.0709 0.0788
 9 9       0.0726 0.0214 0.0596 0.0663 0.0851
10 10      0.0734 0.0231 0.0564 0.0644 0.0870
seifa_cor(IRSAD_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IRSAD_d    | dogs_exp_prop | -0.23 | [-0.32, -0.13] | -4.35 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d)

2.3.2 Original

seifa_means(IRSAD_d_orig)
# A tibble: 10 x 6
   IRSAD_d_orig   mean      sd    p25    p50    p75
   <fct>         <dbl>   <dbl>  <dbl>  <dbl>  <dbl>
 1 1            0.125  0.0107  0.120  0.125  0.131 
 2 2            0.105  0.0737  0.0646 0.103  0.144 
 3 3            0.121  0.0280  0.106  0.125  0.141 
 4 4            0.137  0.0306  0.127  0.137  0.148 
 5 5            0.102  0.0779  0.0857 0.0907 0.0966
 6 6            0.0855 0.0919  0.0326 0.0818 0.0951
 7 7            0.0812 0.00990 0.0767 0.0824 0.0870
 8 8            0.0701 0.0206  0.0607 0.0746 0.0832
 9 9            0.0756 0.0152  0.0632 0.0735 0.0863
10 10           0.0736 0.0187  0.0591 0.0699 0.0822
seifa_cor(IRSAD_d_orig)
# Correlation Matrix (kendall-method)

Parameter1   |    Parameter2 |   tau |         95% CI |     z |         p
-------------------------------------------------------------------------
IRSAD_d_orig | dogs_exp_prop | -0.22 | [-0.31, -0.12] | -3.98 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d_orig)

2.4 IER

2.4.1 Recalculated

seifa_means(IER_d)
# A tibble: 10 x 6
   IER_d   mean     sd    p25    p50    p75
   <fct>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1     0.0891 0.0343 0.0663 0.0924 0.116 
 2 2     0.0843 0.0343 0.0665 0.0765 0.0902
 3 3     0.0764 0.0135 0.0656 0.0776 0.0814
 4 4     0.0898 0.0477 0.0647 0.0818 0.0890
 5 5     0.0806 0.0714 0.0589 0.0762 0.0894
 6 6     0.0783 0.0144 0.0672 0.0794 0.0863
 7 7     0.0710 0.0259 0.0632 0.0673 0.0862
 8 8     0.0689 0.0235 0.0605 0.0691 0.0756
 9 9     0.0706 0.0141 0.0568 0.0704 0.0808
10 10    0.0806 0.0247 0.0596 0.0744 0.0942
seifa_cor(IER_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |      p
--------------------------------------------------------------------
IER_d      | dogs_exp_prop | -0.10 | [-0.20, -0.01] | -2.01 | 0.045*

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d)

2.4.2 Original

seifa_means(IER_d_orig)
# A tibble: 10 x 6
   IER_d_orig   mean     sd    p25    p50    p75
   <fct>       <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1          0.0891 0.0334 0.0675 0.0904 0.115 
 2 2          0.0833 0.0333 0.0628 0.0750 0.0910
 3 3          0.0869 0.0443 0.0753 0.0783 0.0833
 4 4          0.0797 0.0242 0.0631 0.0792 0.0867
 5 5          0.0790 0.0696 0.0600 0.072  0.0894
 6 6          0.0799 0.0187 0.0650 0.0818 0.0876
 7 7          0.0729 0.0157 0.0642 0.0716 0.0777
 8 8          0.0634 0.0343 0.0611 0.0673 0.0865
 9 9          0.0701 0.0142 0.0614 0.0682 0.0743
10 10         0.0762 0.0210 0.0591 0.0717 0.0889
seifa_cor(IER_d_orig)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |      p
--------------------------------------------------------------------
IER_d_orig | dogs_exp_prop | -0.11 | [-0.21, -0.02] | -2.17 | 0.030*

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d_orig)

2.5 IEO

2.5.1 Recalculated

seifa_means(IEO_d)
# A tibble: 10 x 6
   IEO_d   mean     sd    p25    p50    p75
   <fct>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 1     0.116  0.0583 0.0885 0.114  0.138 
 2 2     0.0819 0.0226 0.0761 0.0863 0.0914
 3 3     0.0784 0.0690 0.0589 0.0793 0.0832
 4 4     0.0728 0.0124 0.0636 0.0702 0.0790
 5 5     0.0801 0.0163 0.0645 0.0814 0.0927
 6 6     0.0774 0.0191 0.0610 0.0788 0.0866
 7 7     0.0699 0.0133 0.0587 0.0704 0.0804
 8 8     0.0746 0.0222 0.0669 0.0704 0.0792
 9 9     0.0729 0.0179 0.0633 0.0684 0.0799
10 10    0.0676 0.0192 0.0554 0.0590 0.0699
seifa_cor(IEO_d)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d      | dogs_exp_prop | -0.27 | [-0.35, -0.18] | -5.13 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d)

2.5.2 Original

seifa_means(IEO_d_orig)
# A tibble: 10 x 6
   IEO_d_orig   mean      sd    p25    p50    p75
   <fct>       <dbl>   <dbl>  <dbl>  <dbl>  <dbl>
 1 1          0.143  0.0356  0.122  0.131  0.151 
 2 2          0.0757 0.0535  0.0499 0.0793 0.103 
 3 3          0.0795 0.112   0.0398 0.0795 0.119 
 4 4          0.126  0.0553  0.0894 0.105  0.138 
 5 5          0.0952 0.0116  0.0894 0.0907 0.0922
 6 6          0.0897 0.00705 0.0852 0.0897 0.0942
 7 7          0.0767 0.0751  0.0652 0.0769 0.0836
 8 8          0.0765 0.0130  0.0647 0.0776 0.0843
 9 9          0.0751 0.0156  0.0614 0.0737 0.0862
10 10         0.0727 0.0186  0.0585 0.0691 0.0810
seifa_cor(IEO_d_orig)
# Correlation Matrix (kendall-method)

Parameter1 |    Parameter2 |   tau |         95% CI |     z |         p
-----------------------------------------------------------------------
IEO_d_orig | dogs_exp_prop | -0.27 | [-0.36, -0.18] | -4.89 | < .001***

p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d_orig)

3 PCA

# Input for the PCA: every column from `alaskan_malamute` to the last one,
# taken from `wide_insurance_p` with the geometry dropped.
# (`wide_insurance_n` is the alternative source that was tried here.)
data <- as_tibble(
  select(
    st_drop_geometry(wide_insurance_p),
    alaskan_malamute:last_col()
  )
)

# View(cov(data))
# Single-component PCA on the breed columns, without standardising them.
pca <- principal_components(data, n = 1, standardize = FALSE)
pca
# Loadings from Principal Component Analysis (no rotation)

Variable             |    PC1    | Complexity
---------------------------------------------
alaskan_malamute     |   -0.03   |    1.00   
boxer                |   -0.15   |    1.00   
british_bulldog      |   -0.07   |    1.00   
bull_terrier         |   -0.05   |    1.00   
bullmastiff          |   -0.07   |    1.00   
dogue_de_bordeaux    |   -0.02   |    1.00   
french_bulldog       |   -0.10   |    1.00   
great_dane           |   -0.07   |    1.00   
irish_wolfhound      |   -0.04   |    1.00   
mastiff              |   -0.05   |    1.00   
rottweiler           |   -0.15   |    1.00   
shar_pei             |   -0.07   |    1.00   
yorkshire_terrier    |   -0.02   |    1.00   
airedale_terrier     |   -0.01   |    1.00   
irish_setter         | -7.16e-03 |    1.00   
pekingese            | -5.47e-03 |    1.00   
basset_hound         |   -0.01   |    1.00   
bedlington_terrier   | -4.79e-03 |    1.00   
bernese_mountain_dog | -5.38e-03 |    1.00   
italian_corso_dog    | -1.54e-03 |    1.00   
neapolitan_mastiff   |   -0.01   |    1.00   
newfoundland         | -2.76e-03 |    1.00   
weimaraner           |   -0.03   |    1.00   
poodle_standard      | -4.55e-03 |    1.00   
st_bernard           | -6.28e-03 |    1.00   
miniature_pinscher   | -8.28e-03 |    1.00   
welsh_corgi_pembroke | -1.09e-03 |    1.00   
welsh_corgi_cardigan | -2.61e-04 |    1.00   

The unique principal component accounted for 57.29% of the total variance of the original data.
summary(pca)
# (Explained) Variance of Components

Parameter                       |   PC1
---------------------------------------
Eigenvalues                     | 0.083
Variance Explained              | 0.573
Variance Explained (Cumulative) | 0.573
Variance Explained (Proportion) | 0.573
plot(pca)

# Suburb-level columns (`SSC_CODE16` through `caution`) together with the
# first-component score and its rank in ten equal-sized buckets.
pca_results <- wide_insurance_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  select(SSC_CODE16:caution) %>%
  mutate(pca_raw = predict(pca)$Component_1) %>%
  mutate(pca = ntile(pca_raw, 10))

3.1 IRSD_d

ggplot(pca_results, aes(x = IRSD, y = pca_raw)) + 
  geom_point()

pca == IRSD_d <lgl> 
# total N=178 valid N=178 mean=0.09 sd=0.29

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 162 | 91.01 |   91.01 |  91.01
TRUE  |  16 |  8.99 |    8.99 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

3.2 IRSAD_d

ggplot(pca_results, aes(x = IRSAD, y = pca_raw)) + 
  geom_point()

pca == IRSAD_d <lgl> 
# total N=178 valid N=178 mean=0.10 sd=0.30

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 160 | 89.89 |   89.89 |  89.89
TRUE  |  18 | 10.11 |   10.11 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

3.3 IER_d

ggplot(pca_results, aes(x = IER, y = pca_raw)) + 
  geom_point()

pca == IER_d <lgl> 
# total N=178 valid N=178 mean=0.08 sd=0.28

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 91.57 |   91.57 |  91.57
TRUE  |  15 |  8.43 |    8.43 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

3.4 IEO_d

ggplot(pca_results, aes(x = IEO, y = pca_raw)) + 
  geom_point()

pca == IEO_d <lgl> 
# total N=178 valid N=178 mean=0.08 sd=0.28

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 91.57 |   91.57 |  91.57
TRUE  |  15 |  8.43 |    8.43 | 100.00
<NA>  |   0 |  0.00 |    <NA> |   <NA>

4 Clustering I

# Input for the cluster analyses: every column from `alaskan_malamute` to the
# last one, taken from `wide_insurance_p` with the geometry dropped.
data <- as_tibble(
  select(
    st_drop_geometry(wide_insurance_p),
    alaskan_malamute:last_col()
  )
)

4.1 Number of clusters

n <- n_clusters(data, package = c("easystats", "NbClust", "mclust"))
n
# Method Agreement Procedure:

The choice of 2 clusters is supported by 9 (42.86%) methods out of 21 (Elbow, Silhouette, Duda, Pseudot2, Beale, Frey, Mcclain, Dunn, SDindex).
plot(n)

4.2 K-Means

# Two-cluster k-means solution on the breed columns in `data`.
# NOTE(review): no seed is set in the visible code; k-means presumably starts
# from random centres, so confirm a seed is fixed elsewhere if the cluster
# labels need to be reproducible.
rez_kmeans <- cluster_analysis(data, n = 2, method = "kmeans")

rez_kmeans
# Clustering Solution

The 2 clusters accounted for 3.46% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |     7 |      520.29 |            -0.76 |  1.87 |            0.29 |        -0.39 |       -0.38 |              0.15 |          -0.58 |      -1.15 |           -0.33 |   -0.80 |      -1.36 |     0.33 |             -0.26 |            -0.53 |        -0.38 |     -0.46 |        -0.52 |               1.27 |                 0.33 |             -0.23 |              -0.62 |        -0.22 |       1.93 |           -0.26 |       2.15 |               1.76 |                -0.18 |                -0.10
2       |   171 |     4264.01 |             0.03 | -0.08 |           -0.01 |         0.02 |        0.02 |         -5.97e-03 |           0.02 |       0.05 |            0.01 |    0.03 |       0.06 |    -0.01 |              0.01 |             0.02 |         0.02 |      0.02 |         0.02 |              -0.05 |                -0.01 |          9.36e-03 |               0.03 |     8.92e-03 |      -0.08 |            0.01 |      -0.09 |              -0.07 |             7.57e-03 |             4.22e-03

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |             171.698 |           4784.302 | 0.035

# You can access the predicted clusters via 'predict()'.
plot(rez_kmeans)

plot(summary(rez_kmeans))

# All non-geometry columns of `wide_insurance_p` plus the k-means cluster
# label predicted for each row.
cluster_results <- mutate(
  as_tibble(st_drop_geometry(wide_insurance_p)),
  cluster = predict(rez_kmeans)
)

frq(cluster_results, cluster)
cluster <integer> 
# total N=178 valid N=178 mean=1.96 sd=0.19

Value |   N | Raw % | Valid % | Cum. %
--------------------------------------
    1 |   7 |  3.93 |    3.93 |   3.93
    2 | 171 | 96.07 |   96.07 | 100.00
 <NA> |   0 |  0.00 |    <NA> |   <NA>
aggregate(data = cluster_results, french_bulldog ~ cluster, mean)
  cluster french_bulldog
1       1     0.04195011
2       2     0.10710798
aggregate(data = cluster_results, rottweiler ~ cluster, mean)
  cluster rottweiler
1       1  0.0260771
2       2  0.1489861

4.3 Hierarchical Clustering

rez_hclust <- cluster_analysis(data, n = 2, method = "hclust")

rez_hclust
# Clustering Solution

The 2 clusters accounted for 3.86% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute |    boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   175 |     4516.16 |             0.01 | 4.84e-03 |           -0.01 |    -2.27e-03 |    4.31e-03 |             -0.02 |           0.02 |       0.02 |            0.02 |    0.02 |       0.03 |    -0.03 |          8.17e-03 |         9.03e-03 |     6.49e-03 |  7.90e-03 |     9.00e-03 |              -0.05 |             6.40e-03 |          3.92e-03 |               0.01 |     3.73e-03 |      -0.08 |        4.38e-03 |      -0.04 |              -0.05 |             3.17e-03 |             1.77e-03
2       |     3 |      248.54 |            -0.76 |    -0.28 |            0.75 |         0.13 |       -0.25 |              1.33 |          -0.97 |      -1.33 |           -0.97 |   -1.14 |      -1.66 |     1.48 |             -0.48 |            -0.53 |        -0.38 |     -0.46 |        -0.52 |               2.70 |                -0.37 |             -0.23 |              -0.62 |        -0.22 |       4.93 |           -0.26 |       2.44 |               3.21 |                -0.18 |                -0.10

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |             191.303 |           4764.697 | 0.039

# You can access the predicted clusters via 'predict()'.
plot(rez_hclust)

4.4 Hierarchical K-Means

rez_hkmeans <- cluster_analysis(data, n = 2, method = "hkmeans")

rez_hkmeans
# Clustering Solution

The 2 clusters accounted for 3.86% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute |    boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   175 |     4516.16 |             0.01 | 4.84e-03 |           -0.01 |    -2.27e-03 |    4.31e-03 |             -0.02 |           0.02 |       0.02 |            0.02 |    0.02 |       0.03 |    -0.03 |          8.17e-03 |         9.03e-03 |     6.49e-03 |  7.90e-03 |     9.00e-03 |              -0.05 |             6.40e-03 |          3.92e-03 |               0.01 |     3.73e-03 |      -0.08 |        4.38e-03 |      -0.04 |              -0.05 |             3.17e-03 |             1.77e-03
2       |     3 |      248.54 |            -0.76 |    -0.28 |            0.75 |         0.13 |       -0.25 |              1.33 |          -0.97 |      -1.33 |           -0.97 |   -1.14 |      -1.66 |     1.48 |             -0.48 |            -0.53 |        -0.38 |     -0.46 |        -0.52 |               2.70 |                -0.37 |             -0.23 |              -0.62 |        -0.22 |       4.93 |           -0.26 |       2.44 |               3.21 |                -0.18 |                -0.10

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |             191.303 |           4764.697 | 0.039

# You can access the predicted clusters via 'predict()'.
plot(rez_hkmeans)

4.5 K-Medoids (PAM)

rez_pam <- cluster_analysis(data, n = 2, method = "pam")

rez_pam
# Clustering Solution

The 2 clusters accounted for 3.43% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    74 |     1919.91 |             0.31 | -0.20 |           -0.37 |        -0.14 |        0.30 |             -0.03 |          -0.39 |  -4.70e-03 |           -0.08 |    0.34 |       0.22 |     0.50 |              0.09 |            -0.35 |        -0.12 |     -0.12 |         0.31 |              -0.06 |                 0.26 |              0.02 |              -0.02 |        -0.04 |      -0.12 |           -0.05 |   4.52e-03 |              -0.16 |                -0.14 |                 0.05
2       |   104 |     2866.20 |            -0.22 |  0.14 |            0.26 |         0.10 |       -0.21 |              0.02 |           0.28 |   3.34e-03 |            0.06 |   -0.24 |      -0.16 |    -0.35 |             -0.06 |             0.25 |         0.09 |      0.09 |        -0.22 |               0.05 |                -0.18 |             -0.02 |               0.01 |         0.03 |       0.09 |            0.03 |  -3.21e-03 |               0.11 |                 0.10 |                -0.04

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |             169.887 |           4786.113 | 0.034

# You can access the predicted clusters via 'predict()'.
plot(rez_pam)

4.6 Bootstrapped Hierarchical Clustering

4.7 DBSCAN

eps <- n_clusters_dbscan(data, min_size = 0.01) 

eps
The DBSCAN method, based on the total clusters sum of squares, suggests that the optimal eps = 8.76170574003452 (with min. cluster size set to 2), which corresponds to 1 clusters.
plot(eps)

# DBSCAN solution on the breed columns in `data`.
# NOTE(review): eps is fixed at 4.5 here rather than taken from `eps`
# computed above — confirm this value is intentional.
rez_dbscan <- cluster_analysis(data, method = "dbscan", dbscan_eps = 4.5)

rez_dbscan
# Clustering Solution

The 2 clusters accounted for 1.05% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute |     boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |    92 |     4060.21 |            -0.02 | -2.53e-04 |           -0.11 |        -0.09 |       -0.06 |              0.03 |           0.14 |      -0.12 |           -0.02 |    0.01 |      -0.17 |     0.01 |              0.08 |             0.12 |         0.13 |      0.18 |     6.51e-04 |               0.11 |                 0.05 |              0.09 |              -0.16 |         0.07 |  -6.32e-03 |            0.10 |       0.09 |               0.09 |                 0.15 |                 0.10
1       |    86 |      843.87 |             0.03 |  2.70e-04 |            0.12 |         0.10 |        0.06 |             -0.03 |          -0.15 |       0.13 |            0.02 |   -0.01 |       0.18 |    -0.01 |             -0.09 |            -0.13 |        -0.14 |     -0.20 |    -6.96e-04 |              -0.12 |                -0.05 |             -0.10 |               0.18 |        -0.08 |   6.76e-03 |           -0.11 |      -0.10 |              -0.09 |                -0.16 |                -0.10

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |              51.917 |            843.870 | 0.010

# You can access the predicted clusters via 'predict()'.
plot(rez_dbscan)

4.8 HDBSCAN

rez_hdbscan <- cluster_analysis(data, method = "hdbscan")

rez_hdbscan
# Clustering Solution

The unique cluster accounted for 0.00% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute |    boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound |   mastiff | rottweiler |  shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0       |   178 |        4956 |         2.54e-17 | 6.80e-18 |        4.50e-17 |     6.84e-17 |    5.18e-18 |         -3.69e-17 |       2.36e-17 |   5.81e-17 |        1.67e-17 | -5.60e-17 |  -2.27e-17 | -3.01e-17 |          8.27e-18 |        -4.44e-17 |     3.02e-19 |  3.89e-17 |     3.95e-17 |          -1.12e-17 |            -4.61e-18 |          7.53e-18 |           7.46e-17 |     1.17e-17 |  -2.57e-17 |        6.33e-18 |   7.07e-19 |          -1.51e-17 |             1.27e-17 |            -6.26e-18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |               0.000 |              0.000 | 0.000

# You can access the predicted clusters via 'predict()'.
# plot(rez_hdbscan)

4.9 K-Medoids with estimation of number of clusters (pamk)

rez_pamk <- cluster_analysis(data, method = "pamk")

rez_pamk 
# Clustering Solution

The 10 clusters accounted for 26.62% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |    39 |      819.30 |             0.53 | -0.10 |           -0.41 |        -0.04 |        0.43 |              0.13 |          -0.42 |      -0.24 |           -0.45 |    0.32 |       0.31 |     0.51 |             -0.03 |            -0.28 |        -0.25 |     -0.39 |         0.70 |              -0.05 |                 0.13 |         -3.60e-03 |              -0.21 |         0.02 |      -0.18 |        9.18e-03 |      -0.27 |              -0.14 |                -0.10 |                -0.10
10      |     1 |        0.00 |            -0.76 |  0.85 |           -1.31 |        -1.20 |       -0.79 |             -0.75 |          -0.97 |       1.44 |            2.82 |    0.49 |      -0.77 |    -1.13 |             -0.48 |            -0.53 |        -0.38 |     -0.46 |        -0.52 |              -0.29 |                -0.37 |             -0.23 |               3.58 |        10.72 |      -0.70 |           -0.26 |      -0.34 |               2.52 |                -0.18 |                -0.10
2       |    33 |     1007.90 |             0.14 | -0.23 |           -0.41 |         0.08 |       -0.14 |             -0.33 |          -0.29 |       0.54 |            0.59 |    0.27 |      -0.18 |     0.22 |             -0.03 |            -0.26 |         0.47 |      0.71 |        -0.24 |               0.02 |                 0.57 |             -0.23 |              -0.36 |        -0.08 |       0.14 |           -0.14 |       0.70 |               0.01 |                 0.13 |                -0.10
3       |     4 |       47.41 |             0.17 | -0.58 |           -0.46 |        -0.60 |        0.02 |              1.26 |           0.34 |       0.16 |           -0.20 |   -0.14 |      -0.11 |    -0.26 |              0.27 |            -0.34 |        -0.38 |      1.67 |         0.69 |              -0.03 |                -0.04 |              5.45 |               1.10 |         0.12 |      -0.49 |            0.02 |      -0.34 |               0.26 |                -0.18 |                -0.10
4       |    55 |      943.44 |            -0.30 |  0.44 |            0.28 |         0.06 |       -0.21 |              0.07 |           0.07 |      -0.09 |            0.15 |   -0.41 |       0.04 |    -0.36 |             -0.14 |             0.56 |         0.02 |     -0.24 |        -0.12 |              -0.01 |                -0.17 |             -0.10 |              -0.34 |        -0.04 |       0.14 |           -0.13 |      -0.01 |              -0.03 |                 0.02 |                -0.10
5       |    12 |      301.83 |            -0.76 | -0.73 |            0.99 |         0.04 |       -0.18 |             -0.47 |           2.44 |      -1.17 |           -0.80 |    0.27 |      -1.19 |    -0.22 |              1.53 |        -9.05e-03 |        -0.19 |     -0.07 |        -0.43 |              -0.29 |                -0.37 |             -0.23 |              -0.62 |        -0.22 |      -0.18 |            0.26 |      -0.24 |              -0.11 |                 0.23 |                -0.10
6       |    30 |      495.73 |             0.09 | -0.03 |            0.07 |        -0.11 |        0.07 |             -0.01 |          -0.15 |       0.39 |           -0.08 |    0.04 |       0.28 |    -0.08 |             -0.26 |            -0.29 |        -0.04 |      0.06 |        -0.26 |              -0.06 |                -0.26 |             -0.15 |               1.27 |        -0.12 |      -0.32 |            0.29 |      -0.19 |              -0.20 |                -0.09 |                -0.10
7       |     2 |       20.89 |            -0.52 | -0.19 |           -0.28 |         0.38 |        0.05 |             -0.37 |          -0.49 |      -0.20 |            1.14 |   -0.47 |       0.91 |    -0.14 |             -0.17 |             0.15 |        -0.38 |     -0.46 |        -0.52 |              -0.29 |                -0.37 |             -0.23 |               0.63 |        -0.22 |       0.37 |            0.20 |      -0.34 |               0.28 |                -0.18 |                 9.08
8       |     1 |        0.00 |            -0.76 | -1.34 |            1.54 |        -1.20 |        0.82 |              5.49 |          -0.97 |      -1.33 |           -0.97 |   -1.14 |      -1.66 |     1.22 |             -0.48 |            -0.53 |        -0.38 |     -0.46 |        -0.52 |              -0.29 |                -0.37 |             -0.23 |              -0.62 |        -0.22 |       2.28 |           -0.26 |      -0.34 |              10.35 |                -0.18 |                -0.10
9       |     1 |        0.00 |            -0.76 |  0.24 |            2.02 |         2.81 |       -0.79 |             -0.75 |          -0.97 |      -1.33 |           -0.97 |   -1.14 |      -1.66 |    -1.13 |             -0.48 |            -0.53 |        -0.38 |     -0.46 |        -0.52 |               8.68 |                -0.37 |             -0.23 |              -0.62 |        -0.22 |       6.26 |           -0.26 |      -0.34 |              -0.36 |                -0.18 |                -0.10

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |            1319.503 |           3636.497 | 0.266

# You can access the predicted clusters via 'predict()'.
# Draw the default plot for the pamk solution.
plot(rez_pamk)

4.10 Mixture

# Load mclust before fitting (p_load attaches the package).
# NOTE(review): presumably needed by method = "mixture" -- confirm.
p_load(mclust)

# Model-based (mixture) clustering of the same input data.
rez_mixture <- cluster_analysis(data, method = "mixture")

# Print the solution summary.
rez_mixture
# Clustering Solution

The unique cluster accounted for 0.00% of the total variance of the original data.

Cluster | n_Obs | Sum_Squares | alaskan_malamute |    boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound |   mastiff | rottweiler |  shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1       |   178 |        4956 |         2.54e-17 | 6.80e-18 |        4.50e-17 |     6.84e-17 |    5.18e-18 |         -3.69e-17 |       2.36e-17 |   5.81e-17 |        1.67e-17 | -5.60e-17 |  -2.27e-17 | -3.01e-17 |          8.27e-18 |        -4.44e-17 |     3.02e-19 |  3.89e-17 |     3.95e-17 |          -1.12e-17 |            -4.61e-18 |          7.53e-18 |           7.46e-17 |     1.17e-17 |  -2.57e-17 |        6.33e-18 |   7.07e-19 |          -1.51e-17 |             1.27e-17 |            -6.26e-18

# Indices of model performance

Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within |    R2
--------------------------------------------------------------------
4956.000          |               0.000 |           4956.000 | 0.000

# You can access the predicted clusters via 'predict()'.
# Draw the default plot for the mixture solution.
plot(rez_mixture)

4.11 Metaclustering

# Pool the individual clustering solutions for the meta-clustering step.
# rez_hclust2 is commented out, and rez_pamk (section 4.9) is not in the list.
# NOTE(review): in the printed output rez_hdbscan and rez_mixture each place
# all 178 observations in one cluster -- confirm they should stay in the pool.
list_of_results <- list(
  rez_kmeans, rez_hclust, rez_hkmeans, rez_pam,
  # rez_hclust2,
  rez_dbscan, rez_hdbscan, rez_mixture
)

# Combine the solutions into a single matrix (per the parameters package
# documentation: pairwise probabilities of observations being clustered
# together across the methods).
probability_matrix <- cluster_meta(list_of_results)

# Heatmap of that matrix on its original scale, using the "inferno" palette.
heatmap(
  probability_matrix,
  scale = "none",
  col = grDevices::hcl.colors(n = 256, palette = "inferno")
)

5 Clustering II

# Clustering input: every column from alaskan_malamute through the last column
# of wide_insurance_p, with the sf geometry dropped, as a plain tibble.
data <- wide_insurance_p %>%
  st_drop_geometry() %>%
  select(alaskan_malamute:last_col()) %>%
  as_tibble()

p_load(mclust)

# BIC of the candidate Gaussian mixture models with 2 to 9 components.
BIC <- mclustBIC(data, G = 2:9)

plot(BIC)

# Show only the best-scoring models; printing the full BIC table is disabled.
# BIC
summary(BIC)
Best BIC values:
            VEI,5       VEI,4       VEI,6
BIC      23064.34 23038.86932 23024.00379
BIC diff     0.00   -25.46905   -40.33457
# Fit the Gaussian mixture model, passing the BIC object computed above via
# `x` (presumably so already-evaluated models are reused -- see ?Mclust).
mod1 <- Mclust(data, x = BIC)

# Print the model object (selected model and available components).
mod1
'Mclust' model object: (VEI,5) 

Available components: 
 [1] "call"           "data"           "modelName"      "n"             
 [5] "d"              "G"              "BIC"            "loglik"        
 [9] "df"             "bic"            "icl"            "hypvol"        
[13] "parameters"     "z"              "classification" "uncertainty"   
# Full model summary, including the estimated parameters (mixing
# probabilities, means, variances).
summary(mod1, parameters = TRUE)
---------------------------------------------------- 
Gaussian finite mixture model fitted by EM algorithm 
---------------------------------------------------- 

Mclust VEI (diagonal, equal shape) model with 5 components: 

 log-likelihood   n  df      BIC      ICL
       11988.17 178 176 23064.34 23059.05

Clustering table:
 1  2  3  4  5 
89 13  7 53 16 

Mixing probabilities:
         1          2          3          4          5 
0.49291878 0.07303509 0.03904661 0.30493951 0.09006000 

Means:
                             [,1]                [,2]
alaskan_malamute     0.0280260663 0.01523890420481271
boxer                0.1380130126 0.09251023230370482
british_bulldog      0.0703786970 0.06307841676974323
bull_terrier         0.0548620098 0.04967029096208535
bullmastiff          0.0750592832 0.11720898243497578
dogue_de_bordeaux    0.0160290654 0.01716535731557738
french_bulldog       0.0852446464 0.12670960528058386
great_dane           0.0849772815 0.04689081037828381
irish_wolfhound      0.0427095269 0.03036521620785435
mastiff              0.0541565500 0.01781719431315327
rottweiler           0.1672558659 0.06472812166570897
shar_pei             0.0736443850 0.06730510039360194
yorkshire_terrier    0.0149442332 0.05476075199356949
airedale_terrier     0.0087792248 0.01840293688166407
irish_setter         0.0041701205 0.00485050287557857
                                                                                                                                                                                                                                    [,3]
alaskan_malamute     0.02307868954891480542390702623833931284025311470031738281250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
boxer                0.09317627409444825814510693362535675987601280212402343750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
british_bulldog      0.06512704622438660051475523005137802101671695709228515625000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bull_terrier         0.03518900869576900569679622776675387285649776458740234375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bullmastiff          0.06475947220930737546762401279920595698058605194091796875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dogue_de_bordeaux    0.03949146590040625987017008924340188968926668167114257812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
french_bulldog       0.14895948398939018453290827892487868666648864746093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
great_dane           0.07620511741503216418536226228752639144659042358398437500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_wolfhound      0.04659996204048265366903791573349735699594020843505859375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
mastiff              0.05387535876531312356574332511627289932221174240112304687500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
rottweiler           0.13483370784932108277232032378378789871931076049804687500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
shar_pei             0.05613689976338936266087031867755285929888486862182617187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
yorkshire_terrier    0.02299205053934313958130530863854801282286643981933593750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
airedale_terrier     0.01392807654745298502618755520643389900214970111846923828125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_setter         0.00000000000000000000183731842700712235862819943221779794839676469564437866210937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
                                                                                                                                  [,4]
alaskan_malamute     0.034972438226513739845113803994536283425986766815185546875000000000000000000000000000000000000000000000000000000
boxer                0.162774405083149720230295542933163233101367950439453125000000000000000000000000000000000000000000000000000000000
british_bulldog      0.064281559077350669539896443893667310476303100585937500000000000000000000000000000000000000000000000000000000000
bull_terrier         0.043268086948827569337971965524047845974564552307128906250000000000000000000000000000000000000000000000000000000
bullmastiff          0.057933001416211514500442802955149090848863124847412109375000000000000000000000000000000000000000000000000000000
dogue_de_bordeaux    0.015720621420363591752566989612205361481755971908569335937500000000000000000000000000000000000000000000000000000
french_bulldog       0.132188651912820254175073841906851157546043395996093750000000000000000000000000000000000000000000000000000000000
great_dane           0.060850530011690669163204603364647482521831989288330078125000000000000000000000000000000000000000000000000000000
irish_wolfhound      0.031272186141217704624661877232938422821462154388427734375000000000000000000000000000000000000000000000000000000
mastiff              0.062307360547957629792747979990963358432054519653320312500000000000000000000000000000000000000000000000000000000
rottweiler           0.120872611026586973537177982507273554801940917968750000000000000000000000000000000000000000000000000000000000000
shar_pei             0.060537649694227757546372004071599803864955902099609375000000000000000000000000000000000000000000000000000000000
yorkshire_terrier    0.021856493110238572885606345153064467012882232666015625000000000000000000000000000000000000000000000000000000000
airedale_terrier     0.020748329175947131247648158591800893191248178482055664062500000000000000000000000000000000000000000000000000000
irish_setter         0.014770551368987038659552624153548094909638166427612304687500000000000000000000000000000000000000000000000000000
                                                                                                                                                                                                                                                                                                       [,5]
alaskan_malamute     0.018890382428315834889565039134140533860772848129272460937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
boxer                0.142958598962115990271826149182743392884731292724609375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
british_bulldog      0.045954123313447801935094361169831245206296443939208984375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bull_terrier         0.053382937498291524458515766582422656938433647155761718750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bullmastiff          0.046800440036258564302507068077829899266362190246582031250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dogue_de_bordeaux    0.017505275546984531281902164323582837823778390884399414062500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
french_bulldog       0.079355418404189678738269719815434655174612998962402343750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
great_dane           0.078319430557538871617850873008137568831443786621093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_wolfhound      0.054518537634597484531351341274785227142274379730224609375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
mastiff              0.051766762430036472597549845886533148586750030517578125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
rottweiler           0.164978520707840248249453907192219048738479614257812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
shar_pei             0.075679002581283363881858861077489564195275306701660156250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
yorkshire_terrier    0.037399094399947867273237278595843235962092876434326171875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
airedale_terrier     0.009802017106132451459976628882486693328246474266052246093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_setter         0.005766900955896783753651391180028440430760383605957031250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
 [ reached getOption("max.print") -- omitted 13 rows ]

Variances:
[,,1]
                     alaskan_malamute       boxer british_bulldog bull_terrier
alaskan_malamute         0.0006448616 0.000000000      0.00000000  0.000000000
boxer                    0.0000000000 0.004448486      0.00000000  0.000000000
                     bullmastiff dogue_de_bordeaux french_bulldog  great_dane
alaskan_malamute     0.000000000       0.000000000    0.000000000 0.000000000
boxer                0.000000000       0.000000000    0.000000000 0.000000000
                     irish_wolfhound     mastiff  rottweiler    shar_pei
alaskan_malamute        0.0000000000 0.000000000 0.000000000 0.000000000
boxer                   0.0000000000 0.000000000 0.000000000 0.000000000
                     yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute          0.0000000000     0.0000000000 0.0000000000
boxer                     0.0000000000     0.0000000000 0.0000000000
                         pekingese basset_hound bedlington_terrier
alaskan_malamute     0.00000000000 0.0000000000      0.00000000000
boxer                0.00000000000 0.0000000000      0.00000000000
                     bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute            0.00000000000    0.000000000000       0.0000000000
boxer                       0.00000000000    0.000000000000       0.0000000000
                      newfoundland   weimaraner poodle_standard   st_bernard
alaskan_malamute     0.00000000000 0.0000000000   0.00000000000 0.0000000000
boxer                0.00000000000 0.0000000000   0.00000000000 0.0000000000
                     miniature_pinscher welsh_corgi_pembroke
alaskan_malamute            0.000000000       0.000000000000
boxer                       0.000000000       0.000000000000
                     welsh_corgi_cardigan
alaskan_malamute            0.00000000000
boxer                       0.00000000000
 [ reached getOption("max.print") -- omitted 26 rows ]
[,,2]
                     alaskan_malamute     boxer british_bulldog bull_terrier
alaskan_malamute           0.02196267 0.0000000      0.00000000   0.00000000
boxer                      0.00000000 0.1515064      0.00000000   0.00000000
                     bullmastiff dogue_de_bordeaux french_bulldog great_dane
alaskan_malamute      0.00000000        0.00000000      0.0000000  0.0000000
boxer                 0.00000000        0.00000000      0.0000000  0.0000000
                     irish_wolfhound    mastiff rottweiler  shar_pei
alaskan_malamute          0.00000000 0.00000000  0.0000000 0.0000000
boxer                     0.00000000 0.00000000  0.0000000 0.0000000
                     yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute            0.00000000      0.000000000  0.000000000
boxer                       0.00000000      0.000000000  0.000000000
                       pekingese basset_hound bedlington_terrier
alaskan_malamute     0.000000000  0.000000000        0.000000000
boxer                0.000000000  0.000000000        0.000000000
                     bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute             0.0000000000      0.0000000000        0.000000000
boxer                        0.0000000000      0.0000000000        0.000000000
                     newfoundland weimaraner poodle_standard  st_bernard
alaskan_malamute     0.0000000000 0.00000000     0.000000000 0.000000000
boxer                0.0000000000 0.00000000     0.000000000 0.000000000
                     miniature_pinscher welsh_corgi_pembroke
alaskan_malamute            0.000000000         0.0000000000
boxer                       0.000000000         0.0000000000
                     welsh_corgi_cardigan
alaskan_malamute           0.000000000000
boxer                      0.000000000000
 [ reached getOption("max.print") -- omitted 26 rows ]
[,,3]
                     alaskan_malamute       boxer british_bulldog bull_terrier
alaskan_malamute          0.001298257 0.000000000     0.000000000  0.000000000
boxer                     0.000000000 0.008955844     0.000000000  0.000000000
                     bullmastiff dogue_de_bordeaux french_bulldog  great_dane
alaskan_malamute     0.000000000      0.0000000000    0.000000000 0.000000000
boxer                0.000000000      0.0000000000    0.000000000 0.000000000
                     irish_wolfhound     mastiff  rottweiler    shar_pei
alaskan_malamute         0.000000000 0.000000000 0.000000000 0.000000000
boxer                    0.000000000 0.000000000 0.000000000 0.000000000
                     yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute          0.0000000000     0.0000000000 0.0000000000
boxer                     0.0000000000     0.0000000000 0.0000000000
                        pekingese basset_hound bedlington_terrier
alaskan_malamute     0.0000000000 0.0000000000       0.0000000000
boxer                0.0000000000 0.0000000000       0.0000000000
                     bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute            0.00000000000     0.00000000000       0.0000000000
boxer                       0.00000000000     0.00000000000       0.0000000000
                      newfoundland  weimaraner poodle_standard   st_bernard
alaskan_malamute     0.00000000000 0.000000000    0.0000000000 0.0000000000
boxer                0.00000000000 0.000000000    0.0000000000 0.0000000000
                     miniature_pinscher welsh_corgi_pembroke
alaskan_malamute           0.0000000000       0.000000000000
boxer                      0.0000000000       0.000000000000
                     welsh_corgi_cardigan
alaskan_malamute          0.0000000000000
boxer                     0.0000000000000
 [ reached getOption("max.print") -- omitted 26 rows ]
[,,4]
                     alaskan_malamute      boxer british_bulldog bull_terrier
alaskan_malamute          0.002151014 0.00000000     0.000000000  0.000000000
boxer                     0.000000000 0.01483846     0.000000000  0.000000000
                     bullmastiff dogue_de_bordeaux french_bulldog  great_dane
alaskan_malamute      0.00000000      0.0000000000     0.00000000 0.000000000
boxer                 0.00000000      0.0000000000     0.00000000 0.000000000
                     irish_wolfhound     mastiff rottweiler    shar_pei
alaskan_malamute          0.00000000 0.000000000 0.00000000 0.000000000
boxer                     0.00000000 0.000000000 0.00000000 0.000000000
                     yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute           0.000000000     0.0000000000 0.0000000000
boxer                      0.000000000     0.0000000000 0.0000000000
                        pekingese basset_hound bedlington_terrier
alaskan_malamute     0.0000000000 0.0000000000       0.0000000000
boxer                0.0000000000 0.0000000000       0.0000000000
                     bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute            0.00000000000     0.00000000000       0.0000000000
boxer                       0.00000000000     0.00000000000       0.0000000000
                      newfoundland  weimaraner poodle_standard  st_bernard
alaskan_malamute     0.00000000000 0.000000000    0.0000000000 0.000000000
boxer                0.00000000000 0.000000000    0.0000000000 0.000000000
                     miniature_pinscher welsh_corgi_pembroke
alaskan_malamute           0.0000000000        0.00000000000
boxer                      0.0000000000        0.00000000000
                     welsh_corgi_cardigan
alaskan_malamute          0.0000000000000
boxer                     0.0000000000000
 [ reached getOption("max.print") -- omitted 26 rows ]
[,,5]
                     alaskan_malamute      boxer british_bulldog bull_terrier
alaskan_malamute         0.0009597299 0.00000000     0.000000000  0.000000000
boxer                    0.0000000000 0.00662056     0.000000000  0.000000000
                     bullmastiff dogue_de_bordeaux french_bulldog  great_dane
alaskan_malamute     0.000000000      0.0000000000    0.000000000 0.000000000
boxer                0.000000000      0.0000000000    0.000000000 0.000000000
                     irish_wolfhound     mastiff  rottweiler    shar_pei
alaskan_malamute         0.000000000 0.000000000 0.000000000 0.000000000
boxer                    0.000000000 0.000000000 0.000000000 0.000000000
                     yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute          0.0000000000     0.0000000000 0.0000000000
boxer                     0.0000000000     0.0000000000 0.0000000000
                        pekingese basset_hound bedlington_terrier
alaskan_malamute     0.0000000000 0.0000000000       0.0000000000
boxer                0.0000000000 0.0000000000       0.0000000000
                     bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute            0.00000000000     0.00000000000       0.0000000000
boxer                       0.00000000000     0.00000000000       0.0000000000
                      newfoundland  weimaraner poodle_standard   st_bernard
alaskan_malamute     0.00000000000 0.000000000   0.00000000000 0.0000000000
boxer                0.00000000000 0.000000000   0.00000000000 0.0000000000
                     miniature_pinscher welsh_corgi_pembroke
alaskan_malamute           0.0000000000       0.000000000000
boxer                      0.0000000000       0.000000000000
                     welsh_corgi_cardigan
alaskan_malamute          0.0000000000000
boxer                     0.0000000000000
 [ reached getOption("max.print") -- omitted 26 rows ]
# Frequency table of the cluster each observation was assigned to.
frq(mod1$classification)
x <numeric> 
# total N=178 valid N=178 mean=2.40 sd=1.55

Value |  N | Raw % | Valid % | Cum. %
-------------------------------------
    1 | 89 | 50.00 |   50.00 |  50.00
    2 | 13 |  7.30 |    7.30 |  57.30
    3 |  7 |  3.93 |    3.93 |  61.24
    4 | 53 | 29.78 |   29.78 |  91.01
    5 | 16 |  8.99 |    8.99 | 100.00
 <NA> |  0 |  0.00 |    <NA> |   <NA>
# Plot the fitted model's classification.
plot(mod1, what = "classification")

# Disabled alternative model-selection code, kept for reference.
# ICL over the same 2-9 component range used for BIC:
# ICL <- mclustICL(data, G = seq(from = 2, to = 9))
# summary(ICL)
# plot(ICL)

# Bootstrap likelihood-ratio test for the VEI model:
# LRT <- mclustBootstrapLRT(data, modelName = "VEI")
# summary(LRT)
# plot(LRT)
# Plain (non-sf) tibble of wide_insurance_p with each row's Mclust
# classification attached as a factor column named `cluster`.
cluster_results <- wide_insurance_p %>%
  st_drop_geometry() %>%
  as_tibble()
cluster_results$cluster <- factor(mod1$classification)

# Mean french_bulldog value within each cluster.
aggregate(french_bulldog ~ cluster, data = cluster_results, FUN = mean)
  cluster french_bulldog
1       1     0.08537539
2       2     0.12670835
3       3     0.14993248
4       4     0.13293652
5       5     0.07927085
# Mean rottweiler value within each cluster.
aggregate(rottweiler ~ cluster, data = cluster_results, FUN = mean)
  cluster rottweiler
1       1 0.16670138
2       2 0.06472732
3       3 0.13460077
4       4 0.12072372
5       5 0.16504495

6 Computing Environment

 R version 4.1.2 (2021-11-01)
 Platform: x86_64-w64-mingw32/x64 (64-bit)
 Running under: Windows 10 x64 (build 18363)
 
 Matrix products: default
 
 attached base packages:
 [1] stats     graphics  grDevices utils     datasets  methods   base     
 
 other attached packages:
  [1] mclust_5.4.9      parameters_0.16.0 modelbased_0.9.0  see_0.6.8        
  [5] correlation_0.7.1 tmap_3.3-2        sf_1.0-5          DT_0.20          
  [9] sjPlot_2.8.10     sjmisc_2.8.9      scales_1.1.1      magrittr_2.0.2   
 [13] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.7       purrr_0.3.4      
 [17] readr_2.1.2       tidyr_1.2.0       tibble_3.1.6      ggplot2_3.3.5    
 [21] tidyverse_1.3.1   pacman_0.5.1     
 
To cite R in publications use:

R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.

To cite the ggplot2 package in publications use:

Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.