Preparations in file 00.Rmd
# Load the dog ownership cost data and rebuild the binary `expensive` flag:
# 1 only for breeds rated "Significantly above average", 0 for every other
# rating. Rows with a missing rating get NA, because ifelse() propagates NA.
dog_ownership_cost <- read_rds("data/dog_ownership_cost.Rds") %>%
  select(-expensive) %>%
  # mutate(expensive = ifelse(cost_compared_to_other_breeds == "Below average", 0, 1)) %>%
  mutate(expensive = ifelse(cost_compared_to_other_breeds == "Significantly above average", 1, 0)) %>%
  select(-cost_compared_to_other_breeds)
length(unique(dog_ownership_cost$SSC_NAME16))
[1] 183
length(unique(dog_ownership_cost$dog_breed))
[1] 182
# Load the SSC geography object (used below with st_drop_geometry(), so it is
# expected to carry a geometry column).
SSC <- read_rds("data/geo/SSC.Rds")
length(unique(SSC$SSC_NAME16))
[1] 183
# Wide-format insurance tables.
# NOTE(review): the `_n` / `_p` suffixes presumably denote counts and
# proportions -- confirm against the preparation script.
wide_insurance_n <- read_rds("data/wide_insurance_n.Rds")
wide_insurance_p <- read_rds("data/wide_insurance_p.Rds")
Important to remember: we have a small number of missing values here!
is.na(expensive) <lgl>
# total N=106007 valid N=106007 mean=0.00 sd=0.04
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------
FALSE | 105809 | 99.81 | 99.81 | 99.81
TRUE | 198 | 0.19 | 0.19 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Breeds:
dog_breed <character>
# total N=198 valid N=198 mean=5.87 sd=3.15
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------------------------
Pointer | 128 | 64.65 | 64.65 | 64.65
Akita | 55 | 27.78 | 27.78 | 92.42
Glen of Imaal Terrier | 4 | 2.02 | 2.02 | 94.44
Canadian Eskimo Dog | 3 | 1.52 | 1.52 | 95.96
Canaan Dog | 2 | 1.01 | 1.01 | 96.97
Central Asian Shepherd Dog | 2 | 1.01 | 1.01 | 97.98
Australian Staghound | 1 | 0.51 | 0.51 | 98.48
Eurasier | 1 | 0.51 | 0.51 | 98.99
Portuguese Podengo | 1 | 0.51 | 0.51 | 99.49
Swedish Lapphund | 1 | 0.51 | 0.51 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Summarizing all dogs, and expensive only.
# Number of expensive dogs per suburb; NA flags are ignored in the sum.
dog_ownership_agg <- dog_ownership_cost %>%
  group_by(SSC_NAME16) %>%
  summarise(dogs_exp = sum(expensive, na.rm = TRUE))

# Attach the counts to the SSC geography and derive the proportion of
# expensive dogs. The join key is spelled out so the join does not depend on
# whichever column names the two tables happen to share.
# NOTE(review): `dogs_total` is not created in this chunk; presumably it is
# already a column of SSC -- confirm.
SSC %<>%
  left_join(dog_ownership_agg, by = "SSC_NAME16") %>%
  mutate(dogs_exp_prop = dogs_exp / dogs_total) %>%
  # keep the geometry column last
  relocate(geometry, .after = last_col())
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00000 0.06166 0.07618 0.07914 0.08865 0.33333
#' Summarise the proportion of expensive dogs by level of a SEIFA index
#'
#' Reads the global `SSC` object, drops its geometry, groups by the supplied
#' column and computes summary statistics of `dogs_exp_prop` per group.
#'
#' @param seifa_index Unquoted name of a column of `SSC` to group by
#'   (e.g. `IRSD_d`).
#' @return An ungrouped tibble with one row per level of `seifa_index` and the
#'   columns `mean`, `sd`, `p25`, `p50` and `p75`. Missing proportions are
#'   removed before each statistic is computed.
seifa_means <- function(seifa_index) {
  SSC %>%
    st_drop_geometry() %>%
    group_by({{ seifa_index }}) %>%
    summarise(
      mean = mean(dogs_exp_prop, na.rm = TRUE),
      sd = sd(dogs_exp_prop, na.rm = TRUE),
      p25 = quantile(dogs_exp_prop, 0.25, na.rm = TRUE),
      p50 = quantile(dogs_exp_prop, 0.50, na.rm = TRUE),
      p75 = quantile(dogs_exp_prop, 0.75, na.rm = TRUE)
    ) %>%
    ungroup()
}
#' Kendall correlation between a SEIFA index and the expensive-dog proportion
#'
#' Reads the global `SSC` object, keeps only the supplied column and
#' `dogs_exp_prop`, and passes them to `correlation()`.
#'
#' @param seifa_index Unquoted name of a column of `SSC` (e.g. `IRSD_d`).
#' @return The object returned by `correlation(method = "kendall")`.
seifa_cor <- function(seifa_index) {
  SSC %>%
    st_drop_geometry() %>%
    select({{ seifa_index }}, dogs_exp_prop) %>%
    # Factors are replaced by their integer level codes.
    # NOTE(review): this equals the decile only if the levels are ordered
    # 1..10 -- confirm.
    mutate(across(where(is.factor), as.numeric)) %>%
    correlation(method = "kendall")
}
#' Plot the proportion of expensive dogs against a SEIFA index
#'
#' Fits `lm(dogs_exp_prop ~ <seifa_index>)` on the global `SSC` object,
#' obtains estimated means with `estimate_means()`, and overlays them (line
#' plus point ranges) on violins and jittered points of the raw proportions.
#'
#' @param seifa_index Unquoted name of a column of `SSC` (e.g. `IRSD_d`).
#' @return A ggplot object.
seifa_plot <- function(seifa_index) {
  # substitute() splices the caller's column name into the formula, so the
  # fitted model carries the real variable name rather than `seifa_index`.
  model <- eval(substitute(lm(dogs_exp_prop ~ seifa_index,
                              data = SSC, na.action = na.omit)))
  means <- estimate_means(model)

  ggplot(SSC,
         aes(x = {{ seifa_index }},
             y = dogs_exp_prop,
             fill = {{ seifa_index }})) +
    geom_violin(alpha = 0.66) +
    geom_jitter2(width = 0.05, alpha = 0.5) +
    # group = 1 joins the per-level means into a single line
    geom_line(data = means, aes(y = Mean, group = 1), size = 1) +
    geom_pointrange(data = means,
                    aes(y = Mean, ymin = CI_low, ymax = CI_high),
                    size = 1,
                    color = "white") +
    scale_fill_brewer(palette = "BrBG") +
    ylab("Proportion of expensive dogs") +
    theme_modern()
}
seifa_means(IRSD_d)
# A tibble: 10 x 6
IRSD_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.110 0.0380 0.0894 0.113 0.132
2 2 0.0845 0.0856 0.0508 0.0759 0.0870
3 3 0.0760 0.0148 0.0672 0.0802 0.0863
4 4 0.0745 0.0170 0.0617 0.0723 0.0880
5 5 0.0781 0.0248 0.0701 0.0810 0.0880
6 6 0.0730 0.0118 0.0646 0.0728 0.0812
7 7 0.0746 0.0173 0.0618 0.0734 0.0845
8 8 0.0748 0.0121 0.0679 0.0749 0.0809
9 9 0.0652 0.0150 0.0584 0.0628 0.0684
10 10 0.0789 0.0258 0.0564 0.0699 0.0985
seifa_cor(IRSD_d)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IRSD_d | dogs_exp_prop | -0.20 | [-0.29, -0.10] | -3.76 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d)
seifa_means(IRSD_d_orig)
# A tibble: 10 x 6
IRSD_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.125 0.0107 0.120 0.125 0.131
2 2 0.100 0.0480 0.0810 0.107 0.135
3 3 0.126 0.0511 0.0977 0.113 0.159
4 4 0.0985 0.0125 0.0927 0.0954 0.105
5 5 0.0837 0.0609 0.0660 0.0759 0.0845
6 6 0.0817 0.0882 0.0304 0.0821 0.0901
7 7 0.0758 0.0157 0.0602 0.0780 0.0867
8 8 0.0743 0.0251 0.0643 0.0756 0.0835
9 9 0.0732 0.0131 0.0640 0.0736 0.0857
10 10 0.0742 0.0192 0.0591 0.0699 0.0868
seifa_cor(IRSD_d_orig)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
------------------------------------------------------------------------
IRSD_d_orig | dogs_exp_prop | -0.18 | [-0.27, -0.09] | -3.39 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d_orig)
seifa_means(IRSAD_d)
# A tibble: 10 x 6
IRSAD_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.117 0.0582 0.0900 0.115 0.138
2 2 0.0843 0.0687 0.0753 0.0818 0.0901
3 3 0.0724 0.0225 0.0585 0.0776 0.0863
4 4 0.0766 0.0113 0.0647 0.08 0.0835
5 5 0.0720 0.0170 0.0604 0.0677 0.0788
6 6 0.0794 0.0195 0.0676 0.0769 0.0896
7 7 0.0743 0.0145 0.0634 0.0749 0.0807
8 8 0.0699 0.0132 0.0599 0.0709 0.0788
9 9 0.0726 0.0214 0.0596 0.0663 0.0851
10 10 0.0734 0.0231 0.0564 0.0644 0.0870
seifa_cor(IRSAD_d)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IRSAD_d | dogs_exp_prop | -0.23 | [-0.32, -0.13] | -4.35 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d)
seifa_means(IRSAD_d_orig)
# A tibble: 10 x 6
IRSAD_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.125 0.0107 0.120 0.125 0.131
2 2 0.105 0.0737 0.0646 0.103 0.144
3 3 0.121 0.0280 0.106 0.125 0.141
4 4 0.137 0.0306 0.127 0.137 0.148
5 5 0.102 0.0779 0.0857 0.0907 0.0966
6 6 0.0855 0.0919 0.0326 0.0818 0.0951
7 7 0.0812 0.00990 0.0767 0.0824 0.0870
8 8 0.0701 0.0206 0.0607 0.0746 0.0832
9 9 0.0756 0.0152 0.0632 0.0735 0.0863
10 10 0.0736 0.0187 0.0591 0.0699 0.0822
seifa_cor(IRSAD_d_orig)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-------------------------------------------------------------------------
IRSAD_d_orig | dogs_exp_prop | -0.22 | [-0.31, -0.12] | -3.98 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d_orig)
seifa_means(IER_d)
# A tibble: 10 x 6
IER_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.0891 0.0343 0.0663 0.0924 0.116
2 2 0.0843 0.0343 0.0665 0.0765 0.0902
3 3 0.0764 0.0135 0.0656 0.0776 0.0814
4 4 0.0898 0.0477 0.0647 0.0818 0.0890
5 5 0.0806 0.0714 0.0589 0.0762 0.0894
6 6 0.0783 0.0144 0.0672 0.0794 0.0863
7 7 0.0710 0.0259 0.0632 0.0673 0.0862
8 8 0.0689 0.0235 0.0605 0.0691 0.0756
9 9 0.0706 0.0141 0.0568 0.0704 0.0808
10 10 0.0806 0.0247 0.0596 0.0744 0.0942
seifa_cor(IER_d)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
--------------------------------------------------------------------
IER_d | dogs_exp_prop | -0.10 | [-0.20, -0.01] | -2.01 | 0.045*
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d)
seifa_means(IER_d_orig)
# A tibble: 10 x 6
IER_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.0891 0.0334 0.0675 0.0904 0.115
2 2 0.0833 0.0333 0.0628 0.0750 0.0910
3 3 0.0869 0.0443 0.0753 0.0783 0.0833
4 4 0.0797 0.0242 0.0631 0.0792 0.0867
5 5 0.0790 0.0696 0.0600 0.072 0.0894
6 6 0.0799 0.0187 0.0650 0.0818 0.0876
7 7 0.0729 0.0157 0.0642 0.0716 0.0777
8 8 0.0634 0.0343 0.0611 0.0673 0.0865
9 9 0.0701 0.0142 0.0614 0.0682 0.0743
10 10 0.0762 0.0210 0.0591 0.0717 0.0889
seifa_cor(IER_d_orig)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
--------------------------------------------------------------------
IER_d_orig | dogs_exp_prop | -0.11 | [-0.21, -0.02] | -2.17 | 0.030*
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d_orig)
seifa_means(IEO_d)
# A tibble: 10 x 6
IEO_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.116 0.0583 0.0885 0.114 0.138
2 2 0.0819 0.0226 0.0761 0.0863 0.0914
3 3 0.0784 0.0690 0.0589 0.0793 0.0832
4 4 0.0728 0.0124 0.0636 0.0702 0.0790
5 5 0.0801 0.0163 0.0645 0.0814 0.0927
6 6 0.0774 0.0191 0.0610 0.0788 0.0866
7 7 0.0699 0.0133 0.0587 0.0704 0.0804
8 8 0.0746 0.0222 0.0669 0.0704 0.0792
9 9 0.0729 0.0179 0.0633 0.0684 0.0799
10 10 0.0676 0.0192 0.0554 0.0590 0.0699
seifa_cor(IEO_d)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IEO_d | dogs_exp_prop | -0.27 | [-0.35, -0.18] | -5.13 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d)
seifa_means(IEO_d_orig)
# A tibble: 10 x 6
IEO_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.143 0.0356 0.122 0.131 0.151
2 2 0.0757 0.0535 0.0499 0.0793 0.103
3 3 0.0795 0.112 0.0398 0.0795 0.119
4 4 0.126 0.0553 0.0894 0.105 0.138
5 5 0.0952 0.0116 0.0894 0.0907 0.0922
6 6 0.0897 0.00705 0.0852 0.0897 0.0942
7 7 0.0767 0.0751 0.0652 0.0769 0.0836
8 8 0.0765 0.0130 0.0647 0.0776 0.0843
9 9 0.0751 0.0156 0.0614 0.0737 0.0862
10 10 0.0727 0.0186 0.0585 0.0691 0.0810
seifa_cor(IEO_d_orig)
# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IEO_d_orig | dogs_exp_prop | -0.27 | [-0.36, -0.18] | -4.89 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d_orig)
# Analysis matrix: the columns from `alaskan_malamute` through the last
# column, with geometry dropped, as a plain tibble.
data <-
  # wide_insurance_n %>%
  wide_insurance_p %>%
  st_drop_geometry() %>%
  select(alaskan_malamute:last_col()) %>%
  as_tibble()

# View(cov(data))

# Single-component PCA on the unstandardised columns, then print the loadings.
pca <- principal_components(data,
                            standardize = FALSE,
                            n = 1)
pca
# Loadings from Principal Component Analysis (no rotation)
Variable | PC1 | Complexity
---------------------------------------------
alaskan_malamute | -0.03 | 1.00
boxer | -0.15 | 1.00
british_bulldog | -0.07 | 1.00
bull_terrier | -0.05 | 1.00
bullmastiff | -0.07 | 1.00
dogue_de_bordeaux | -0.02 | 1.00
french_bulldog | -0.10 | 1.00
great_dane | -0.07 | 1.00
irish_wolfhound | -0.04 | 1.00
mastiff | -0.05 | 1.00
rottweiler | -0.15 | 1.00
shar_pei | -0.07 | 1.00
yorkshire_terrier | -0.02 | 1.00
airedale_terrier | -0.01 | 1.00
irish_setter | -7.16e-03 | 1.00
pekingese | -5.47e-03 | 1.00
basset_hound | -0.01 | 1.00
bedlington_terrier | -4.79e-03 | 1.00
bernese_mountain_dog | -5.38e-03 | 1.00
italian_corso_dog | -1.54e-03 | 1.00
neapolitan_mastiff | -0.01 | 1.00
newfoundland | -2.76e-03 | 1.00
weimaraner | -0.03 | 1.00
poodle_standard | -4.55e-03 | 1.00
st_bernard | -6.28e-03 | 1.00
miniature_pinscher | -8.28e-03 | 1.00
welsh_corgi_pembroke | -1.09e-03 | 1.00
welsh_corgi_cardigan | -2.61e-04 | 1.00
The unique principal component accounted for 57.29% of the total variance of the original data.
summary(pca)
# (Explained) Variance of Components
Parameter | PC1
---------------------------------------
Eigenvalues | 0.083
Variance Explained | 0.573
Variance Explained (Cumulative) | 0.573
Variance Explained (Proportion) | 0.573
plot(pca)
# Attach the first-component scores to the area-level columns
# (SSC_CODE16 through caution) and bin the scores into ten equal-sized groups
# so they can be compared with the SEIFA deciles.
pca_results <- wide_insurance_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  select(SSC_CODE16:caution) %>%
  mutate(pca_raw = predict(pca)$Component_1,
         pca = ntile(pca_raw, 10))
IRSD_d
ggplot(pca_results, aes(x = IRSD, y = pca_raw)) +
geom_point()
pca == IRSD_d <lgl>
# total N=178 valid N=178 mean=0.09 sd=0.29
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 162 | 91.01 | 91.01 | 91.01
TRUE | 16 | 8.99 | 8.99 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IRSAD_d
ggplot(pca_results, aes(x = IRSAD, y = pca_raw)) +
geom_point()
pca == IRSAD_d <lgl>
# total N=178 valid N=178 mean=0.10 sd=0.30
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 160 | 89.89 | 89.89 | 89.89
TRUE | 18 | 10.11 | 10.11 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IER_d
ggplot(pca_results, aes(x = IER, y = pca_raw)) +
geom_point()
pca == IER_d <lgl>
# total N=178 valid N=178 mean=0.08 sd=0.28
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 91.57 | 91.57 | 91.57
TRUE | 15 | 8.43 | 8.43 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IEO_d
ggplot(pca_results, aes(x = IEO, y = pca_raw)) +
geom_point()
pca == IEO_d <lgl>
# total N=178 valid N=178 mean=0.08 sd=0.28
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 91.57 | 91.57 | 91.57
TRUE | 15 | 8.43 | 8.43 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
# Analysis matrix for clustering: the columns from `alaskan_malamute` through
# the last column, with geometry dropped.
data <-
  wide_insurance_p %>%
  st_drop_geometry() %>%
  select(alaskan_malamute:last_col()) %>%
  as_tibble()

# Ask several methods for the optimal number of clusters and print the
# consensus.
n <- n_clusters(data, package = c("easystats", "NbClust", "mclust"))
n
# Method Agreement Procedure:
The choice of 2 clusters is supported by 9 (42.86%) methods out of 21 (Elbow, Silhouette, Duda, Pseudot2, Beale, Frey, Mcclain, Dunn, SDindex).
plot(n)
# k-means solution with two clusters; print the clustering summary.
rez_kmeans <- cluster_analysis(data, n = 2, method = "kmeans")

rez_kmeans
# Clustering Solution
The 2 clusters accounted for 3.46% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 7 | 520.29 | -0.76 | 1.87 | 0.29 | -0.39 | -0.38 | 0.15 | -0.58 | -1.15 | -0.33 | -0.80 | -1.36 | 0.33 | -0.26 | -0.53 | -0.38 | -0.46 | -0.52 | 1.27 | 0.33 | -0.23 | -0.62 | -0.22 | 1.93 | -0.26 | 2.15 | 1.76 | -0.18 | -0.10
2 | 171 | 4264.01 | 0.03 | -0.08 | -0.01 | 0.02 | 0.02 | -5.97e-03 | 0.02 | 0.05 | 0.01 | 0.03 | 0.06 | -0.01 | 0.01 | 0.02 | 0.02 | 0.02 | 0.02 | -0.05 | -0.01 | 9.36e-03 | 0.03 | 8.92e-03 | -0.08 | 0.01 | -0.09 | -0.07 | 7.57e-03 | 4.22e-03
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 171.698 | 4784.302 | 0.035
# You can access the predicted clusters via 'predict()'.
plot(rez_kmeans)
plot(summary(rez_kmeans))
# Attach the k-means cluster assignment to the (geometry-free) insurance table.
cluster_results <- wide_insurance_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  mutate(cluster = predict(rez_kmeans))
frq(cluster_results, cluster)
cluster <integer>
# total N=178 valid N=178 mean=1.96 sd=0.19
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
1 | 7 | 3.93 | 3.93 | 3.93
2 | 171 | 96.07 | 96.07 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
aggregate(data = cluster_results, french_bulldog ~ cluster, mean)
cluster french_bulldog
1 1 0.04195011
2 2 0.10710798
aggregate(data = cluster_results, rottweiler ~ cluster, mean)
cluster rottweiler
1 1 0.0260771
2 2 0.1489861
# Hierarchical clustering cut at two clusters; print the clustering summary.
rez_hclust <- cluster_analysis(data, n = 2, method = "hclust")

rez_hclust
# Clustering Solution
The 2 clusters accounted for 3.86% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 175 | 4516.16 | 0.01 | 4.84e-03 | -0.01 | -2.27e-03 | 4.31e-03 | -0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.03 | -0.03 | 8.17e-03 | 9.03e-03 | 6.49e-03 | 7.90e-03 | 9.00e-03 | -0.05 | 6.40e-03 | 3.92e-03 | 0.01 | 3.73e-03 | -0.08 | 4.38e-03 | -0.04 | -0.05 | 3.17e-03 | 1.77e-03
2 | 3 | 248.54 | -0.76 | -0.28 | 0.75 | 0.13 | -0.25 | 1.33 | -0.97 | -1.33 | -0.97 | -1.14 | -1.66 | 1.48 | -0.48 | -0.53 | -0.38 | -0.46 | -0.52 | 2.70 | -0.37 | -0.23 | -0.62 | -0.22 | 4.93 | -0.26 | 2.44 | 3.21 | -0.18 | -0.10
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 191.303 | 4764.697 | 0.039
# You can access the predicted clusters via 'predict()'.
plot(rez_hclust)
# Hierarchical k-means with two clusters; print the clustering summary.
rez_hkmeans <- cluster_analysis(data, n = 2, method = "hkmeans")

rez_hkmeans
# Clustering Solution
The 2 clusters accounted for 3.86% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 175 | 4516.16 | 0.01 | 4.84e-03 | -0.01 | -2.27e-03 | 4.31e-03 | -0.02 | 0.02 | 0.02 | 0.02 | 0.02 | 0.03 | -0.03 | 8.17e-03 | 9.03e-03 | 6.49e-03 | 7.90e-03 | 9.00e-03 | -0.05 | 6.40e-03 | 3.92e-03 | 0.01 | 3.73e-03 | -0.08 | 4.38e-03 | -0.04 | -0.05 | 3.17e-03 | 1.77e-03
2 | 3 | 248.54 | -0.76 | -0.28 | 0.75 | 0.13 | -0.25 | 1.33 | -0.97 | -1.33 | -0.97 | -1.14 | -1.66 | 1.48 | -0.48 | -0.53 | -0.38 | -0.46 | -0.52 | 2.70 | -0.37 | -0.23 | -0.62 | -0.22 | 4.93 | -0.26 | 2.44 | 3.21 | -0.18 | -0.10
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 191.303 | 4764.697 | 0.039
# You can access the predicted clusters via 'predict()'.
plot(rez_hkmeans)
# PAM (partitioning around medoids) with two clusters; print the summary.
rez_pam <- cluster_analysis(data, n = 2, method = "pam")

rez_pam
# Clustering Solution
The 2 clusters accounted for 3.43% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 74 | 1919.91 | 0.31 | -0.20 | -0.37 | -0.14 | 0.30 | -0.03 | -0.39 | -4.70e-03 | -0.08 | 0.34 | 0.22 | 0.50 | 0.09 | -0.35 | -0.12 | -0.12 | 0.31 | -0.06 | 0.26 | 0.02 | -0.02 | -0.04 | -0.12 | -0.05 | 4.52e-03 | -0.16 | -0.14 | 0.05
2 | 104 | 2866.20 | -0.22 | 0.14 | 0.26 | 0.10 | -0.21 | 0.02 | 0.28 | 3.34e-03 | 0.06 | -0.24 | -0.16 | -0.35 | -0.06 | 0.25 | 0.09 | 0.09 | -0.22 | 0.05 | -0.18 | -0.02 | 0.01 | 0.03 | 0.09 | 0.03 | -3.21e-03 | 0.11 | 0.10 | -0.04
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 169.887 | 4786.113 | 0.034
# You can access the predicted clusters via 'predict()'.
plot(rez_pam)
# Search for a suitable DBSCAN eps and print the suggestion.
eps <- n_clusters_dbscan(data, min_size = 0.01)

eps
The DBSCAN method, based on the total clusters sum of squares, suggests that the optimal eps = 8.76170574003452 (with min. cluster size set to 2), which corresponds to 1 clusters.
plot(eps)
# DBSCAN with a manually chosen eps of 4.5 (not the value suggested by
# n_clusters_dbscan()); print the clustering summary.
rez_dbscan <- cluster_analysis(data, method = "dbscan", dbscan_eps = 4.5)

rez_dbscan
# Clustering Solution
The 2 clusters accounted for 1.05% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0 | 92 | 4060.21 | -0.02 | -2.53e-04 | -0.11 | -0.09 | -0.06 | 0.03 | 0.14 | -0.12 | -0.02 | 0.01 | -0.17 | 0.01 | 0.08 | 0.12 | 0.13 | 0.18 | 6.51e-04 | 0.11 | 0.05 | 0.09 | -0.16 | 0.07 | -6.32e-03 | 0.10 | 0.09 | 0.09 | 0.15 | 0.10
1 | 86 | 843.87 | 0.03 | 2.70e-04 | 0.12 | 0.10 | 0.06 | -0.03 | -0.15 | 0.13 | 0.02 | -0.01 | 0.18 | -0.01 | -0.09 | -0.13 | -0.14 | -0.20 | -6.96e-04 | -0.12 | -0.05 | -0.10 | 0.18 | -0.08 | 6.76e-03 | -0.11 | -0.10 | -0.09 | -0.16 | -0.10
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 51.917 | 843.870 | 0.010
# You can access the predicted clusters via 'predict()'.
plot(rez_dbscan)
# HDBSCAN with default settings; print the clustering summary.
rez_hdbscan <- cluster_analysis(data, method = "hdbscan")

rez_hdbscan
# Clustering Solution
The unique cluster accounted for 0.00% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0 | 178 | 4956 | 2.54e-17 | 6.80e-18 | 4.50e-17 | 6.84e-17 | 5.18e-18 | -3.69e-17 | 2.36e-17 | 5.81e-17 | 1.67e-17 | -5.60e-17 | -2.27e-17 | -3.01e-17 | 8.27e-18 | -4.44e-17 | 3.02e-19 | 3.89e-17 | 3.95e-17 | -1.12e-17 | -4.61e-18 | 7.53e-18 | 7.46e-17 | 1.17e-17 | -2.57e-17 | 6.33e-18 | 7.07e-19 | -1.51e-17 | 1.27e-17 | -6.26e-18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 0.000 | 0.000 | 0.000
# You can access the predicted clusters via 'predict()'.
# plot(rez_hdbscan)
# PAMK: no `n` is supplied, so the method picks the number of clusters itself.
rez_pamk <- cluster_analysis(data, method = "pamk")

rez_pamk
# Clustering Solution
The 10 clusters accounted for 26.62% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 39 | 819.30 | 0.53 | -0.10 | -0.41 | -0.04 | 0.43 | 0.13 | -0.42 | -0.24 | -0.45 | 0.32 | 0.31 | 0.51 | -0.03 | -0.28 | -0.25 | -0.39 | 0.70 | -0.05 | 0.13 | -3.60e-03 | -0.21 | 0.02 | -0.18 | 9.18e-03 | -0.27 | -0.14 | -0.10 | -0.10
10 | 1 | 0.00 | -0.76 | 0.85 | -1.31 | -1.20 | -0.79 | -0.75 | -0.97 | 1.44 | 2.82 | 0.49 | -0.77 | -1.13 | -0.48 | -0.53 | -0.38 | -0.46 | -0.52 | -0.29 | -0.37 | -0.23 | 3.58 | 10.72 | -0.70 | -0.26 | -0.34 | 2.52 | -0.18 | -0.10
2 | 33 | 1007.90 | 0.14 | -0.23 | -0.41 | 0.08 | -0.14 | -0.33 | -0.29 | 0.54 | 0.59 | 0.27 | -0.18 | 0.22 | -0.03 | -0.26 | 0.47 | 0.71 | -0.24 | 0.02 | 0.57 | -0.23 | -0.36 | -0.08 | 0.14 | -0.14 | 0.70 | 0.01 | 0.13 | -0.10
3 | 4 | 47.41 | 0.17 | -0.58 | -0.46 | -0.60 | 0.02 | 1.26 | 0.34 | 0.16 | -0.20 | -0.14 | -0.11 | -0.26 | 0.27 | -0.34 | -0.38 | 1.67 | 0.69 | -0.03 | -0.04 | 5.45 | 1.10 | 0.12 | -0.49 | 0.02 | -0.34 | 0.26 | -0.18 | -0.10
4 | 55 | 943.44 | -0.30 | 0.44 | 0.28 | 0.06 | -0.21 | 0.07 | 0.07 | -0.09 | 0.15 | -0.41 | 0.04 | -0.36 | -0.14 | 0.56 | 0.02 | -0.24 | -0.12 | -0.01 | -0.17 | -0.10 | -0.34 | -0.04 | 0.14 | -0.13 | -0.01 | -0.03 | 0.02 | -0.10
5 | 12 | 301.83 | -0.76 | -0.73 | 0.99 | 0.04 | -0.18 | -0.47 | 2.44 | -1.17 | -0.80 | 0.27 | -1.19 | -0.22 | 1.53 | -9.05e-03 | -0.19 | -0.07 | -0.43 | -0.29 | -0.37 | -0.23 | -0.62 | -0.22 | -0.18 | 0.26 | -0.24 | -0.11 | 0.23 | -0.10
6 | 30 | 495.73 | 0.09 | -0.03 | 0.07 | -0.11 | 0.07 | -0.01 | -0.15 | 0.39 | -0.08 | 0.04 | 0.28 | -0.08 | -0.26 | -0.29 | -0.04 | 0.06 | -0.26 | -0.06 | -0.26 | -0.15 | 1.27 | -0.12 | -0.32 | 0.29 | -0.19 | -0.20 | -0.09 | -0.10
7 | 2 | 20.89 | -0.52 | -0.19 | -0.28 | 0.38 | 0.05 | -0.37 | -0.49 | -0.20 | 1.14 | -0.47 | 0.91 | -0.14 | -0.17 | 0.15 | -0.38 | -0.46 | -0.52 | -0.29 | -0.37 | -0.23 | 0.63 | -0.22 | 0.37 | 0.20 | -0.34 | 0.28 | -0.18 | 9.08
8 | 1 | 0.00 | -0.76 | -1.34 | 1.54 | -1.20 | 0.82 | 5.49 | -0.97 | -1.33 | -0.97 | -1.14 | -1.66 | 1.22 | -0.48 | -0.53 | -0.38 | -0.46 | -0.52 | -0.29 | -0.37 | -0.23 | -0.62 | -0.22 | 2.28 | -0.26 | -0.34 | 10.35 | -0.18 | -0.10
9 | 1 | 0.00 | -0.76 | 0.24 | 2.02 | 2.81 | -0.79 | -0.75 | -0.97 | -1.33 | -0.97 | -1.14 | -1.66 | -1.13 | -0.48 | -0.53 | -0.38 | -0.46 | -0.52 | 8.68 | -0.37 | -0.23 | -0.62 | -0.22 | 6.26 | -0.26 | -0.34 | -0.36 | -0.18 | -0.10
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 1319.503 | 3636.497 | 0.266
# You can access the predicted clusters via 'predict()'.
plot(rez_pamk)
p_load(mclust)
# Mixture-model clustering with default settings; print the summary.
rez_mixture <- cluster_analysis(data, method = "mixture")

rez_mixture
# Clustering Solution
The unique cluster accounted for 0.00% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | alaskan_malamute | boxer | british_bulldog | bull_terrier | bullmastiff | dogue_de_bordeaux | french_bulldog | great_dane | irish_wolfhound | mastiff | rottweiler | shar_pei | yorkshire_terrier | airedale_terrier | irish_setter | pekingese | basset_hound | bedlington_terrier | bernese_mountain_dog | italian_corso_dog | neapolitan_mastiff | newfoundland | weimaraner | poodle_standard | st_bernard | miniature_pinscher | welsh_corgi_pembroke | welsh_corgi_cardigan
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 178 | 4956 | 2.54e-17 | 6.80e-18 | 4.50e-17 | 6.84e-17 | 5.18e-18 | -3.69e-17 | 2.36e-17 | 5.81e-17 | 1.67e-17 | -5.60e-17 | -2.27e-17 | -3.01e-17 | 8.27e-18 | -4.44e-17 | 3.02e-19 | 3.89e-17 | 3.95e-17 | -1.12e-17 | -4.61e-18 | 7.53e-18 | 7.46e-17 | 1.17e-17 | -2.57e-17 | 6.33e-18 | 7.07e-19 | -1.51e-17 | 1.27e-17 | -6.26e-18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
4956.000 | 0.000 | 4956.000 | 0.000
# You can access the predicted clusters via 'predict()'.
plot(rez_mixture)
# Collect the fitted clustering solutions for the meta-analysis.
# NOTE(review): rez_pamk is not part of this list -- confirm that the omission
# is intentional.
list_of_results <- list(rez_kmeans, rez_hclust, rez_hkmeans, rez_pam,
                        # rez_hclust2,
                        rez_dbscan, rez_hdbscan, rez_mixture)

# Combine the solutions with cluster_meta(); the resulting matrix is drawn as
# a heatmap below.
probability_matrix <- cluster_meta(list_of_results)
heatmap(probability_matrix, scale = "none",
col = grDevices::hcl.colors(256, palette = "inferno"))
# Analysis matrix for the mixture models: the columns from `alaskan_malamute`
# through the last column, with geometry dropped.
data <-
  wide_insurance_p %>%
  st_drop_geometry() %>%
  select(alaskan_malamute:last_col()) %>%
  as_tibble()

p_load(mclust)

# BIC for Gaussian mixture models with 2 to 9 components.
# Note: this object masks stats::BIC() in the global environment.
BIC <- mclustBIC(data, G = seq(from = 2, to = 9))
plot(BIC)
# BIC
summary(BIC)
Best BIC values:
VEI,5 VEI,4 VEI,6
BIC 23064.34 23038.86932 23024.00379
BIC diff 0.00 -25.46905 -40.33457
# Fit the mixture model, reusing the precomputed BIC object via `x`, and
# print the fitted object.
mod1 <- Mclust(data, x = BIC)
mod1
'Mclust' model object: (VEI,5)
Available components:
[1] "call" "data" "modelName" "n"
[5] "d" "G" "BIC" "loglik"
[9] "df" "bic" "icl" "hypvol"
[13] "parameters" "z" "classification" "uncertainty"
summary(mod1, parameters = TRUE)
----------------------------------------------------
Gaussian finite mixture model fitted by EM algorithm
----------------------------------------------------
Mclust VEI (diagonal, equal shape) model with 5 components:
log-likelihood n df BIC ICL
11988.17 178 176 23064.34 23059.05
Clustering table:
1 2 3 4 5
89 13 7 53 16
Mixing probabilities:
1 2 3 4 5
0.49291878 0.07303509 0.03904661 0.30493951 0.09006000
Means:
[,1] [,2]
alaskan_malamute 0.0280260663 0.01523890420481271
boxer 0.1380130126 0.09251023230370482
british_bulldog 0.0703786970 0.06307841676974323
bull_terrier 0.0548620098 0.04967029096208535
bullmastiff 0.0750592832 0.11720898243497578
dogue_de_bordeaux 0.0160290654 0.01716535731557738
french_bulldog 0.0852446464 0.12670960528058386
great_dane 0.0849772815 0.04689081037828381
irish_wolfhound 0.0427095269 0.03036521620785435
mastiff 0.0541565500 0.01781719431315327
rottweiler 0.1672558659 0.06472812166570897
shar_pei 0.0736443850 0.06730510039360194
yorkshire_terrier 0.0149442332 0.05476075199356949
airedale_terrier 0.0087792248 0.01840293688166407
irish_setter 0.0041701205 0.00485050287557857
[,3]
alaskan_malamute 0.02307868954891480542390702623833931284025311470031738281250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
boxer 0.09317627409444825814510693362535675987601280212402343750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
british_bulldog 0.06512704622438660051475523005137802101671695709228515625000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bull_terrier 0.03518900869576900569679622776675387285649776458740234375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bullmastiff 0.06475947220930737546762401279920595698058605194091796875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dogue_de_bordeaux 0.03949146590040625987017008924340188968926668167114257812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
french_bulldog 0.14895948398939018453290827892487868666648864746093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
great_dane 0.07620511741503216418536226228752639144659042358398437500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_wolfhound 0.04659996204048265366903791573349735699594020843505859375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
mastiff 0.05387535876531312356574332511627289932221174240112304687500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
rottweiler 0.13483370784932108277232032378378789871931076049804687500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
shar_pei 0.05613689976338936266087031867755285929888486862182617187500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
yorkshire_terrier 0.02299205053934313958130530863854801282286643981933593750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
airedale_terrier 0.01392807654745298502618755520643389900214970111846923828125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_setter 0.00000000000000000000183731842700712235862819943221779794839676469564437866210937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
[,4]
alaskan_malamute 0.034972438226513739845113803994536283425986766815185546875000000000000000000000000000000000000000000000000000000
boxer 0.162774405083149720230295542933163233101367950439453125000000000000000000000000000000000000000000000000000000000
british_bulldog 0.064281559077350669539896443893667310476303100585937500000000000000000000000000000000000000000000000000000000000
bull_terrier 0.043268086948827569337971965524047845974564552307128906250000000000000000000000000000000000000000000000000000000
bullmastiff 0.057933001416211514500442802955149090848863124847412109375000000000000000000000000000000000000000000000000000000
dogue_de_bordeaux 0.015720621420363591752566989612205361481755971908569335937500000000000000000000000000000000000000000000000000000
french_bulldog 0.132188651912820254175073841906851157546043395996093750000000000000000000000000000000000000000000000000000000000
great_dane 0.060850530011690669163204603364647482521831989288330078125000000000000000000000000000000000000000000000000000000
irish_wolfhound 0.031272186141217704624661877232938422821462154388427734375000000000000000000000000000000000000000000000000000000
mastiff 0.062307360547957629792747979990963358432054519653320312500000000000000000000000000000000000000000000000000000000
rottweiler 0.120872611026586973537177982507273554801940917968750000000000000000000000000000000000000000000000000000000000000
shar_pei 0.060537649694227757546372004071599803864955902099609375000000000000000000000000000000000000000000000000000000000
yorkshire_terrier 0.021856493110238572885606345153064467012882232666015625000000000000000000000000000000000000000000000000000000000
airedale_terrier 0.020748329175947131247648158591800893191248178482055664062500000000000000000000000000000000000000000000000000000
irish_setter 0.014770551368987038659552624153548094909638166427612304687500000000000000000000000000000000000000000000000000000
[,5]
alaskan_malamute 0.018890382428315834889565039134140533860772848129272460937500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
boxer 0.142958598962115990271826149182743392884731292724609375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
british_bulldog 0.045954123313447801935094361169831245206296443939208984375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bull_terrier 0.053382937498291524458515766582422656938433647155761718750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
bullmastiff 0.046800440036258564302507068077829899266362190246582031250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
dogue_de_bordeaux 0.017505275546984531281902164323582837823778390884399414062500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
french_bulldog 0.079355418404189678738269719815434655174612998962402343750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
great_dane 0.078319430557538871617850873008137568831443786621093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_wolfhound 0.054518537634597484531351341274785227142274379730224609375000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
mastiff 0.051766762430036472597549845886533148586750030517578125000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
rottweiler 0.164978520707840248249453907192219048738479614257812500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
shar_pei 0.075679002581283363881858861077489564195275306701660156250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
yorkshire_terrier 0.037399094399947867273237278595843235962092876434326171875000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
airedale_terrier 0.009802017106132451459976628882486693328246474266052246093750000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
irish_setter 0.005766900955896783753651391180028440430760383605957031250000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
[ reached getOption("max.print") -- omitted 13 rows ]
Variances:
[,,1]
alaskan_malamute boxer british_bulldog bull_terrier
alaskan_malamute 0.0006448616 0.000000000 0.00000000 0.000000000
boxer 0.0000000000 0.004448486 0.00000000 0.000000000
bullmastiff dogue_de_bordeaux french_bulldog great_dane
alaskan_malamute 0.000000000 0.000000000 0.000000000 0.000000000
boxer 0.000000000 0.000000000 0.000000000 0.000000000
irish_wolfhound mastiff rottweiler shar_pei
alaskan_malamute 0.0000000000 0.000000000 0.000000000 0.000000000
boxer 0.0000000000 0.000000000 0.000000000 0.000000000
yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute 0.0000000000 0.0000000000 0.0000000000
boxer 0.0000000000 0.0000000000 0.0000000000
pekingese basset_hound bedlington_terrier
alaskan_malamute 0.00000000000 0.0000000000 0.00000000000
boxer 0.00000000000 0.0000000000 0.00000000000
bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute 0.00000000000 0.000000000000 0.0000000000
boxer 0.00000000000 0.000000000000 0.0000000000
newfoundland weimaraner poodle_standard st_bernard
alaskan_malamute 0.00000000000 0.0000000000 0.00000000000 0.0000000000
boxer 0.00000000000 0.0000000000 0.00000000000 0.0000000000
miniature_pinscher welsh_corgi_pembroke
alaskan_malamute 0.000000000 0.000000000000
boxer 0.000000000 0.000000000000
welsh_corgi_cardigan
alaskan_malamute 0.00000000000
boxer 0.00000000000
[ reached getOption("max.print") -- omitted 26 rows ]
[,,2]
alaskan_malamute boxer british_bulldog bull_terrier
alaskan_malamute 0.02196267 0.0000000 0.00000000 0.00000000
boxer 0.00000000 0.1515064 0.00000000 0.00000000
bullmastiff dogue_de_bordeaux french_bulldog great_dane
alaskan_malamute 0.00000000 0.00000000 0.0000000 0.0000000
boxer 0.00000000 0.00000000 0.0000000 0.0000000
irish_wolfhound mastiff rottweiler shar_pei
alaskan_malamute 0.00000000 0.00000000 0.0000000 0.0000000
boxer 0.00000000 0.00000000 0.0000000 0.0000000
yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute 0.00000000 0.000000000 0.000000000
boxer 0.00000000 0.000000000 0.000000000
pekingese basset_hound bedlington_terrier
alaskan_malamute 0.000000000 0.000000000 0.000000000
boxer 0.000000000 0.000000000 0.000000000
bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute 0.0000000000 0.0000000000 0.000000000
boxer 0.0000000000 0.0000000000 0.000000000
newfoundland weimaraner poodle_standard st_bernard
alaskan_malamute 0.0000000000 0.00000000 0.000000000 0.000000000
boxer 0.0000000000 0.00000000 0.000000000 0.000000000
miniature_pinscher welsh_corgi_pembroke
alaskan_malamute 0.000000000 0.0000000000
boxer 0.000000000 0.0000000000
welsh_corgi_cardigan
alaskan_malamute 0.000000000000
boxer 0.000000000000
[ reached getOption("max.print") -- omitted 26 rows ]
[,,3]
alaskan_malamute boxer british_bulldog bull_terrier
alaskan_malamute 0.001298257 0.000000000 0.000000000 0.000000000
boxer 0.000000000 0.008955844 0.000000000 0.000000000
bullmastiff dogue_de_bordeaux french_bulldog great_dane
alaskan_malamute 0.000000000 0.0000000000 0.000000000 0.000000000
boxer 0.000000000 0.0000000000 0.000000000 0.000000000
irish_wolfhound mastiff rottweiler shar_pei
alaskan_malamute 0.000000000 0.000000000 0.000000000 0.000000000
boxer 0.000000000 0.000000000 0.000000000 0.000000000
yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute 0.0000000000 0.0000000000 0.0000000000
boxer 0.0000000000 0.0000000000 0.0000000000
pekingese basset_hound bedlington_terrier
alaskan_malamute 0.0000000000 0.0000000000 0.0000000000
boxer 0.0000000000 0.0000000000 0.0000000000
bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute 0.00000000000 0.00000000000 0.0000000000
boxer 0.00000000000 0.00000000000 0.0000000000
newfoundland weimaraner poodle_standard st_bernard
alaskan_malamute 0.00000000000 0.000000000 0.0000000000 0.0000000000
boxer 0.00000000000 0.000000000 0.0000000000 0.0000000000
miniature_pinscher welsh_corgi_pembroke
alaskan_malamute 0.0000000000 0.000000000000
boxer 0.0000000000 0.000000000000
welsh_corgi_cardigan
alaskan_malamute 0.0000000000000
boxer 0.0000000000000
[ reached getOption("max.print") -- omitted 26 rows ]
[,,4]
alaskan_malamute boxer british_bulldog bull_terrier
alaskan_malamute 0.002151014 0.00000000 0.000000000 0.000000000
boxer 0.000000000 0.01483846 0.000000000 0.000000000
bullmastiff dogue_de_bordeaux french_bulldog great_dane
alaskan_malamute 0.00000000 0.0000000000 0.00000000 0.000000000
boxer 0.00000000 0.0000000000 0.00000000 0.000000000
irish_wolfhound mastiff rottweiler shar_pei
alaskan_malamute 0.00000000 0.000000000 0.00000000 0.000000000
boxer 0.00000000 0.000000000 0.00000000 0.000000000
yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute 0.000000000 0.0000000000 0.0000000000
boxer 0.000000000 0.0000000000 0.0000000000
pekingese basset_hound bedlington_terrier
alaskan_malamute 0.0000000000 0.0000000000 0.0000000000
boxer 0.0000000000 0.0000000000 0.0000000000
bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute 0.00000000000 0.00000000000 0.0000000000
boxer 0.00000000000 0.00000000000 0.0000000000
newfoundland weimaraner poodle_standard st_bernard
alaskan_malamute 0.00000000000 0.000000000 0.0000000000 0.000000000
boxer 0.00000000000 0.000000000 0.0000000000 0.000000000
miniature_pinscher welsh_corgi_pembroke
alaskan_malamute 0.0000000000 0.00000000000
boxer 0.0000000000 0.00000000000
welsh_corgi_cardigan
alaskan_malamute 0.0000000000000
boxer 0.0000000000000
[ reached getOption("max.print") -- omitted 26 rows ]
[,,5]
alaskan_malamute boxer british_bulldog bull_terrier
alaskan_malamute 0.0009597299 0.00000000 0.000000000 0.000000000
boxer 0.0000000000 0.00662056 0.000000000 0.000000000
bullmastiff dogue_de_bordeaux french_bulldog great_dane
alaskan_malamute 0.000000000 0.0000000000 0.000000000 0.000000000
boxer 0.000000000 0.0000000000 0.000000000 0.000000000
irish_wolfhound mastiff rottweiler shar_pei
alaskan_malamute 0.000000000 0.000000000 0.000000000 0.000000000
boxer 0.000000000 0.000000000 0.000000000 0.000000000
yorkshire_terrier airedale_terrier irish_setter
alaskan_malamute 0.0000000000 0.0000000000 0.0000000000
boxer 0.0000000000 0.0000000000 0.0000000000
pekingese basset_hound bedlington_terrier
alaskan_malamute 0.0000000000 0.0000000000 0.0000000000
boxer 0.0000000000 0.0000000000 0.0000000000
bernese_mountain_dog italian_corso_dog neapolitan_mastiff
alaskan_malamute 0.00000000000 0.00000000000 0.0000000000
boxer 0.00000000000 0.00000000000 0.0000000000
newfoundland weimaraner poodle_standard st_bernard
alaskan_malamute 0.00000000000 0.000000000 0.00000000000 0.0000000000
boxer 0.00000000000 0.000000000 0.00000000000 0.0000000000
miniature_pinscher welsh_corgi_pembroke
alaskan_malamute 0.0000000000 0.000000000000
boxer 0.0000000000 0.000000000000
welsh_corgi_cardigan
alaskan_malamute 0.0000000000000
boxer 0.0000000000000
[ reached getOption("max.print") -- omitted 26 rows ]
# Frequency table of the cluster assigned to each observation.
frq(mod1$classification)
x <numeric>
# total N=178 valid N=178 mean=2.40 sd=1.55
Value | N | Raw % | Valid % | Cum. %
-------------------------------------
1 | 89 | 50.00 | 50.00 | 50.00
2 | 13 | 7.30 | 7.30 | 57.30
3 | 7 | 3.93 | 3.93 | 61.24
4 | 53 | 29.78 | 29.78 | 91.01
5 | 16 | 8.99 | 8.99 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
# Classification plot of the fitted mixture model.
plot(mod1, what = "classification")
# Additional diagnostics (ICL and bootstrap LRT), currently disabled:
# ICL <- mclustICL(data, G = seq(from = 2, to = 9))
# summary(ICL)
# plot(ICL)
# LRT <- mclustBootstrapLRT(data, modelName = "VEI")
# summary(LRT)
# plot(LRT)
# Attach the cluster assigned by `mod1` to each row of the original
# (non-spatial) data as a factor column `cluster`.
# This relies on `mod1$classification` being in the same row order as
# `wide_insurance_p`.
cluster_results <- wide_insurance_p %>%
  st_drop_geometry() %>%
  as_tibble() %>%
  mutate(cluster = factor(mod1$classification))
# Mean of `french_bulldog` within each cluster.
aggregate(french_bulldog ~ cluster, data = cluster_results, FUN = mean)
cluster french_bulldog
1 1 0.08537539
2 2 0.12670835
3 3 0.14993248
4 4 0.13293652
5 5 0.07927085
# Mean of `rottweiler` within each cluster.
aggregate(rottweiler ~ cluster, data = cluster_results, FUN = mean)
cluster rottweiler
1 1 0.16670138
2 2 0.06472732
3 3 0.13460077
4 4 0.12072372
5 5 0.16504495
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)
Matrix products: default
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] mclust_5.4.9 parameters_0.16.0 modelbased_0.9.0 see_0.6.8
[5] correlation_0.7.1 tmap_3.3-2 sf_1.0-5 DT_0.20
[9] sjPlot_2.8.10 sjmisc_2.8.9 scales_1.1.1 magrittr_2.0.2
[13] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4
[17] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5
[21] tidyverse_1.3.1 pacman_0.5.1
To cite R in publications use:
R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.
To cite the ggplot2 package in publications use:
Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.