# Distinct, title-cased suburb names, stored under the ABS column name
# SSC_NAME16 so they can be matched against the ABS suburb layer.
# orig data missing???
SUB <- read_csv(
  "data-raw/geo/suburb-and-adjoining-suburb-november-2019.zip"
) %>%
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>%
  # keep only the suburb name, renamed in the same step
  select(SSC_NAME16 = suburb_name) %>%
  distinct() %>%
  mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>%
  arrange(SSC_NAME16)

write_rds(SUB, "data/geo/clean/SUB.Rds")
This list may well include areas with no resident population (and therefore no SEIFA scores), for instance:
# A tibble: 2 × 1
SSC_NAME16
<chr>
1 Brisbane Airport
2 Port Of Brisbane
Names from Brisbane containing (Brisbane - Qld), and names from Qld containing (Qld), have to be cleaned to match BCC data.
The ABS spellings McDowall and Mount Coot-tha were also changed to Mcdowall and Mount Coot-Tha, so that both sources use the same names.
# Import the 2016 suburb (SSC) boundaries and harmonise the suburb names.
# The shapefile is extracted next to the zip and removed again at the end.
unzip("data-raw/geo/1270055003_ssc_2016_aust_shape.zip",
      exdir = "data-raw/geo")

SSC <- st_read(
  "data-raw/geo/1270055003_ssc_2016_aust_shape/SSC_2016_AUST.shp",
  stringsAsFactors = FALSE
) %>%
  mutate(SSC_CODE16 = as.integer(SSC_CODE16)) %>%
  select(-STE_NAME16, -STE_CODE16, -AREASQKM16) %>%
  # reproject to the CRS with EPSG code 3112
  st_transform(3112) %>%
  filter(!st_is_empty(geometry)) %>%
  mutate(
    # keep the untouched name for reference
    SSC_NAME16_orig = SSC_NAME16,
    # strip the disambiguation suffixes
    SSC_NAME16 = str_remove(SSC_NAME16, fixed(" (Brisbane - Qld)")),
    SSC_NAME16 = str_remove(SSC_NAME16, fixed(" (Qld)")),
    # align capitalisation with the title-cased names used elsewhere
    SSC_NAME16 = case_when(
      SSC_NAME16 == "McDowall" ~ "Mcdowall",
      SSC_NAME16 == "Mount Coot-tha" ~ "Mount Coot-Tha",
      TRUE ~ SSC_NAME16
    )
  )

# SSC <- rmapshaper::ms_simplify(SSC, keep = 0.05, weighting = 0.7) # default settings

write_rds(SSC, "data/geo/SSC_2016_AUST.Rds")

unlink("data-raw/geo/1270055003_ssc_2016_aust_shape", recursive = TRUE)
Stones Corner doesn’t exist in the ABS data but it does in the BCC data.
It appears to be part of Greenslopes.
# A tibble: 1 × 3
SSC_NAME16 SSC_CODE16 SSC_NAME16_orig
<chr> <int> <chr>
1 Stones Corner NA <NA>
After cleaning, the datasets were merged.
Full map
# SEIFA indexes per suburb, read from "Table 1" of the workbook.
# Cells holding "-" are read as NA.
# NOTE(review): skip = 5 / n_max = 13719 are tied to the layout of this
# particular workbook - confirm if the source file is ever replaced.
SEIFA <- read_xls("data-raw/SEIFA/2033055001 - ssc indexes.xls",
                  sheet = "Table 1", skip = 5, n_max = 13719, na = "-") %>%
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>%
  dplyr::rename(SSC_CODE16 = x1,
                SSC_NAME16 = x2,
                IRSD = score_3,
                IRSD_d = decile_4,
                IRSAD = score_5,
                IRSAD_d = decile_6,
                IER = score_7,
                IER_d = decile_8,
                IEO = score_9,
                IEO_d = decile_10,
                URP = x11,
                caution = x12) %>%
  mutate(
    # deciles kept as integers here
    across(
      c(SSC_CODE16,
        IRSD, IRSAD, IER, IEO,
        IRSD_d, IRSAD_d, IER_d, IEO_d,
        URP),
      as.integer
    ),
    # any non-missing entry in the caution column counts as a flag
    caution = !is.na(caution)
  )
Comparing Australia & Brisbane, example of IRSD.
x <integer>
# total N=13713 valid N=13691 mean=5.50 sd=2.87
Value | N | Raw % | Valid % | Cum. %
---------------------------------------
1 | 1369 | 9.98 | 10.00 | 10.00
2 | 1369 | 9.98 | 10.00 | 20.00
3 | 1370 | 9.99 | 10.01 | 30.01
4 | 1370 | 9.99 | 10.01 | 40.01
5 | 1367 | 9.97 | 9.98 | 50.00
6 | 1370 | 9.99 | 10.01 | 60.00
7 | 1369 | 9.98 | 10.00 | 70.00
8 | 1369 | 9.98 | 10.00 | 80.00
9 | 1371 | 10.00 | 10.01 | 90.02
10 | 1367 | 9.97 | 9.98 | 100.00
<NA> | 22 | 0.16 | <NA> | <NA>
x <integer>
# total N=193 valid N=184 mean=7.79 sd=2.37
Value | N | Raw % | Valid % | Cum. %
-------------------------------------
1 | 3 | 1.55 | 1.63 | 1.63
2 | 6 | 3.11 | 3.26 | 4.89
3 | 5 | 2.59 | 2.72 | 7.61
4 | 5 | 2.59 | 2.72 | 10.33
5 | 13 | 6.74 | 7.07 | 17.39
6 | 12 | 6.22 | 6.52 | 23.91
7 | 28 | 14.51 | 15.22 | 39.13
8 | 18 | 9.33 | 9.78 | 48.91
9 | 33 | 17.10 | 17.93 | 66.85
10 | 61 | 31.61 | 33.15 | 100.00
<NA> | 9 | 4.66 | <NA> | <NA>
Few areas with missing SEIFA
SSC_NAME16 IRSD_d IRSAD_d IER_d IEO_d URP
1 Banks Creek NA NA NA NA NA
2 Brisbane Airport NA NA NA NA NA
3 Eagle Farm NA NA NA NA NA
4 Enoggera Reservoir NA NA NA 10 25
5 Karawatha NA NA NA NA NA
6 Larapinta NA NA NA NA NA
7 Lytton NA NA NA NA NA
8 Mount Coot-Tha NA NA NA NA NA
9 Port Of Brisbane NA NA NA NA NA
These were excluded.
Few cases with ABS flag caution
.
SSC_NAME16 IRSD IRSD_d IRSAD IRSAD_d IER IER_d IEO IEO_d URP
1 Bulwer 1014 6 996 6 1012 5 1024 7 49
2 Cowan Cowan 1014 6 996 6 1012 5 1024 7 28
3 England Creek 1004 5 978 5 1063 8 949 3 33
4 Kooringal 1014 6 996 6 1012 5 1024 7 45
Usually with very small pop numbers.
These remain included.
Original values of indices were used to calculate ‘local deciles’ using SSCs for Brisbane only
# Keep the supplied deciles as *_d_orig and recompute *_d as deciles of the
# index scores among the rows currently in SSC; both sets are then stored as
# factors, with each original placed right after its recomputed counterpart.
SSC %<>%
  mutate(IRSD_d_orig = IRSD_d,
         IRSD_d = ntile(IRSD, 10),
         IRSAD_d_orig = IRSAD_d,
         IRSAD_d = ntile(IRSAD, 10),
         IER_d_orig = IER_d,
         IER_d = ntile(IER, 10),
         IEO_d_orig = IEO_d,
         IEO_d = ntile(IEO, 10)) %>%
  mutate(across(c(ends_with("_d"), ends_with("_d_orig")), factor)) %>%
  relocate(IRSD_d_orig, .after = IRSD_d) %>%
  relocate(IRSAD_d_orig, .after = IRSAD_d) %>%
  relocate(IER_d_orig, .after = IER_d) %>%
  relocate(IEO_d_orig, .after = IEO_d)
Note: Deciles are represented as factor variables.
Certain applications could benefit from changing them to ordered factors.
Agreement across specific indices :
IRSD_d
IRSD_d_orig == IRSD_d <lgl>
# total N=184 valid N=184 mean=0.11 sd=0.32
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 88.59 | 88.59 | 88.59
TRUE | 21 | 11.41 | 11.41 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IRSAD_d
IRSAD_d_orig == IRSAD_d <lgl>
# total N=184 valid N=184 mean=0.11 sd=0.32
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 88.59 | 88.59 | 88.59
TRUE | 21 | 11.41 | 11.41 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IER_d
IER_d_orig == IER_d <lgl>
# total N=184 valid N=184 mean=0.71 sd=0.45
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 53 | 28.80 | 28.80 | 28.80
TRUE | 131 | 71.20 | 71.20 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IEO_d
IEO_d_orig == IEO_d <lgl>
# total N=184 valid N=184 mean=0.12 sd=0.33
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 162 | 88.04 | 88.04 | 88.04
TRUE | 22 | 11.96 | 11.96 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
# Lookup table of "expensive" breeds: one row per breed name, all flagged
# expensive = "yes".
dog_cost <- read_xlsx("data-raw/costs/dog_expensive.xlsx") %>%
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>%
  select(-web_source) %>%
  rename(dog_breed = breed) %>%
  select(dog_breed) %>%
  distinct() %>%
  # correcting names for better matching - these one used in BCE
  mutate(
    dog_breed = case_when(
      dog_breed == "Hairless Chinese Crested" ~ "Chinese Crested Dog",
      dog_breed == "Saint Bernard" ~ "St Bernard",
      TRUE ~ as.character(dog_breed)
    )
  ) %>%
  # synonyms
  add_row(dog_breed = "Dogue de Bordeaux") %>%
  add_row(dog_breed = "Bulldog") %>%
  add_row(dog_breed = "British Bulldog") %>%
  mutate(expensive = "yes") %>%
  arrange(dog_breed)
Top 20 most expensive dogs (+3 synonyms!)
Data scraped from https://top10petinsurance.com.au/pet-insurance-prices on the 30th March 2020
# One-off scrape of the insurance price table; the result is cached as an
# Rds file so later steps do not need network access.

# packages needed
# install.packages("rvest")
p_load(rvest, tidyverse)

# scraping table done with this using chrome: https://www.r-bloggers.com/using-rvest-to-scrape-an-html-table/
url <- 'https://top10petinsurance.com.au/pet-insurance-prices/'

dog_insurance <- url %>%
  xml2::read_html() %>%
  html_nodes(xpath='//*[@id="post-1016"]/div/table') %>%
  html_table()

# html_table() returns a list of tables; the first one is used
dog_insurance <- dog_insurance[[1]]

head(dog_insurance)

write_rds(dog_insurance, "data-raw/costs/dog_insurance.Rds") # extracted on the 30 March 2020
# Load the cached insurance table and turn the three annual-cost columns
# from text into numbers.
dog_insurance <- read_rds("data-raw/costs/dog_insurance.Rds") %>%
  as_tibble() %>%
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>%
  # select(-) %>%
  mutate(
    across(
      c(average_accident_policy_cost_annual,
        average_illness_policy_cost_annual,
        average_comprehensive_policy_cost_annual),
      # drop every "," and "$" before converting
      ~ as.numeric(gsub("[$,]", "", .x))
    )
  ) %>%
  filter(dog_breed != "Unknown Dog Breed")
cost_compared_to_other_breeds <character>
# total N=557 valid N=557 mean=1.97 sd=0.42
Value | N | Raw % | Valid % | Cum. %
------------------------------------------------------------
Above average | 58 | 10.41 | 10.41 | 10.41
Below average | 457 | 82.05 | 82.05 | 92.46
Significantly above average | 42 | 7.54 | 7.54 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Breeds in Significantly above average
category:
Breeds in Above average
category:
Raw dataset named
cars-bis-open-data-animal-permits-3-jan-2019
consists of
107,405 records.
Excluding records with:
- permit_name: Breeders Permit, Cattery Permit, Racehorses Permit, Pet Shop Permit, Domestic Dog Permit & Guard Dog Permit
- missing dog_breed values
- dog_breed listed as Unknown or Cross

Stones Corner values were assigned to Greenslopes SSC (see information in section above).
# Clean the animal-permit records: drop special permit types, standardise
# the suburb and breed columns, and remove records without a usable suburb
# or breed.
dog_ownership %<>%
  clean_names() %>%
  remove_empty(c("rows", "cols")) %>%
  # all the same here
  select(-permit_group, -permit_status) %>%
  # special permits?
  filter(!permit_name %in% c("Breeders Permit", "Cattery Permit",
                             "Racehorses Permit", "Pet Shop Permit")) %>%
  filter(!permit_name %in% c("Domestic Dog Permit", "Guard Dog Permit")) %>%
  rename(dog_breed = animal_breed,
         SSC_NAME16 = application_location_suburb) %>%
  mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>%
  # correct suburb
  mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "Stones Corner",
                             "Greenslopes", SSC_NAME16)) %>%
  # missing geo
  filter(!is.na(SSC_NAME16)) %>%
  # missing breed
  filter(!is.na(dog_breed)) %>%
  filter(!dog_breed %in% c("Unknown",
                           "Medium Cross Breed", "Small Cross Breed",
                           "Large Cross Breed")) %>%
  # few cleans for better matches
  mutate(
    dog_breed = case_when(
      dog_breed == "German Shepherd Dog (Long Stock Coat)" ~ "German Shepherd",
      dog_breed == "German Shepherd Dog" ~ "German Shepherd",
      # NOTE(review): the trailing space in the pattern below looks
      # deliberate (matches the raw value) - confirm against the data
      dog_breed == "Central Asian Shepherd Dog " ~ "Central Asian Shepherd",
      dog_breed == "Kangal Dog" ~ "Kangal",
      dog_breed == "Bulldog" ~ "British Bulldog",
      dog_breed == "Collie (Rough)" ~ "Rough Collie",
      dog_breed == "Collie (Smooth)" ~ "Smooth Collie",
      TRUE ~ as.character(dog_breed)
    )
  )
Prepared dataset consists of 106,018 records.
dog_cost
# Flag every registration record as an expensive (1) or non-expensive (0)
# breed, based on whether the breed is listed in dog_cost.
dog_ownership_cost <- dog_ownership %>%
  left_join(dog_cost, by = "dog_breed") %>%
  # binary indicator "expensive" and "non-expensive" dog breeds according to dog_cost
  mutate(
    expensive = case_when(
      is.na(expensive) ~ 0L,    # breed not listed in dog_cost
      expensive == "yes" ~ 1L
    )
  )
Expensive dogs in Brisbane:
expensive <integer>
# total N=106018 valid N=106018 mean=0.14 sd=0.35
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
0 | 90940 | 85.78 | 85.78 | 85.78
1 | 15078 | 14.22 | 14.22 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
dog_insurance
Correcting names for better match
# Rename insurer breed labels so they match the breed names used in the
# registration data. Labels without an entry below are left unchanged.
dog_insurance %<>%
  mutate(
    dog_breed = case_when(
      # this is a bit tricky! might need sensitivity?
      dog_breed == "French Poodle" ~ "Poodle",

      # just naming issues
      dog_breed == "Poodle – Standard" ~ "Poodle (Standard)",
      dog_breed == "Miniature Poodle" ~ "Poodle (Miniature)",
      dog_breed == "Poodle – Toy" ~ "Poodle (Toy)",
      dog_breed == "Shar-Pei" ~ "Shar Pei",
      dog_breed == "German Short Haired Pointer" ~ "German Shorthaired Pointer",
      dog_breed == "German Wire Haired Pointer" ~ "German Wirehaired Pointer",
      dog_breed == "Collie – Rough" ~ "Rough Collie",
      dog_breed == "Collie – Smooth" ~ "Smooth Collie",
      dog_breed == "Miniature Schnauzer" ~ "Schnauzer (Miniature)",
      dog_breed == "Schnauzer Giant" ~ "Schnauzer (Giant)",
      dog_breed == "Lagotto Rom" ~ "Lagotto Romagnolo",
      dog_breed == "Brittany Spaniel" ~ "Brittany",
      dog_breed == "Staghound" ~ "Stag Hound",
      dog_breed == "Kerry Blue" ~ "Kerry Blue Terrier",
      dog_breed == "English Toy terrier" ~ "English Toy Terrier",
      dog_breed == "Parson Jack Russell Terrier" ~ "Parson Russell Terrier",
      dog_breed == "Welsh Corgi – Pembroke" ~ "Welsh Corgi (Pembroke)",
      dog_breed == "American Cocker Spaniel" ~ "Cocker Spaniel (American)",
      dog_breed == "Basset Fauve De Bretagne" ~ "Basset Fauve de Bretagne",
      dog_breed == "Norwegian Elk Hound" ~ "Norwegian Elkhound",
      dog_breed == "Cheasapeake Bay Retriever" ~ "Chesapeake Bay Retriever",
      dog_breed == "Bouvier Des Flandres" ~ "Bouvier des Flandres",
      dog_breed == "Miniature Bull Terrier" ~ "Bull Terrier (Miniature)",
      dog_breed == "Munsterlander – Large" ~ "Large Munsterlander",
      dog_breed == "Welsh Corgi – Cardigan" ~ "Welsh Corgi (Cardigan)",
      dog_breed == "HamiltonStovare" ~ "Hamiltonstovare",
      dog_breed == "Blue tick Coonhound" ~ "Bluetick Coonhound",
      dog_breed == "Japanese Akita" ~ "Akita (Japanese)",
      dog_breed == "Dogue De Bordeaux" ~ "Dogue de Bordeaux",
      dog_breed == "Italian Cane Corso" ~ "Italian Corso Dog",

      # different kelpies but same category anyway
      dog_breed == "Australian Kelpie Sheepdog" ~ "Australian Kelpie",

      # typo
      dog_breed == "Neopolitan Mastiff" ~ "Neapolitan Mastiff",

      TRUE ~ as.character(dog_breed)
    )
  )
Dog breeds without match
Some further corrections still possible here:
# Attach the insurer's cost category to every registration record, then
# fill in / override the category for breeds that have no direct match.
dog_ownership_cost <- dog_ownership_cost %>%
  left_join(
    dog_insurance %>%
      select(dog_breed, cost_compared_to_other_breeds) %>%
      # Renamed insurer labels can collapse onto one breed name; identical
      # lookup rows must be merged or they would multiply registration
      # records in the join.
      # NOTE(review): a breed that ends up with two *different* categories
      # would still duplicate records - worth checking.
      distinct(),
    by = "dog_breed"
  ) %>%
  mutate(
    cost_compared_to_other_breeds = case_when(
      # all the same
      dog_breed == "Fox Terrier" ~ "Below average",
      dog_breed == "Fox Terrier (Smooth)" ~ "Below average",
      dog_breed == "Fox Terrier (Wire)" ~ "Below average",
      dog_breed == "Schnauzer" ~ "Below average",
      dog_breed == "Australian Stumpy Tail Cattle Dog" ~ "Below average",
      dog_breed == "Foxhound" ~ "Below average",
      dog_breed == "White Swiss Shepherd Dog" ~ "Below average",

      # multiple options here, but all above going for conservative
      dog_breed == "Welsh Corgi" ~ "Above average",

      # taking values from Chihuahua
      dog_breed == "Chihuahua (Smooth Coat)" ~ "Below average",
      dog_breed == "Chihuahua (Long Coat)" ~ "Below average",

      # naming - Dogue De Bordeaux
      dog_breed == "French Mastiff" ~ "Significantly above average",

      TRUE ~ as.character(cost_compared_to_other_breeds)
    )
  ) %>%
  select(-permit_name)
Few things left:
Two largest groups:
These observations will remain as NAs.
Summarizing all dogs, and expensive only.
# Number of registered dogs per suburb, added to the suburb layer.
dog_agg <- dog_ownership_cost %>%
  count(SSC_NAME16, name = "dogs_total")

# suburbs without any registration record get dogs_total = NA
SSC %<>%
  left_join(dog_agg, by = "SSC_NAME16")
Areas with no URP/SEIFA but having (small amount of) dogs:
SSC_NAME16 dogs_total
1 Eagle Farm 1
2 Enoggera Reservoir 3
3 Karawatha 6
4 Lytton 1
These areas were excluded from the dog_ownership_cost
dataset.
Area with low URP (also marked as caution == TRUE in SEIFA) and no dogs at all:
SSC_NAME16 dogs_total URP
1 England Creek NA 33
# Keep suburbs with at least one registered dog (rows where dogs_total is NA
# are dropped by the filter as well) and add dogs per 100 residents.
SSC %<>%
  filter(dogs_total > 0) %>%
  mutate(dogs_percap = (dogs_total / URP) * 100) %>%
  relocate(geometry, .after = last_col())
This area was excluded.
# Build a suburb-by-breed table and attach it to the suburb layer.
#
# dogs:     registration records with columns SSC_NAME16 and dog_breed.
# suburbs:  suburb layer with an SSC_NAME16 column.
# as_share: FALSE gives the number of dogs per breed in each suburb,
#           TRUE gives each breed's share of the suburb's dogs in `dogs`.
#
# Returns right_join(suburbs, <wide table>): one row per suburb present in
# `dogs`, one (clean_names-style) column per breed, 0 where a breed does not
# occur in a suburb.
breeds_by_suburb <- function(dogs, suburbs, as_share = FALSE) {
  per_breed <- dogs %>%
    count(SSC_NAME16, dog_breed)

  if (as_share) {
    per_breed <- per_breed %>%
      group_by(SSC_NAME16) %>%
      mutate(n = n / sum(n)) %>%
      ungroup()
  }

  breeds_wide <- per_breed %>%
    pivot_wider(names_from = dog_breed,
                values_from = n) %>%
    # breed absent from a suburb -> 0
    mutate(across(everything(), ~ replace(.x, is.na(.x), 0))) %>%
    clean_names() %>%
    rename(SSC_NAME16 = ssc_name16)

  right_join(suburbs, breeds_wide, by = "SSC_NAME16")
}

# all breeds
wide_all_n <- dog_ownership_cost %>%
  breeds_by_suburb(SSC)

wide_all_p <- dog_ownership_cost %>%
  breeds_by_suburb(SSC, as_share = TRUE)

# expensive breeds only
wide_cost_n <- dog_ownership_cost %>%
  filter(expensive == 1) %>%
  breeds_by_suburb(SSC)

wide_cost_p <- dog_ownership_cost %>%
  filter(expensive == 1) %>%
  breeds_by_suburb(SSC, as_share = TRUE)

# breeds in the highest insurance cost category only
wide_insurance_n <- dog_ownership_cost %>%
  # filter(cost_compared_to_other_breeds != "Below average") %>%
  filter(cost_compared_to_other_breeds == "Significantly above average") %>%
  breeds_by_suburb(SSC)

wide_insurance_p <- dog_ownership_cost %>%
  # filter(cost_compared_to_other_breeds != "Below average") %>%
  filter(cost_compared_to_other_breeds == "Significantly above average") %>%
  breeds_by_suburb(SSC, as_share = TRUE)
Using the Index of Education and Occupation (IEO) only as the outcome. More information about the index is available from the ABS.
Keeping all breeds and wide formats as described above; dropping spatial info.
# Drop the geometry and keep only the IEO columns plus the breed columns
# (from `akita` to the last column). In the shares table the IEO columns
# are renamed to outcome / outcome_d.
wide_all_n %<>%
  sf::st_drop_geometry() %>%
  # as_tibble() %>%
  select(IEO, IEO_d,
         akita:last_col())

wide_all_p %<>%
  sf::st_drop_geometry() %>%
  # as_tibble() %>%
  select(IEO, IEO_d,
         akita:last_col()) %>%
  rename(outcome = IEO,
         outcome_d = IEO_d)
# Remove near-zero-variance columns, identified on the shares table, from
# both wide tables (the same column positions are dropped in each).

# per-column metrics, for inspection only; overwritten on the next line
nzv <- nearZeroVar(wide_all_p, saveMetrics = TRUE)

# positions of the flagged columns
nzv <- nearZeroVar(wide_all_p)

# x[, -integer(0)] would select *no* columns, so only subset when at least
# one column was flagged
if (length(nzv) > 0) {
  wide_all_p <- wide_all_p[, -nzv]
  wide_all_n <- wide_all_n[, -nzv]
}

rm(nzv)
# Reduce the counts table to a single `weight` column: the row sum of the
# numeric breed columns. IEO is numeric too but is the outcome score, not a
# dog count, so it is kept out of the sum.
wide_all_n %<>%
  mutate(
    weight = rowSums(across(where(is.numeric) & !any_of("IEO")))
  ) %>%
  select(weight)
75/25 split, taking into account distribution of deciles:
# Split rows into training (75%) and testing sets, sampling on outcome_d.
# NOTE(review): no seed is set in this chunk, so the split changes between
# runs unless set.seed() is called elsewhere - confirm.
inTrain <- createDataPartition(
  y = wide_all_p$outcome_d,
  p = .75,
  list = FALSE
)

training <- wide_all_p[ inTrain, ]
testing <- wide_all_p[-inTrain, ]

# same rows as `training`, taken from the table holding the weight column
training_weights <- wide_all_n[ inTrain, ]
Training data:
outcome_d <categorical>
# total N=143 valid N=143 mean=5.45 sd=2.88
Value | N | Raw % | Valid % | Cum. %
-------------------------------------
1 | 14 | 9.79 | 9.79 | 9.79
2 | 15 | 10.49 | 10.49 | 20.28
3 | 15 | 10.49 | 10.49 | 30.77
4 | 15 | 10.49 | 10.49 | 41.26
5 | 14 | 9.79 | 9.79 | 51.05
6 | 14 | 9.79 | 9.79 | 60.84
7 | 14 | 9.79 | 9.79 | 70.63
8 | 14 | 9.79 | 9.79 | 80.42
9 | 14 | 9.79 | 9.79 | 90.21
10 | 14 | 9.79 | 9.79 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Testing data:
outcome_d <categorical>
# total N=40 valid N=40 mean=5.50 sd=2.91
Value | N | Raw % | Valid % | Cum. %
------------------------------------
1 | 4 | 10 | 10 | 10
2 | 4 | 10 | 10 | 20
3 | 4 | 10 | 10 | 30
4 | 4 | 10 | 10 | 40
5 | 4 | 10 | 10 | 50
6 | 4 | 10 | 10 | 60
7 | 4 | 10 | 10 | 70
8 | 4 | 10 | 10 | 80
9 | 4 | 10 | 10 | 90
10 | 4 | 10 | 10 | 100
<NA> | 0 | 0 | <NA> | <NA>
Outcome distro checks
R version 4.2.0 (2022-04-22 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)
Matrix products: default
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] caret_6.0-92 lattice_0.20-45 tmap_3.3-3 sf_1.0-8
[5] gmodels_2.18.1.1 DT_0.23 sjPlot_2.8.10 sjmisc_2.8.9
[9] janitor_2.1.0 scales_1.2.0 magrittr_2.0.3 readxl_1.4.0
[13] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9 purrr_0.3.4
[17] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7 ggplot2_3.3.6
[21] tidyverse_1.3.2 pacman_0.5.1

To cite R in publications use:
R Core Team (2022). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.
To cite the ggplot2 package in publications use:
Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.
`{=html}