# orig data missing???
SUB <- read_csv("data-raw/geo/suburb-and-adjoining-suburb-november-2019.zip") %>%
clean_names() %>%
remove_empty(c("rows", "cols")) %>%
select(suburb_name) %>%
rename(SSC_NAME16 = suburb_name) %>%
distinct() %>%
mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>%
arrange(SSC_NAME16)
write_rds(SUB, "data/geo/clean/SUB.Rds")This might very well include areas with no pops (and therefore no SEIFA), for instance:
# A tibble: 2 × 1
SSC_NAME16
<chr>
1 Brisbane Airport
2 Port Of Brisbane
Names from Brisbane containing (Brisbane - Qld), names
from Qld containing (Qld) have to be cleaned to match BCC
data.
Mcdowall is called McDowall and
Mount Coot-tha is Mount Coot-Tha - these have
been unified as well.
unzip("data-raw/geo/1270055003_ssc_2016_aust_shape.zip",
exdir = "data-raw/geo")
SSC <- st_read("data-raw/geo/1270055003_ssc_2016_aust_shape/SSC_2016_AUST.shp",
stringsAsFactors = FALSE) %>%
mutate(SSC_CODE16 = as.integer(SSC_CODE16)) %>%
select(-STE_NAME16, -STE_CODE16, -AREASQKM16) %>%
st_transform(3112) %>%
filter(!st_is_empty(geometry)) %>%
mutate(SSC_NAME16_orig = SSC_NAME16) %>%
mutate(SSC_NAME16 = str_remove(SSC_NAME16,
fixed(" (Brisbane - Qld)"))) %>%
mutate(SSC_NAME16 = str_remove(SSC_NAME16,
fixed(" (Qld)"))) %>%
mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "McDowall",
"Mcdowall", SSC_NAME16)) %>%
mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "Mount Coot-tha",
"Mount Coot-Tha", SSC_NAME16))
# SSC <- rmapshaper::ms_simplify(SSC, keep = 0.05, weighting = 0.7) # default settings
write_rds(SSC, "data/geo/SSC_2016_AUST.Rds")
unlink("data-raw/geo/1270055003_ssc_2016_aust_shape", recursive = TRUE)Stones Corner doesn’t exist in ABS but it does in BCC.
It seems it’s part of Greenslopes.
# A tibble: 1 × 3
SSC_NAME16 SSC_CODE16 SSC_NAME16_orig
<chr> <int> <chr>
1 Stones Corner NA <NA>
After clean the datasets were merged.
Full map
SEIFA <- read_xls("data-raw/SEIFA/2033055001 - ssc indexes.xls",
sheet = "Table 1", skip = 5, n_max = 13719, na = "-") %>%
clean_names() %>%
remove_empty(c("rows", "cols")) %>%
dplyr::rename(SSC_CODE16 = x1,
SSC_NAME16 = x2,
IRSD = score_3,
IRSD_d = decile_4,
IRSAD = score_5,
IRSAD_d = decile_6,
IER = score_7,
IER_d = decile_8,
IEO = score_9,
IEO_d = decile_10,
URP = x11,
caution = x12) %>%
# deciles kept as integers here
mutate(SSC_CODE16 = as.integer(SSC_CODE16),
IRSD = as.integer(IRSD),
IRSAD = as.integer(IRSAD),
IER = as.integer(IER),
IEO = as.integer(IEO),
IRSD_d = as.integer(IRSD_d),
IRSAD_d = as.integer(IRSAD_d),
IER_d = as.integer(IER_d),
IEO_d = as.integer(IEO_d),
URP = as.integer(URP)
) %>%
mutate(caution = as.logical(ifelse(is.na(caution), "False", "True")))Comparing Australia & Brisbane, example of IRSD.
x <integer>
# total N=13713 valid N=13691 mean=5.50 sd=2.87
Value | N | Raw % | Valid % | Cum. %
---------------------------------------
1 | 1369 | 9.98 | 10.00 | 10.00
2 | 1369 | 9.98 | 10.00 | 20.00
3 | 1370 | 9.99 | 10.01 | 30.01
4 | 1370 | 9.99 | 10.01 | 40.01
5 | 1367 | 9.97 | 9.98 | 50.00
6 | 1370 | 9.99 | 10.01 | 60.00
7 | 1369 | 9.98 | 10.00 | 70.00
8 | 1369 | 9.98 | 10.00 | 80.00
9 | 1371 | 10.00 | 10.01 | 90.02
10 | 1367 | 9.97 | 9.98 | 100.00
<NA> | 22 | 0.16 | <NA> | <NA>
x <integer>
# total N=193 valid N=184 mean=7.79 sd=2.37
Value | N | Raw % | Valid % | Cum. %
-------------------------------------
1 | 3 | 1.55 | 1.63 | 1.63
2 | 6 | 3.11 | 3.26 | 4.89
3 | 5 | 2.59 | 2.72 | 7.61
4 | 5 | 2.59 | 2.72 | 10.33
5 | 13 | 6.74 | 7.07 | 17.39
6 | 12 | 6.22 | 6.52 | 23.91
7 | 28 | 14.51 | 15.22 | 39.13
8 | 18 | 9.33 | 9.78 | 48.91
9 | 33 | 17.10 | 17.93 | 66.85
10 | 61 | 31.61 | 33.15 | 100.00
<NA> | 9 | 4.66 | <NA> | <NA>
Few areas with missing SEIFA
SSC_NAME16 IRSD_d IRSAD_d IER_d IEO_d URP
1 Banks Creek NA NA NA NA NA
2 Brisbane Airport NA NA NA NA NA
3 Eagle Farm NA NA NA NA NA
4 Enoggera Reservoir NA NA NA 10 25
5 Karawatha NA NA NA NA NA
6 Larapinta NA NA NA NA NA
7 Lytton NA NA NA NA NA
8 Mount Coot-Tha NA NA NA NA NA
9 Port Of Brisbane NA NA NA NA NA
These were excluded.
Few cases with ABS flag caution.
SSC_NAME16 IRSD IRSD_d IRSAD IRSAD_d IER IER_d IEO IEO_d URP
1 Bulwer 1014 6 996 6 1012 5 1024 7 49
2 Cowan Cowan 1014 6 996 6 1012 5 1024 7 28
3 England Creek 1004 5 978 5 1063 8 949 3 33
4 Kooringal 1014 6 996 6 1012 5 1024 7 45
Usually with very small pop numbers.
These remain included.
Original values of indices were used to calculate ‘local deciles’ using SSCs for Brisbane only
SSC %<>%
mutate(IRSD_d_orig = IRSD_d,
IRSD_d = ntile(IRSD, 10),
IRSAD_d_orig = IRSAD_d,
IRSAD_d = ntile(IRSAD, 10),
IER_d_orig = IER_d,
IER_d = ntile(IER, 10),
IEO_d_orig = IEO_d,
IEO_d = ntile(IEO, 10)) %>%
mutate(across(ends_with("_d"), factor)) %>%
mutate(across(ends_with("_d_orig"), factor)) %>%
relocate(IRSD_d_orig, .after = IRSD_d) %>%
relocate(IRSAD_d_orig, .after = IRSAD_d) %>%
relocate(IER_d_orig, .after = IER_d) %>%
relocate(IEO_d_orig, .after = IEO_d) Note: Deciles are represented as factor variable.
Certain application could benefit from changing it to
ordered?
Agreement across specific indices :
IRSD_dIRSD_d_orig == IRSD_d <lgl>
# total N=184 valid N=184 mean=0.11 sd=0.32
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 88.59 | 88.59 | 88.59
TRUE | 21 | 11.41 | 11.41 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IRSAD_dIRSAD_d_orig == IRSAD_d <lgl>
# total N=184 valid N=184 mean=0.11 sd=0.32
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 163 | 88.59 | 88.59 | 88.59
TRUE | 21 | 11.41 | 11.41 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IER_dIER_d_orig == IER_d <lgl>
# total N=184 valid N=184 mean=0.71 sd=0.45
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 53 | 28.80 | 28.80 | 28.80
TRUE | 131 | 71.20 | 71.20 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
IEO_dIEO_d_orig == IEO_d <lgl>
# total N=184 valid N=184 mean=0.12 sd=0.33
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 162 | 88.04 | 88.04 | 88.04
TRUE | 22 | 11.96 | 11.96 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
dog_cost <- read_xlsx("data-raw/costs/dog_expensive.xlsx") %>%
clean_names() %>%
remove_empty(c("rows", "cols")) %>%
select(-web_source) %>%
rename(dog_breed = breed) %>%
select(dog_breed) %>%
distinct() %>%
# correcting names for better matching - these one used in BCE
mutate(
dog_breed = case_when(
dog_breed == "Hairless Chinese Crested" ~ "Chinese Crested Dog",
dog_breed == "Saint Bernard" ~ "St Bernard",
TRUE ~ as.character(dog_breed))
) %>%
# synonyms
add_row(dog_breed = "Dogue de Bordeaux") %>%
add_row(dog_breed = "Bulldog") %>%
add_row(dog_breed = "British Bulldog") %>%
mutate(expensive = "yes") %>%
arrange(dog_breed)Top 20 most expensive dogs (+3 synonyms!)
Data scraped from https://top10petinsurance.com.au/pet-insurance-prices on the 30th March 2020
# packages needed
# install.packages("rvest")
p_load(rvest, tidyverse)
# scraping table done with this using chrome: https://www.r-bloggers.com/using-rvest-to-scrape-an-html-table/
url <- 'https://top10petinsurance.com.au/pet-insurance-prices/'
dog_insurance <- url %>%
xml2::read_html() %>%
html_nodes(xpath='//*[@id="post-1016"]/div/table') %>%
html_table()
dog_insurance <- dog_insurance[[1]]
head(dog_insurance)
write_rds(dog_insurance, "data-raw/costs/dog_insurance.Rds") # extracted on the 30 March 2020dog_insurance <- read_rds("data-raw/costs/dog_insurance.Rds") %>%
as_tibble() %>%
clean_names() %>%
remove_empty(c("rows", "cols")) %>%
# select(-) %>%
mutate(average_accident_policy_cost_annual =
gsub(",", "",
average_accident_policy_cost_annual,
fixed = TRUE),
average_illness_policy_cost_annual =
gsub(",", "",
average_illness_policy_cost_annual,
fixed = TRUE),
average_comprehensive_policy_cost_annual =
gsub(",", "",
average_comprehensive_policy_cost_annual,
fixed = TRUE)
) %>%
mutate(average_accident_policy_cost_annual =
as.numeric(gsub("$", "",
average_accident_policy_cost_annual,
fixed = TRUE)),
average_illness_policy_cost_annual =
as.numeric(gsub("$", "",
average_illness_policy_cost_annual,
fixed = TRUE)),
average_comprehensive_policy_cost_annual =
as.numeric(gsub("$", "",
average_comprehensive_policy_cost_annual,
fixed = TRUE))
) %>%
filter(dog_breed != "Unknown Dog Breed")cost_compared_to_other_breeds <character>
# total N=557 valid N=557 mean=1.97 sd=0.42
Value | N | Raw % | Valid % | Cum. %
------------------------------------------------------------
Above average | 58 | 10.41 | 10.41 | 10.41
Below average | 457 | 82.05 | 82.05 | 92.46
Significantly above average | 42 | 7.54 | 7.54 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Breeds in Significantly above average category:
Breeds in Above average category:
Raw dataset named
cars-bis-open-data-animal-permits-3-jan-2019 consists of
107,405 records.
Excluding records with:
permit_name: Breeders Permit, Cattery
Permit, Racehorses Permit, Pet Shop Permit,
Domestic Dog Permit & Guard Dog Permitdog_breed valuesdog_breed listed as Unknown or
CrossStones Corner values were assigned to Greenslopes SSC (see information in section above).
dog_ownership %<>%
clean_names() %>%
remove_empty(c("rows", "cols")) %>%
# all the same here
select(-permit_group, -permit_status) %>%
# special permits?
filter(!permit_name %in% c("Breeders Permit", "Cattery Permit", "Racehorses Permit", "Pet Shop Permit")) %>%
filter(!permit_name %in% c("Domestic Dog Permit", "Guard Dog Permit")) %>%
rename(dog_breed = animal_breed,
SSC_NAME16 = application_location_suburb) %>%
mutate(SSC_NAME16 = str_to_title(SSC_NAME16)) %>%
# correct suburb
mutate(SSC_NAME16 = ifelse(SSC_NAME16 == "Stones Corner", "Greenslopes", SSC_NAME16)) %>%
# missing geo
filter(!is.na(SSC_NAME16)) %>%
# missing breed
filter(!is.na(dog_breed)) %>%
filter(!dog_breed %in% c("Unknown",
"Medium Cross Breed", "Small Cross Breed", "Large Cross Breed")) %>%
# few cleans for better matches
mutate(
dog_breed = case_when(
dog_breed == "German Shepherd Dog (Long Stock Coat)" ~ "German Shepherd",
dog_breed == "German Shepherd Dog" ~ "German Shepherd",
dog_breed == "Central Asian Shepherd Dog " ~ "Central Asian Shepherd",
dog_breed == "Kangal Dog" ~ "Kangal",
dog_breed == "Bulldog" ~ "British Bulldog",
dog_breed == "Collie (Rough)" ~ "Rough Collie",
dog_breed == "Collie (Smooth)" ~ "Smooth Collie",
TRUE ~ as.character(dog_breed))
) Prepared dataset consists of 106,018 records.
dog_costdog_ownership_cost <- left_join(dog_ownership, dog_cost, by = "dog_breed") %>%
# binary indicator "expensive" and "non-expensive" dog breeds according to dog_cost
mutate(
expensive = case_when(
is.na(expensive) ~ 0,
expensive == "yes" ~ 1
),
expensive = as.integer(expensive)
)Expensive dogs in Brisbane:
expensive <integer>
# total N=106018 valid N=106018 mean=0.14 sd=0.35
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
0 | 90940 | 85.78 | 85.78 | 85.78
1 | 15078 | 14.22 | 14.22 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
dog_insuranceCorrecting names for better match
dog_insurance %<>%
mutate(
dog_breed = case_when(
# this is a bit tricky! might need sensitivity?
dog_breed == "French Poodle" ~ "Poodle",
# just naming issues
dog_breed == "Poodle – Standard" ~ "Poodle (Standard)",
dog_breed == "Miniature Poodle" ~ "Poodle (Miniature)",
dog_breed == "Poodle – Toy" ~ "Poodle (Toy)",
dog_breed == "Shar-Pei" ~ "Shar Pei",
dog_breed == "German Short Haired Pointer" ~ "German Shorthaired Pointer",
dog_breed == "German Wire Haired Pointer" ~ "German Wirehaired Pointer",
dog_breed == "Collie – Rough" ~ "Rough Collie",
dog_breed == "Collie – Smooth" ~ "Smooth Collie",
dog_breed == "Miniature Schnauzer" ~ "Schnauzer (Miniature)",
dog_breed == "Schnauzer Giant" ~ "Schnauzer (Giant)",
dog_breed == "Lagotto Rom" ~ "Lagotto Romagnolo",
dog_breed == "Brittany Spaniel" ~ "Brittany",
dog_breed == "Staghound" ~ "Stag Hound",
dog_breed == "Kerry Blue" ~ "Kerry Blue Terrier",
dog_breed == "English Toy terrier" ~ "English Toy Terrier",
dog_breed == "Parson Jack Russell Terrier" ~ "Parson Russell Terrier",
dog_breed == "Welsh Corgi – Pembroke" ~ "Welsh Corgi (Pembroke)",
dog_breed == "American Cocker Spaniel" ~ "Cocker Spaniel (American)",
dog_breed == "Basset Fauve De Bretagne" ~ "Basset Fauve de Bretagne",
dog_breed == "Norwegian Elk Hound" ~ "Norwegian Elkhound",
dog_breed == "Cheasapeake Bay Retriever" ~ "Chesapeake Bay Retriever",
dog_breed == "Bouvier Des Flandres" ~ "Bouvier des Flandres",
dog_breed == "Miniature Bull Terrier" ~ "Bull Terrier (Miniature)",
dog_breed == "Munsterlander – Large" ~ "Large Munsterlander",
dog_breed == "Welsh Corgi – Cardigan" ~ "Welsh Corgi (Cardigan)",
dog_breed == "HamiltonStovare" ~ "Hamiltonstovare",
dog_breed == "Blue tick Coonhound" ~ "Bluetick Coonhound",
dog_breed == "Japanese Akita" ~ "Akita (Japanese)",
dog_breed == "Dogue De Bordeaux" ~ "Dogue de Bordeaux",
dog_breed == "Italian Cane Corso" ~ "Italian Corso Dog",
# different kelpies but same category anyway
dog_breed == "Australian Kelpie Sheepdog" ~ "Australian Kelpie",
# typo
dog_breed == "Neopolitan Mastiff" ~ "Neapolitan Mastiff",
TRUE ~ as.character(dog_breed)))Dog breeds without match
Some further corrections still possible here:
dog_ownership_cost <- left_join(dog_ownership_cost,
dog_insurance %>%
select(dog_breed, cost_compared_to_other_breeds)
) %>%
mutate(
cost_compared_to_other_breeds = case_when(
# all the same
dog_breed == "Fox Terrier" ~ "Below average",
dog_breed == "Fox Terrier (Smooth)" ~ "Below average",
dog_breed == "Fox Terrier (Wire)" ~ "Below average",
dog_breed == "Schnauzer" ~ "Below average",
dog_breed == "Australian Stumpy Tail Cattle Dog" ~ "Below average",
dog_breed == "Foxhound" ~ "Below average",
dog_breed == "White Swiss Shepherd Dog" ~ "Below average",
# multiple options here, but all above going for conservative
dog_breed == "Welsh Corgi" ~ "Above average",
# taking values from Chihuahua
dog_breed == "Chihuahua (Smooth Coat)" ~ "Below average",
dog_breed == "Chihuahua (Long Coat)" ~ "Below average",
dog_breed == "Chihuahua (Long Coat)" ~ "Below average",
# naming - Dogue De Bordeaux
dog_breed == "French Mastiff" ~ "Significantly above average",
TRUE ~ as.character(cost_compared_to_other_breeds))) %>%
select(-permit_name)Few things left:
Two largest groups:
These observations will remain as NAs.
Summarizing all dogs, and expensive only.
dog_agg <- dog_ownership_cost %>%
group_by(SSC_NAME16) %>%
summarise(dogs_total = n())
SSC %<>%
left_join(dog_agg)Areas with no URP/SEIFA but having (small amount of) dogs:
SSC_NAME16 dogs_total
1 Eagle Farm 1
2 Enoggera Reservoir 3
3 Karawatha 6
4 Lytton 1
These areas were excluded from the dog_ownership_cost
dataset.
Area with low URP (also marked as caution == TRUE in SEIFA) and no dogs at all:
SSC_NAME16 dogs_total URP
1 England Creek NA 33
SSC %<>%
filter(dogs_total > 0) %>%
mutate(dogs_percap = (dogs_total / URP)*100) %>%
relocate(geometry, .after = last_col())This area was excluded.
wide_all_n <- dog_ownership_cost %>%
group_by(SSC_NAME16, dog_breed) %>%
summarise(n = n()) %>%
ungroup() %>%
pivot_wider(names_from = dog_breed,
values_from = n) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
clean_names() %>%
rename(SSC_NAME16 = ssc_name16)
wide_all_n <- right_join(SSC, wide_all_n)
wide_all_p <- dog_ownership_cost %>%
group_by(SSC_NAME16, dog_breed) %>%
summarise(n = n()) %>%
ungroup() %>%
group_by(SSC_NAME16) %>%
mutate(N = sum(n)) %>%
ungroup() %>%
mutate(p = n/N) %>%
select(-n, -N) %>%
pivot_wider(names_from = dog_breed,
values_from = p) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
clean_names() %>%
rename(SSC_NAME16 = ssc_name16)
wide_all_p <- right_join(SSC, wide_all_p)wide_cost_n <- dog_ownership_cost %>%
filter(expensive == 1) %>%
group_by(SSC_NAME16, dog_breed) %>%
summarise(n = n()) %>%
ungroup() %>%
pivot_wider(names_from = dog_breed,
values_from = n) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
clean_names() %>%
rename(SSC_NAME16 = ssc_name16)
wide_cost_n <- right_join(SSC, wide_cost_n)
wide_cost_p <- dog_ownership_cost %>%
filter(expensive == 1) %>%
group_by(SSC_NAME16, dog_breed) %>%
summarise(n = n()) %>%
ungroup() %>%
group_by(SSC_NAME16) %>%
mutate(N = sum(n)) %>%
ungroup() %>%
mutate(p = n/N) %>%
select(-n, -N) %>%
pivot_wider(names_from = dog_breed,
values_from = p) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
clean_names() %>%
rename(SSC_NAME16 = ssc_name16)
wide_cost_p <- right_join(SSC, wide_cost_p)wide_insurance_n <- dog_ownership_cost %>%
# filter(cost_compared_to_other_breeds != "Below average") %>%
filter(cost_compared_to_other_breeds == "Significantly above average") %>%
group_by(SSC_NAME16, dog_breed) %>%
summarise(n = n()) %>%
ungroup() %>%
pivot_wider(names_from = dog_breed,
values_from = n) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
clean_names() %>%
rename(SSC_NAME16 = ssc_name16)
wide_insurance_n <- right_join(SSC, wide_insurance_n)
wide_insurance_p <- dog_ownership_cost %>%
# filter(cost_compared_to_other_breeds != "Below average") %>%
filter(cost_compared_to_other_breeds == "Significantly above average") %>%
group_by(SSC_NAME16, dog_breed) %>%
summarise(n = n()) %>%
ungroup() %>%
group_by(SSC_NAME16) %>%
mutate(N = sum(n)) %>%
ungroup() %>%
mutate(p = n/N) %>%
select(-n, -N) %>%
pivot_wider(names_from = dog_breed,
values_from = p) %>%
mutate_all(~replace(., is.na(.), 0)) %>%
clean_names() %>%
rename(SSC_NAME16 = ssc_name16)
wide_insurance_p <- right_join(SSC, wide_insurance_p)Using the Index of Education and Occupation (IEO) only
as outcome. More info about the index from
ABS.
Keeping all breeds and wide formats as described above; droppping spatial info.
wide_all_n %<>%
sf::st_drop_geometry() %>%
# as_tibble() %>%
select(IEO, IEO_d,
akita:last_col())
wide_all_p %<>%
sf::st_drop_geometry() %>%
# as_tibble() %>%
select(IEO, IEO_d,
akita:last_col()) %>%
rename(outcome = IEO,
outcome_d = IEO_d)nzv <- nearZeroVar(wide_all_p, saveMetrics = TRUE)nzv <- nearZeroVar(wide_all_p)
wide_all_p <- wide_all_p[, -nzv]
wide_all_n <- wide_all_n[, -nzv]
rm(nzv)wide_all_n %<>%
mutate(weight = rowSums(across(where(is.numeric)))) %>%
select(weight)75/25 split, taking into account distribution of deciles:
inTrain <- createDataPartition(
y = wide_all_p$outcome_d,
p = .75,
list = FALSE
)
training <- wide_all_p[ inTrain, ]
testing <- wide_all_p[-inTrain, ]
training_weights <- wide_all_n[ inTrain,]Training data:
outcome_d <categorical>
# total N=143 valid N=143 mean=5.45 sd=2.88
Value | N | Raw % | Valid % | Cum. %
-------------------------------------
1 | 14 | 9.79 | 9.79 | 9.79
2 | 15 | 10.49 | 10.49 | 20.28
3 | 15 | 10.49 | 10.49 | 30.77
4 | 15 | 10.49 | 10.49 | 41.26
5 | 14 | 9.79 | 9.79 | 51.05
6 | 14 | 9.79 | 9.79 | 60.84
7 | 14 | 9.79 | 9.79 | 70.63
8 | 14 | 9.79 | 9.79 | 80.42
9 | 14 | 9.79 | 9.79 | 90.21
10 | 14 | 9.79 | 9.79 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
Testing data:
outcome_d <categorical>
# total N=40 valid N=40 mean=5.50 sd=2.91
Value | N | Raw % | Valid % | Cum. %
------------------------------------
1 | 4 | 10 | 10 | 10
2 | 4 | 10 | 10 | 20
3 | 4 | 10 | 10 | 30
4 | 4 | 10 | 10 | 40
5 | 4 | 10 | 10 | 50
6 | 4 | 10 | 10 | 60
7 | 4 | 10 | 10 | 70
8 | 4 | 10 | 10 | 80
9 | 4 | 10 | 10 | 90
10 | 4 | 10 | 10 | 100
<NA> | 0 | 0 | <NA> | <NA>
Outcome distro checks
R version 4.2.0 (2022-04-22 ucrt) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363) Matrix products: default attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] caret_6.0-92 lattice_0.20-45 tmap_3.3-3 sf_1.0-8 [5] gmodels_2.18.1.1 DT_0.23 sjPlot_2.8.10 sjmisc_2.8.9 [9] janitor_2.1.0 scales_1.2.0 magrittr_2.0.3 readxl_1.4.0 [13] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9 purrr_0.3.4 [17] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7 ggplot2_3.3.6 [21] tidyverse_1.3.2 pacman_0.5.1To cite R in publications use:
R Core Team (2022). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.
To cite the ggplot2 package in publications use:
Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.
`{=html}