-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpsy-data-desc.R
68 lines (54 loc) · 1.7 KB
/
psy-data-desc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Packages this analysis expects to have attached.
libraries <- c(
  "dplyr", "magrittr", "tidyverse", "purrr",
  "sjlabelled", # read SPSS
  "caret", "doParallel",
  "stargazer", "DataExplorer", "skimr",
  "machinelearningtools",
  "knitr", "pander"
)

# Attach each package in turn. require() returns FALSE (with a warning)
# instead of stopping when a package is missing, so the named logical
# vector printed here shows which packages actually loaded.
vapply(libraries, require, FUN.VALUE = logical(1), character.only = TRUE)
# Encoding switch for ordinal variables; it is not read anywhere in the
# code visible in this part of the script.
# nominal <- FALSE # with ordinal as ORDERED factors
nominal <- TRUE # presumably: ordinal as nominal (unordered) factors -- original comment was truncated, TODO confirm

# Path to the SPSS data file, relative to the working directory.
filename <- "data/Personality-Performance-Turnover-Chaehan So.sav"

# Import the SPSS file quietly, converting labelled atomic variables to factors.
file.raw <- sjlabelled::read_spss(
  filename,
  atomic.to.fac = TRUE,
  verbose = FALSE
)

# Read the same file a second time with foreign::read.spss() only to pull
# out its "variable.labels" attribute, then show the labels on the console.
data.labels <- attributes(foreign::read.spss(filename))$variable.labels
print(data.labels)
# data.labels %>% saveRDS("data/data.labels.rds")
# Drop the id and TESTDATE columns and keep the result as a tibble.
# as_tibble() is used instead of tbl_df(): dplyr deprecated tbl_df() in
# version 1.0.0 and it now only wraps as_tibble() with a deprecation warning.
file.raw <- file.raw %>%
  dplyr::select(-id, -TESTDATE) %>%
  dplyr::as_tibble()

# Number of rows whose job code is neither 1 nor 10.
# filter() keeps only rows where the condition is TRUE, so rows with a
# missing job value are excluded from this count as well.
file.raw %>%
  filter(job != 1 & job != 10) %>%
  count()
# Build the typed analysis table from the raw import.
# NOTE(review): as.numeric() applied to a factor returns its integer level
# codes, not the numbers shown in its labels -- confirm that the TO* columns
# and PERF07 are not factors at this point, or that the codes are intended.
data.raw <- file.raw %>%
  # categorical variables become factors
  mutate_at(
    c("COMNAME", "team_id", "class", "job", "gender", "educa"),
    as.factor
  ) %>%
  # every column whose name starts with "TO" becomes numeric
  mutate_at(vars(starts_with("TO")), as.numeric) %>%
  # PERF07 is coerced separately to repair an import error
  mutate_at("PERF07", as.numeric)

# Show the converted table on the console.
print(data.raw)
# Keep only the columns used for the sample description and display them.
dataset <- select(data.raw, job, gender, educa, prinum)
print(dataset)

# job type: number of rows per job category
dataset %>%
  count(job)
# gender: number of rows per category and its share of all rows in percent
dataset %>%
  count(gender) %>%
  mutate(perc = n / sum(n) * 100)
# age: descriptive statistics for the sample-description columns plus age
dataset %>%
  # split prinum after its second character; the first piece is kept as "year"
  separate(prinum, into = c("year", "monthday"), sep = 2) %>%
  # calculate age from year 2010 (PERF10): 110 minus the two-digit year
  mutate(age = 110 - as.numeric(year)) %>%
  # the remainder of prinum is not needed for the summary
  select(-monthday) %>%
  psych::describe()
# education: number of rows per category and its share of all rows in percent
dataset %>%
  count(educa) %>%
  mutate(perc = n / sum(n) * 100)