setwd("Course Outcomes")
dat <- read_rds("pal_data_25.rds")
dat[sapply(dat, is.character)] <- lapply(dat[sapply(dat, is.character)], as.factor)
dat$pal.flg2 <- rep("Non-PAL", length=length(dat$pal.flg))
temp <- which(dat$pal.grade == "CR")
dat$pal.flg2[temp] <- "PAL"
dat$pal.flg2 <- as.factor(dat$pal.flg2)
# Remove variables with >10% missing
prop.na <- function(x) sum(is.na(x))/length(x)
miss <- apply(dat, MARGIN=2, FUN=prop.na)
missGr10 <- which(miss > 0.1)
dat.up <- dat[,-missGr10]
# Want to keep "hs.gpa"
dat.up <- data.frame(dat.up, dat$hs.gpa)
## Remove course repeats
temp <- which(dat.up$rpt.seq == 1)
dat.up <- dat.up[temp,]
dat.up <- dat.up[,-which(names(dat.up) %in% c("rpt.seq"))]
# Remove variables not of interest:
dat.up <- dat.up[,-which(names(dat.up) %in%
c("course.descr", "session.code", "enrl.add.dt",
"acad.group", "dept.abbr", "grade", "dfw.flg",
"instructor1", "acad.career", "acad.prog",
"Asian", "Black", "Hispanic", "Native.American",
"Pacific.Islander", "SWANA", "White",
"last.school.attended.ext.org.id", "acad.plan1st",
"plan.desc1st", "plan.degree1st", "plan.dept1st",
"plan.college1st", "acad.plan1", "plan.desc1",
"plan.dept1", "plan.college1", "plan.degree1",
"class.section", "last.school.attended",
"acad.group", "unt.taken", "coh.term", "term",
"units.passed.remedial", "cum.previous.pal",
"pal.flg","num.pal","cum.pal","cum.pal.this.course",
"cum.previous.pal.this.course","learning.mode"))]
chem.ind <- which(dat.up$course == "CHEM 24" |
dat.up$course == "CHEM 124")
data.chem <- dat.up[chem.ind, ]
dat.chem24 <- data.chem[data.chem$course == "CHEM 24",]
dat.chem124 <- data.chem[data.chem$course == "CHEM 124",]
chem.merge <- merge(dat.chem24,
dat.chem124[c("emplid","course","grade.num","pal.flg2")], by.x = "emplid", by.y = "emplid",
suffixes = c(".1",".2"))
chem.merge[sapply(chem.merge, is.factor)] <- lapply(chem.merge[sapply(chem.merge, is.factor)], droplevels)
data.chem <- chem.merge[,-which(names(chem.merge) %in%
c("emplid","course.1","course.2"))]
dat1 <- data.chem
dat1 <- dat1[complete.cases(dat1), ]
## Identify variables causing complete separation in logistic regression
## Identify any variables that are constants
# lapply(dat1, unique)
dat1 <- dat1[,-which(names(dat1) %in% c("coh", "units.taken.remedial"))]
# Combine sparse ethnicity categories to Other
dat1 <- dat1 %>%
mutate(eth.ipeds = fct_collapse(eth.ipeds,
`Other` = c("Native American", "Pacific Islander",
"Two or More Races", "Unknown")))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `eth.ipeds = fct_collapse(...)`.
## Caused by warning:
## ! Unknown levels in `f`: Native American
# Collapse sparse categories for acad.stand
# Other: Academic Dismissal, Academic Disqualification
dat1 <- dat1 %>%
mutate(acad.stndng.stat.desc = fct_other(acad.stndng.stat.desc, keep = c("Good Standing")))
# mod.final <- pal.flg2.1 ~ . -pal.flg2.2 -unt.taken.prgrss -base.time -mother.ed -grade.num.2 -enrl.tot -term.units
mod.final <- pal.flg2.2 ~ . -pal.flg2.1 -unt.taken.prgrss -base.time -mother.ed -grade.num.2 -enrl.tot -term.units
wts.full <- weightit(mod.final, data=dat1, estimand="ATE")
## Assuming "PAL" is the treated level. If not, recode the treatment so
## that 1 is treated and 0 is control.
## Assuming "PAL" is the treated level. If not, recode the treatment so
## that 1 is treated and 0 is control.
bal.tab(wts.full, un=TRUE)$Balance
## Type Diff.Un
## prop.score Distance 0.5817590250
## grade.num.1 Contin. -0.0831987073
## acad.stndng.stat.desc_Other Binary 0.0004241989
## gender_Male Binary 0.0106376036
## eth.ipeds_Asian Binary -0.0191215819
## eth.ipeds_Black Binary 0.0067219213
## eth.ipeds_Hispanic Binary -0.0069829668
## eth.ipeds_Other Binary 0.0519806826
## eth.ipeds_White Binary -0.0325980552
## foreign.flg_Not Foreign Binary 0.0201005025
## father.ed_2-Year College Graduate Binary 0.0335443451
## father.ed_4-Year College Graduate Binary 0.0411146642
## father.ed_High School Graduate Binary 0.0926711479
## father.ed_No High School Binary -0.0569079162
## father.ed_Postgraduate Binary -0.0167395419
## father.ed_Some College Binary -0.0585720812
## father.ed_Some High School Binary 0.0038177902
## father.ed_Unknown Binary -0.0389284083
## pell.term.flg_Pell Binary 0.0341643281
## term.age Contin. -0.1177030900
## last.school.attended.local.flg_Local College Binary -0.0284539581
## last.school.attended.local.flg_Local High School Binary 0.0006852444
## last.school.attended.local.flg_Other Binary 0.0277687137
## school.zip.median.income Contin. -0.1280042259
## unt.passd.prgrss Contin. 0.0751052681
## tot.cumulative.start Contin. -0.2481740536
## cum.gpa.start Contin. -0.0337664713
## ssr.cum.tr.gpa Contin. 0.0471733293
## dat.hs.gpa Contin. 0.0229125040
## Diff.Adj
## prop.score 0.030615608
## grade.num.1 0.015617428
## acad.stndng.stat.desc_Other 0.004863755
## gender_Male -0.022391639
## eth.ipeds_Asian 0.019501523
## eth.ipeds_Black -0.015292670
## eth.ipeds_Hispanic 0.024141642
## eth.ipeds_Other -0.009522559
## eth.ipeds_White -0.018827937
## foreign.flg_Not Foreign 0.016855833
## father.ed_2-Year College Graduate -0.003067472
## father.ed_4-Year College Graduate 0.007620343
## father.ed_High School Graduate 0.003526562
## father.ed_No High School 0.028539573
## father.ed_Postgraduate -0.011103716
## father.ed_Some College -0.001060460
## father.ed_Some High School -0.011972351
## father.ed_Unknown -0.012482478
## pell.term.flg_Pell 0.026716046
## term.age 0.068437564
## last.school.attended.local.flg_Local College -0.008163917
## last.school.attended.local.flg_Local High School -0.004998273
## last.school.attended.local.flg_Other 0.013162190
## school.zip.median.income -0.051197408
## unt.passd.prgrss -0.098052027
## tot.cumulative.start 0.023736591
## cum.gpa.start -0.072862463
## ssr.cum.tr.gpa -0.029836194
## dat.hs.gpa 0.015257566
love.plot(wts.full, abs=TRUE, threshold=0.1, stars="none")
## Warning: Standardized mean differences and raw mean differences are present in the same plot.
## Use the `stars` argument to distinguish between them and appropriately label the x-axis.

wts <- wts.full$weights
dat1$wts <- wts
# Chem 124 PAL students
length(dat1$grade.num.2[dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 46
weighted.mean(dat1$grade.num.2[dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL"],
w = dat1$wts[dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 2.823134
length(dat1$grade.num.2[dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 31
weighted.mean(dat1$grade.num.2[dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"],
w = dat1$wts[dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 2.256758
add.eff <- which((dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL") |
(dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"))
dat2 <- dat1[add.eff,]
dat2$ind2 <- rep("both", length(add.eff))
dat2$ind2[which(dat2$pal.flg2.1 == "Non-PAL" & dat2$pal.flg2.2 == "PAL")] <- "Second Only"
dat2$ind2 <- as.factor(dat2$ind2)
temp <- wtd.t.test(dat2$grade.num.2[dat2$ind2=="both"],
dat2$grade.num.2[dat2$ind2=="Second Only"],
weight=dat2$wts[dat2$ind2=="both"],
weighty=dat2$wts[dat2$ind2=="Second Only"])
## Warning in wtd.t.test(dat2$grade.num.2[dat2$ind2 == "both"],
## dat2$grade.num.2[dat2$ind2 == : Treating data for x and y separately because
## they are of different lengths
temp
## $test
## [1] "Two Sample Weighted T-Test (Welch)"
##
## $coefficients
## t.value df p.value
## 2.12671500 58.51108475 0.03767169
##
## $additional
## Difference Mean.x Mean.y Std. Err
## 0.5663763 2.8231343 2.2567580 0.2663151