setwd("Course Outcomes")



dat <- read_rds("pal_data_25.rds")

dat[sapply(dat, is.character)] <- lapply(dat[sapply(dat, is.character)], as.factor)



dat$pal.flg2 <- rep("Non-PAL", length=length(dat$pal.flg))

temp <- which(dat$pal.grade == "CR")

dat$pal.flg2[temp] <- "PAL"

dat$pal.flg2 <- as.factor(dat$pal.flg2)



# Remove variables with >10% missing

prop.na <- function(x) sum(is.na(x))/length(x)

miss <- apply(dat, MARGIN=2, FUN=prop.na)

missGr10 <- which(miss > 0.1)

dat.up <- dat[,-missGr10] 



# Want to keep "hs.gpa"

dat.up <- data.frame(dat.up, dat$hs.gpa)



## Remove course repeats

temp <- which(dat.up$rpt.seq == 1)

dat.up <- dat.up[temp,]

dat.up <- dat.up[,-which(names(dat.up) %in% c("rpt.seq"))]



# Remove variables not of interest:

dat.up <- dat.up[,-which(names(dat.up) %in% 

                           c("course.descr", "session.code", "enrl.add.dt", 

                             "acad.group", "dept.abbr", "grade", "dfw.flg", 

                             "instructor1", "acad.career", "acad.prog", 

                             "Asian", "Black", "Hispanic", "Native.American",

                             "Pacific.Islander", "SWANA", "White", 

                             "last.school.attended.ext.org.id", "acad.plan1st", 

                             "plan.desc1st", "plan.degree1st", "plan.dept1st",

                             "plan.college1st", "acad.plan1", "plan.desc1", 

                             "plan.dept1", "plan.college1", "plan.degree1", 

                             "class.section", "last.school.attended",

                             "acad.group", "unt.taken", "coh.term", "term",

                             "units.passed.remedial", "cum.previous.pal", 

                             "pal.flg","num.pal","cum.pal","cum.pal.this.course",

                             "cum.previous.pal.this.course","learning.mode"))]



chem.ind <- which(dat.up$course == "CHEM 24" |

                    dat.up$course == "CHEM 124")

data.chem <- dat.up[chem.ind, ]



dat.chem24 <- data.chem[data.chem$course == "CHEM 24",]

dat.chem124 <- data.chem[data.chem$course == "CHEM 124",]



chem.merge <- merge(dat.chem24, 

                    dat.chem124[c("emplid","course","grade.num","pal.flg2")], by.x = "emplid", by.y = "emplid", 

                    suffixes = c(".1",".2"))



chem.merge[sapply(chem.merge, is.factor)] <- lapply(chem.merge[sapply(chem.merge, is.factor)], droplevels)



data.chem <- chem.merge[,-which(names(chem.merge) %in% 

                                  c("emplid","course.1","course.2"))]



dat1 <- data.chem

dat1 <- dat1[complete.cases(dat1), ]



## Identify variables causing complete separation in logistic regression

## Identify any variables that are constants

# lapply(dat1, unique)

dat1 <- dat1[,-which(names(dat1) %in% c("coh", "units.taken.remedial"))]



# Combine sparse ethnicity categories to Other

dat1 <- dat1 %>%

  mutate(eth.ipeds = fct_collapse(eth.ipeds, 

                                  `Other` = c("Native American", "Pacific Islander", 

                                              "Two or More Races", "Unknown")))
## Warning: There was 1 warning in `mutate()`.

## ℹ In argument: `eth.ipeds = fct_collapse(...)`.

## Caused by warning:

## ! Unknown levels in `f`: Native American
# Collapse sparse categories for acad.stand 

# Other:  Academic Dismissal, Academic Disqualification 

dat1 <- dat1 %>%

  mutate(acad.stndng.stat.desc = fct_other(acad.stndng.stat.desc, keep = c("Good Standing")))



# mod.final <- pal.flg2.1 ~ . -pal.flg2.2 -unt.taken.prgrss -base.time -mother.ed -grade.num.2 -enrl.tot -term.units

mod.final <- pal.flg2.2 ~ . -pal.flg2.1 -unt.taken.prgrss -base.time -mother.ed -grade.num.2 -enrl.tot -term.units



wts.full <- weightit(mod.final, data=dat1, estimand="ATE")
## Assuming "PAL" is the treated level. If not, recode the treatment so

## that 1 is treated and 0 is control.

## Assuming "PAL" is the treated level. If not, recode the treatment so

## that 1 is treated and 0 is control.
bal.tab(wts.full, un=TRUE)$Balance
##                                                      Type       Diff.Un

## prop.score                                       Distance  0.5817590250

## grade.num.1                                       Contin. -0.0831987073

## acad.stndng.stat.desc_Other                        Binary  0.0004241989

## gender_Male                                        Binary  0.0106376036

## eth.ipeds_Asian                                    Binary -0.0191215819

## eth.ipeds_Black                                    Binary  0.0067219213

## eth.ipeds_Hispanic                                 Binary -0.0069829668

## eth.ipeds_Other                                    Binary  0.0519806826

## eth.ipeds_White                                    Binary -0.0325980552

## foreign.flg_Not Foreign                            Binary  0.0201005025

## father.ed_2-Year College Graduate                  Binary  0.0335443451

## father.ed_4-Year College Graduate                  Binary  0.0411146642

## father.ed_High School Graduate                     Binary  0.0926711479

## father.ed_No High School                           Binary -0.0569079162

## father.ed_Postgraduate                             Binary -0.0167395419

## father.ed_Some College                             Binary -0.0585720812

## father.ed_Some High School                         Binary  0.0038177902

## father.ed_Unknown                                  Binary -0.0389284083

## pell.term.flg_Pell                                 Binary  0.0341643281

## term.age                                          Contin. -0.1177030900

## last.school.attended.local.flg_Local College       Binary -0.0284539581

## last.school.attended.local.flg_Local High School   Binary  0.0006852444

## last.school.attended.local.flg_Other               Binary  0.0277687137

## school.zip.median.income                          Contin. -0.1280042259

## unt.passd.prgrss                                  Contin.  0.0751052681

## tot.cumulative.start                              Contin. -0.2481740536

## cum.gpa.start                                     Contin. -0.0337664713

## ssr.cum.tr.gpa                                    Contin.  0.0471733293

## dat.hs.gpa                                        Contin.  0.0229125040

##                                                      Diff.Adj

## prop.score                                        0.030615608

## grade.num.1                                       0.015617428

## acad.stndng.stat.desc_Other                       0.004863755

## gender_Male                                      -0.022391639

## eth.ipeds_Asian                                   0.019501523

## eth.ipeds_Black                                  -0.015292670

## eth.ipeds_Hispanic                                0.024141642

## eth.ipeds_Other                                  -0.009522559

## eth.ipeds_White                                  -0.018827937

## foreign.flg_Not Foreign                           0.016855833

## father.ed_2-Year College Graduate                -0.003067472

## father.ed_4-Year College Graduate                 0.007620343

## father.ed_High School Graduate                    0.003526562

## father.ed_No High School                          0.028539573

## father.ed_Postgraduate                           -0.011103716

## father.ed_Some College                           -0.001060460

## father.ed_Some High School                       -0.011972351

## father.ed_Unknown                                -0.012482478

## pell.term.flg_Pell                                0.026716046

## term.age                                          0.068437564

## last.school.attended.local.flg_Local College     -0.008163917

## last.school.attended.local.flg_Local High School -0.004998273

## last.school.attended.local.flg_Other              0.013162190

## school.zip.median.income                         -0.051197408

## unt.passd.prgrss                                 -0.098052027

## tot.cumulative.start                              0.023736591

## cum.gpa.start                                    -0.072862463

## ssr.cum.tr.gpa                                   -0.029836194

## dat.hs.gpa                                        0.015257566
love.plot(wts.full, abs=TRUE, threshold=0.1, stars="none")
## Warning: Standardized mean differences and raw mean differences are present in the same plot. 

## Use the `stars` argument to distinguish between them and appropriately label the x-axis.

wts <- wts.full$weights

dat1$wts <- wts



# Chem 124 PAL students

length(dat1$grade.num.2[dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 46
weighted.mean(dat1$grade.num.2[dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL"],

              w = dat1$wts[dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 2.823134
length(dat1$grade.num.2[dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 31
weighted.mean(dat1$grade.num.2[dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"],

              w = dat1$wts[dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"])
## [1] 2.256758
add.eff <- which((dat1$pal.flg2.1 == "PAL" & dat1$pal.flg2.2 == "PAL") | 

                   (dat1$pal.flg2.1 == "Non-PAL" & dat1$pal.flg2.2 == "PAL"))

dat2 <- dat1[add.eff,]

dat2$ind2 <- rep("both", length(add.eff))

dat2$ind2[which(dat2$pal.flg2.1 == "Non-PAL" & dat2$pal.flg2.2 == "PAL")] <- "Second Only"

dat2$ind2 <- as.factor(dat2$ind2)



temp <- wtd.t.test(dat2$grade.num.2[dat2$ind2=="both"],

           dat2$grade.num.2[dat2$ind2=="Second Only"], 

           weight=dat2$wts[dat2$ind2=="both"],

           weighty=dat2$wts[dat2$ind2=="Second Only"])
## Warning in wtd.t.test(dat2$grade.num.2[dat2$ind2 == "both"],

## dat2$grade.num.2[dat2$ind2 == : Treating data for x and y separately because

## they are of different lengths
temp
## $test

## [1] "Two Sample Weighted T-Test (Welch)"

## 

## $coefficients

##     t.value          df     p.value 

##  2.12671500 58.51108475  0.03767169 

## 

## $additional

## Difference     Mean.x     Mean.y   Std. Err 

##  0.5663763  2.8231343  2.2567580  0.2663151