### Daniel J. Hopkins ### Poverty Paper R Code ### ### Written: 12/22/07 ### Last Edited: 10/23/08 ### ### made available publicly 5/29/2009 ### ### The data (the 2001 Poverty in America Survey, by NPR and co-authors) are not mine ### to publicly distribute but are available from the Roper Center. The data set also uses ### variables from the 2000 U.S. Census which are publicly available. ### ### Please feel free to contact the author with any questions about the code or papers. ##### Load libraries library(mice) library(Zelig) library(xtable) library(lme4) ##### identify working directory setwd( "C:/Users/Dan/Documents/Poverty") ##### load data load("povdta122207.Rdata") ##### RECODES ##### ### new variables dta$MARRIED <- 1*(dta$fmtQD06=="Currently married") ### combined state/ dta$COST <- dta$STCOD*1000+dta$COCODE dta$POVDUM <- 1 dta$POVDUM[dta$fmtPOVERTY=="200%+ Poverty"] <- 0 ### merge 200 election data data bushdta <- read.csv("gorebush2.csv") bushdta$COST <- bushdta$X.1 dtaff <- merge(dta,bushdta,by="COST",all.x=T) dtaff$COBUSH <- dtaff$Bush/(dtaff$Bush+dtaff$Gore+dtaff$Nader) #### merge additional Census data #### additional county covariates r1 <- read.csv("C:/Users/Dan/Documents/census/region100CO.csv") r2 <- read.csv("C:/Users/Dan/Documents/census/region200CO.csv") r3 <- read.csv("C:/Users/Dan/Documents/census/region300CO.csv") r4 <- read.csv("C:/Users/Dan/Documents/census/region400CO.csv") r90 <- read.csv("C:/Users/Dan/Documents/census/counties1990.csv") rr <- rbind(r1,r2,r3,r4) rr$COPCHIS00 <- rr$P007010/rr$P001001 rr$COPCSHO00 <- rr$P024002/rr$P024001 rr$COMHINC00 <- rr$P053001 rr$COPCPA00 <- rr$P064002/rr$P062001 rr$COPCIMM00 <- rr$P021013/rr$P001001 rr$COHOVAC00 <- rr$H006003/rr$H006001 rr$LGPOP00 <- log(rr$P001001) library(ineq) for(i in 1:dim(rr)[1]){ #if(! i %in% c(129,134,279)){ rr$COHIGINI[i] <- Gini(c(rep(5,rr$P052002[i]),rep(12.5,rr$P052003[i]),rep(17.5,rr$P052004[i]),rep(22.5,rr$P052005[i]),rep(27.5,rr$P052006[i]),rep(32.5,rr$P052007[i]),rep(37.5,rr$P052008[i]),rep(42.5,rr$P052009[i]),rep(47.5,rr$P052010[i]),rep(55,rr$P052011[i]),rep(67.5,rr$P052012[i]),rep(67.5,rr$P052012[i]),rep(87.5,rr$P052013[i]),rep(112.5,rr$P052014[i]),rep(137.5,rr$P052015[i]),rep(175,rr$P052016[i]),rep(250,rr$P052017[i]))) #} } colnames(r90)[2] <- "COUNTY" colnames(r90)[12] <- "STATE" rr2 <- merge(rr,r90,by=c("COUNTY","STATE")) rr2$LGPOPD90S <- log(rr2$P001001)-log(rr2$POP100) dtaff$COUNTY <- dtaff$COCODE dtaff$STATE <- dtaff$STCOD dtaf2 <- merge(dtaff,rr2,by=c("STATE","COUNTY")) dtaf2$ASIAN <- 1*(dtaf2$fmtQD12=="Asian-American") ###subset out blacks dtaf <- dtaf2[dtaf2$BLK==0 & ! dtaf2$BLK %in% c(NA),] ### correlations ivs <- c("INCOME","POOR2","PID","COMBACH","COBUSH","WHITEPOV2","BLCKPOP","PCTBELOWPOV") X1 <- subset(dtaf,select=c(ivs)) cmat <- cor(X1,use="pairwise.complete.obs") rownames(cmat) <- colnames(cmat) <- c("Income","Poor","Party ID","Pct. with BA","Pct. for Bush","Pct. Poor are Wh.","Pct. Black","Pct. Below Pov.") xtable(cmat) dtaf$HISPPOV2 <- dtaf$P159H002/(dtaf$P159A002+dtaf$P159B002+dtaf$P159H002) aivs <- c("HISPPOV2","LGPOP00","COHIGINI","COHOVAC00","COPCPA00","COPCIMM00","COPCSHO00","LGPOPD90S") ivs <- c("MOREWELBL","PCTPOORAFAM","POORFRIEND","SUBURB","BLCKPOP","PCTBELOWPOV","SOUTH","POVDUM","WGHTFCTR","EMPLOYED","CITY","EDUC","INCOME","PROT2","EVAN","MARRIED","POOR2","MALE","PID","IDEO","HISP","ASIAN","POPDNS","COBUSH","HISPPOP","WHITEPOV2","COMBACH","AGE","STRUCSCORES") X1 <- subset(dta2,select=c(aivs,ivs,"POVSTRUC","DRUGS","MEDS","LOWWAGE","SINGLEPAR","NOJOBS","WELF","IMMIG","MOTIVE","LOWMORALS","BADSCHOOL")) X2 <- mice(X1) dta1 <- complete(X2,action=1) dta2 <- complete(X2,action=2) dta3 <- complete(X2,action=3) dta4 <- complete(X2,action=4) dta5 <- complete(X2,action=5) dta1$INCOMEi5 <- dta1$INCOME/1000 dta2$INCOMEi5 <- dta2$INCOME/1000 dta3$INCOMEi5 <- dta3$INCOME/1000 dta4$INCOMEi5 <- dta4$INCOME/1000 dta5$INCOMEi5 <- dta5$INCOME/1000 dta1$POVSTRUC3i <- (dta1$POVSTRUC==1)*1 dta2$POVSTRUC3i <- (dta2$POVSTRUC==1)*1 dta3$POVSTRUC3i <- (dta3$POVSTRUC==1)*1 dta4$POVSTRUC3i <- (dta4$POVSTRUC==1)*1 dta5$POVSTRUC3i <- (dta5$POVSTRUC==1)*1 T4 <- zelig(MOREWELBL ~ COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOME + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + POVDUM + AGE+MALE, model="logit",data=mi(dta1,dta2,dta3,dta4,dta5)) x1 <- setx(T4,WHITEPOV2=.3746) x2 <- setx(T4,WHITEPOV2=.959) ss <- sim(T4,x=x2,x1=x1) T4 <- zelig(PCTPOORAFAM ~ COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOME + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + POVDUM + AGE+MALE, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) x1 <- setx(T4,WHITEPOV2=.3746) x2 <- setx(T4,WHITEPOV2=.959) ss <- sim(T4,x=x2,x1=x1) #### additional measures for reply to Rodgers dtaf$PCPOOR6590 <- dtaf$Pov65p/dtaf$Per65p dtaf$PCPOOR1890 <- dtaf$PovFcu18/dtaf$FCu18 aivs <- c("HISPPOV2","LGPOP00","COHIGINI","COHOVAC00","COPCPA00","COPCIMM00","COPCSHO00","LGPOPD90S") ivs <- c("APARTMENTS","PCPOOR1890","PCPOOR6590","WHITEPOV","TRAILERS","MOREWELBL","PCTPOORAFAM","POORFRIEND","SUBURB","BLCKPOP","PCTBELOWPOV","SOUTH","POVDUM","WGHTFCTR","EMPLOYED","CITY","EDUC","INCOME","PROT2","EVAN","MARRIED","POOR2","MALE","PID","IDEO","HISP","ASIAN","POPDNS","COBUSH","HISPPOP","WHITEPOV2","COMBACH","AGE","STRUCSCORES") X1 <- subset(dtaf,select=c(aivs,ivs,"POVSTRUC","DRUGS","MEDS","LOWWAGE","SINGLEPAR","NOJOBS","WELF","IMMIG","MOTIVE","LOWMORALS","BADSCHOOL")) ### multiple imputation X2 <- mice(X1) dta1 <- complete(X2,action=1) dta2 <- complete(X2,action=2) dta3 <- complete(X2,action=3) dta4 <- complete(X2,action=4) dta5 <- complete(X2,action=5) dta1$INCOMEi5 <- dta1$INCOME/1000 dta2$INCOMEi5 <- dta2$INCOME/1000 dta3$INCOMEi5 <- dta3$INCOME/1000 dta4$INCOMEi5 <- dta4$INCOME/1000 dta5$INCOMEi5 <- dta5$INCOME/1000 dta1$POVSTRUC3i <- (dta1$POVSTRUC==1)*1 dta2$POVSTRUC3i <- (dta2$POVSTRUC==1)*1 dta3$POVSTRUC3i <- (dta3$POVSTRUC==1)*1 dta4$POVSTRUC3i <- (dta4$POVSTRUC==1)*1 dta5$POVSTRUC3i <- (dta5$POVSTRUC==1)*1 dta1$POVSTRUCN <- 1-dta1$POVSTRUC dta2$POVSTRUCN <- 1-dta2$POVSTRUC dta3$POVSTRUCN <- 1-dta3$POVSTRUC dta4$POVSTRUCN <- 1-dta4$POVSTRUC dta5$POVSTRUCN <- 1-dta5$POVSTRUC ### factor analysis fout1 <- factanal(~POVSTRUCN+DRUGS+MEDS+LOWWAGE+SINGLEPAR+NOJOBS+WELF+MOTIVE+LOWMORALS+BADSCHOOL,data=dta1,factors=2,scores="regression") cor(fout1$scores[,1],dta1$STRUCSCORES) cor(fout1$scores[,2],dta1$STRUCSCORES) XT <- fout1$loadings XT dta1$SCORESF1 <- fout1$scores[,1] dta1$SCORESF2 <- fout1$scores[,2] fout2 <- factanal(~POVSTRUC+DRUGS+MEDS+LOWWAGE+SINGLEPAR+NOJOBS+WELF+MOTIVE+LOWMORALS+BADSCHOOL,data=dta2,factors=2,scores="regression") cor(fout2$scores[,1],dta2$STRUCSCORES) cor(fout2$scores[,2],dta2$STRUCSCORES) dta2$SCORESF1 <- fout2$scores[,1] dta2$SCORESF2 <- fout2$scores[,2] fout3 <- factanal(~POVSTRUC+DRUGS+MEDS+LOWWAGE+SINGLEPAR+NOJOBS+WELF+MOTIVE+LOWMORALS+BADSCHOOL,data=dta3,factors=2,scores="regression") cor(fout3$scores[,1],dta3$STRUCSCORES) cor(fout3$scores[,2],dta3$STRUCSCORES) dta3$SCORESF1 <- fout2$scores[,1] dta3$SCORESF2 <- fout2$scores[,2] fout4 <- factanal(~POVSTRUC+DRUGS+MEDS+LOWWAGE+SINGLEPAR+NOJOBS+WELF+MOTIVE+LOWMORALS+BADSCHOOL,data=dta4,factors=2,scores="regression") cor(fout4$scores[,1],dta4$STRUCSCORES) cor(fout4$scores[,2],dta4$STRUCSCORES) dta4$SCORESF1 <- fout4$scores[,1] dta4$SCORESF2 <- fout4$scores[,2] fout5 <- factanal(~POVSTRUC+DRUGS+MEDS+LOWWAGE+SINGLEPAR+NOJOBS+WELF+MOTIVE+LOWMORALS+BADSCHOOL,data=dta5,factors=2,scores="regression") cor(fout5$scores[,1],dta5$STRUCSCORES) cor(fout5$scores[,2],dta5$STRUCSCORES) dta5$SCORESF1 <- fout5$scores[,1] dta5$SCORESF2 <- fout5$scores[,2] ###descriptive stats ### dtad <- dta1 des <- c("EDUC","INCOME","POOR2","AGE","HISP","ASIAN","MALE","IDEO","PID","EVAN","PROT","BLCKPOP","WHITEPOV2","COMBACH","PCTBELOWPOV","COBUSH","HISPPOP","CITY","SUBURB","SOUTH","POORFRIEND","PCTPOORAFAM","MOREWELBL","SCORESF1") attach(dta1) res <- matrix(NA, length(des), 4) for(i in 1:length(des)){ txt1 <- paste("dtad$VAR<-dta1$",des[i],sep="") eval(parse(text=txt1)) res[i,1] <- min(dtad$VAR,na.rm=T) res[i,2] <- max(dtad$VAR,na.rm=T) res[i,3] <- weighted.mean(dtad$VAR,dta1$WGHTFCTR,na.rm=T) res[i,4] <- sd(dtad$VAR,na.rm=T) } rownames(res) <- des round(res,digits=2) dta1$INCOMEi10 <- dta1$INCOME/100 dta2$INCOMEi10 <- dta2$INCOME/100 dta3$INCOMEi10 <- dta3$INCOME/100 dta4$INCOMEi10 <- dta4$INCOME/100 dta5$INCOMEi10 <- dta5$INCOME/100 dta1$AGE2 <- dta1$AGE/100 dta2$AGE2 <- dta2$AGE/100 dta3$AGE2 <- dta3$AGE/100 dta4$AGE2 <- dta4$AGE/100 dta5$AGE2 <- dta5$AGE/100 dta1$STRUCSCORES2 <- - dta1$STRUCSCORES dta2$STRUCSCORES2 <- - dta2$STRUCSCORES dta3$STRUCSCORES2 <- - dta3$STRUCSCORES dta4$STRUCSCORES2 <- - dta4$STRUCSCORES dta5$STRUCSCORES2 <- - dta5$STRUCSCORES dta1$POVSTRUCN3 <- dta1$POVSTRUCN2-1 dta2$POVSTRUCN3 <- dta2$POVSTRUCN2-1 dta3$POVSTRUCN3 <- dta3$POVSTRUCN2-1 dta4$POVSTRUCN3 <- dta4$POVSTRUCN2-1 dta5$POVSTRUCN3 <- dta5$POVSTRUCN2-1 dta1np <- dta1[dta1$POVDUM==0,] dta2np <- dta2[dta2$POVDUM==0,] dta3np <- dta3[dta3$POVDUM==0,] dta4np <- dta4[dta4$POVDUM==0,] dta5np <- dta5[dta5$POVDUM==0,] #### predicted probabilities T4 <- zelig(-SCORESF1 ~ COBUSH +WHITEPOV2 +POPDNS+ HISPPOV2 + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + POVDUM + AGE2+MALE, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) ##### specification checks ### PCPOOR6590 dta1$PCPOORPA <- dta1$COPCPA00/dta1$PCTBELOWPOV dta2$PCPOORPA <- dta2$COPCPA00/dta2$PCTBELOWPOV dta3$PCPOORPA <- dta3$COPCPA00/dta3$PCTBELOWPOV dta4$PCPOORPA <- dta4$COPCPA00/dta4$PCTBELOWPOV dta5$PCPOORPA <- dta5$COPCPA00/dta5$PCTBELOWPOV T6 <- zelig(-SCORESF1 ~ APARTMENTS+WHITEPOV2 +COBUSH +POPDNS+ HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + POVDUM + AGE2+MALE, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) summary(T6) T6 <- zelig(SCORESF2N ~ APARTMENTS+WHITEPOV2+COBUSH +POPDNS+ HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + POVDUM + AGE2+MALE, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) summary(T6) T6 <- zelig(-SCORESF1 ~ COPCPA00 +COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + POVDUM + AGE2+MALE, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) summary(T6) T6 <- zelig(SCORESF2N ~ COPCPA00+COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + POVDUM + AGE2+MALE, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) summary(T6) x1 <- setx(T4,COBUSH=.63)#,WHITEPOV2=.3746) x2 <- setx(T4,COBUSH=.38)#,WHITEPOV2=.959) ## x1 <- setx(T4,COBUSH=.634,WHITEPOV2=.3746) x2 <- setx(T4,COBUSH=.3343,WHITEPOV2=.959) ### x1 <- setx(T4,WHITEPOV2=.3858) x2 <- setx(T4,WHITEPOV2=.8702) ss1 <- sim(T4,x=x1) mean(ss1$qi$ev) ss <- sim(T4,x=x1,x1=x2) quantile(ss$qi$fd,.025) quantile(ss$qi$fd,.5) quantile(ss$qi$fd,.975) ########## predicted outcomes ###### just structuralism dta1$SCORESF2N <- -dta1$SCORESF2 dta2$SCORESF2N <- -dta2$SCORESF2 dta3$SCORESF2N <- -dta3$SCORESF2 dta4$SCORESF2N <- -dta4$SCORESF2 dta5$SCORESF2N <- -dta5$SCORESF2 t4S <- zelig(SCORESF2N ~ COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + AGE2+MALE+POVDUM, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) summary(t4S) summary(t4S)$coef[3,3] rnames<-c(rownames(summary(t4S)$coef),"R Squared","df") a<-rbind(summary(t4S)$coef[,c(1,2)],c(1526,NA),c(0.138,NA)) rownames(a) <- rnames library(xtable) xtable(a,digits=rep(2,3)) print(summary(t4S),subset=1) #### TABLE FACTOR ANALYSIS SCORES ### individualism t4 <- zelig(-SCORESF1 ~ COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + AGE2+MALE+POVDUM, model="ls",data=mi(dta1,dta2,dta3,dta4,dta5)) summary(t4) summary(t4)$coef[3,3] t4N <- zelig(-SCORESF1 ~ COBUSH + WHITEPOV2+POPDNS + HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2 + AGE2+MALE, model="ls",data=mi(dta1np,dta2np,dta3np,dta4np,dta5np)) summary(t4N) t4AA <- zelig(-SCORESF1 ~ COBUSH + WHITEPOV2+POPDNS + HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2+ AGE2+MALE+PCTPOORAFAM+MOREWELBL, model="ls",data=mi(dta1np,dta2np,dta3np,dta4np,dta5np)) summary(t4AA) t4BB <- zelig(-SCORESF1 ~ COBUSH + WHITEPOV2+POPDNS + HISPPOP + COMBACH +EDUC + INCOMEi10 + HISP + ASIAN+IDEO+PID+POOR2+PROT2+ AGE2+MALE+POORFRIEND, model="ls",data=mi(dta1np,dta2np,dta3np,dta4np,dta5np)) summary(t4BB) rnames<-c(rownames(summary(t4)$coef),"PCTPOORAFAM","MORE WELBL","POORFRIEND","R Squared","df") a<-rbind(summary(t4)$coef[,c(1,2)],c(NA,NA),c(NA,NA),c(NA,NA),c(1526,NA),c(0.133,NA)) c<-rbind(summary(t4N)$coef[,c(1,2)],c(NA,NA),c(NA,NA),c(NA,NA),c(NA,NA),c(853,NA),c(0.197,NA)) d<-rbind(summary(t4AA)$coef[1:16,c(1,2)],c(NA,NA),summary(t4AA)$coef[17:18,c(1,2)],c(NA,NA),c(851,NA),c(0.206,NA)) e<-rbind(summary(t4BB)$coef[1:16,c(1,2)],c(NA,NA),c(NA,NA),c(NA,NA),summary(t4BB)$coef[17,c(1,2)],c(852,NA),c(0.197,NA)) tab <- cbind(a,c,d,e) rownames(tab) <- rnames library(xtable) xtable(tab,digits=rep(2,9)) print(summary(t4BB),subset=1)$r.squared[1] ######## ROBUSTNESS ### logit, listwise deletion T4 <- zelig(POVSTRUC ~ COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOME + HISP + IDEO+PID+POOR2+PROT2 + POVDUM + AGE+MALE, model="logit",data=dtaf) x1 <- setx(T4,WHITEPOV2=.33) x2 <- setx(T4,WHITEPOV2=.83) x1 <- setx(T4,WHITEPOV2=.3858) x2 <- setx(T4,WHITEPOV2=.8702) ss <- sim(T4,x=x1,x1=x2) quantile(ss$qi$fd,.025) mean(ss$qi$fd) quantile(ss$qi$fd,.975) ### logit, listwise deletion T4 <- zelig(POVSTRUC ~ COBUSH +WHITEPOV2 +POPDNS+ HISPPOP + COMBACH +EDUC + INCOME + HISP + IDEO+PID+POOR2+PROT2 + POVDUM + AGE+MALE, model="logit",data=dtaf) x1 <- setx(T4,COBUSH=.375) x2 <- setx(T4,COBUSH=.625) ss <- sim(T4,x=x1,x1=x2) quantile(ss$qi$fd,.025) mean(ss$qi$fd) quantile(ss$qi$fd,.975) x1 <- setx(T4,WHITEPOV2=.3858) x2 <- setx(T4,WHITEPOV2=.8702) s1 <- sim(T4,x=x1) mean(s1$qi$ev) s2 <- sim(T4,x=x2) mean(s2$qi$ev) ### reduced specification txt <- paste('t4 <- zelig(SCORESF1 ~ COBUSH+WHITEPOV2+POPDNS +PID+IDEO+EDUC + INCOMEi10+POOR2 + POVDUM +MALE, model="ls",data=mi(dta1np,dta2np,dta3np,dta4np,dta5np))',sep='') eval(parse(text=txt)) summary(t4) summary(t4)$coef[2,] ### without ideology, PID txt <- paste('t4 <- zelig(SCORESF1 ~ COBUSH+WHITEPOV2 +POPDNS +EDUC + INCOMEi10+POOR2 + POVDUM +MALE, model="ls",data=mi(dta1np,dta2np,dta3np,dta4np,dta5np))',sep='') eval(parse(text=txt)) summary(t4)$coef[2,] ### specification tests tivs <- c(aivs,"EVAN","MARRIED","EMPLOYED","SOUTH","SUBURB","BLCKPOP","PCTBELOWPOV","LGPOP00") resmat <- matrix(NA,length(tivs),4) for(i in 1:length(tivs)){ txt <- paste('t4E <- zelig(SCORESF1~ COBUSH+WHITEPOV2+',tivs[i],'+POPDNS + HISPPOP + COMBACH +EDUC + INCOMEi10+HISP + ASIAN+IDEO+PID+POOR2+PROT2 + AGE2+MALE, model="ls",data=mi(dta1np,dta2np,dta3np,dta4np,dta5np))',sep='') eval(parse(text=txt)) resmat[i,1] <- summary(t4E)$coef[2,1] resmat[i,2] <- summary(t4E)$coef[2,2] resmat[i,3] <- summary(t4E)$coef[3,1] resmat[i,4] <- summary(t4E)$coef[3,2] } rownames(resmat) <- tivs