Write to Excel with borders around groups
RMH

@theartofmadeline
will byers stan first human second

shark vs the universe

No title available

No title available
Not today Justin

tannertan36

No title available

JBB: An Artblog!

Discoholic 🪩
ojovivo
almost home
hello vonnie

PR's Tumblrdome

⁂
dirt enthusiast
noise dept.
Game of Thrones Daily

#extradirty
seen from United States

seen from United States

seen from United Kingdom
seen from United States

seen from United States

seen from United States
seen from United States

seen from United States
seen from Singapore

seen from United Kingdom

seen from United States

seen from United States

seen from Türkiye

seen from South Korea

seen from T1

seen from Sweden

seen from Türkiye

seen from Türkiye
seen from United States
seen from United States
@rgraphs
Write to Excel with borders around groups
Map in-sample counties
countygeo<-get_acs(geography="county",variables="B19013_001",geometry=TRUE,keep_geo_vars=TRUE) us<-get_estimates(geography="us",variables="POP",geometry=TRUE,keep_geo_vars=TRUE) states<-get_estimates(geography="state",variables="POP",geometry=TRUE,keep_geo_vars=TRUE) counties<-countygeo %>% mutate(county_name=str_remove_all(NAMELSAD,"\\.| County| Parish|\\s") %>% toupper()) %>% left_join( forplot %>% filter(count==8 & level=="Threshold = 95%" & group=="Above threshold in all 8 years") ,by=c("STUSPS"="state_postal_abbr","county_name") ) %>% mutate(insamp=if_else(is.na(meanpop),0,1)) p2<-ggplot(counties)+ geom_sf(size=0.05,color="white",aes(fill=factor(insamp,levels=c(0,1),labels=c("No","Yes"))),alpha=0.8)+ geom_sf(data=us,size=.1,color="black",alpha=0)+ geom_sf(data=states,size=.05,color="black",alpha=0)+ coord_sf(xlim=c(-125,-67.5),ylim=c(25,50))+ guides( fill=guide_legend("County in sample: ") ) CairoPNG("CountySampMap.png",width=10,height=7,units="in",res=500) print(p2) dev.off()
Density Comparison with Stats Inset
#stack file with itself to facilitate graphics forplot<-counties.collapsed %>% filter(count==8) %>% mutate(group="Above threshold in all 8 years") %>% bind_rows( counties.collapsed %>% mutate(group="All counties") ) t<-theme_set(theme_bw()) t<-theme_update( axis.title.x=element_text(hjust=0.5), #axis.line=element_line(colour="black",lineend="square"), panel.grid.minor=element_blank(), #panel.grid.major.y=element_line(colour="black",size=0.1), #panel.grid.major=element_blank(), axis.ticks=element_line(colour="black",size=0.25), #axis.ticks.x=element_blank(), #strip.text=element_blank(), legend.position = "bottom", legend.direction="horizontal", legend.box="vertical", #legend.title=element_blank(), legend.text=element_text(size=8), legend.spacing=unit(0,"cm"), legend.margin=margin(b=0,t=0,unit='cm'), axis.text.x=element_text(angle=45, hjust=1, vjust=1,size=8), axis.text.y=element_text(size=8), plot.title=element_text(size=12,margin=margin(b=10),hjust=0), plot.subtitle=element_text(size=8,color="darkslategrey",margin=margin(b=25,l=-25)), plot.caption = element_text(size=8,margin=margin(t=10),color="darkslategrey",hjust=0) ) #median pop and sample size for graphics vlines<-forplot %>% group_by(level,group) %>% summarize(median=median(meanpop),n=n()) #density plots by coverage threshold vs. all p1<-ggplot(data=forplot,aes(x=meanpop,color=reorder(group,desc(group)),fill=reorder(group,desc(group))))+ geom_density(alpha=0.3)+ annotation_custom( grob=roundrectGrob( gp=gpar() ) ,xmin=log10(400000),xmax=log10(8000000) ,ymin=.2,ymax=.75 )+ annotate(geom="text",x=500000,y=.65,label="Median",hjust=0)+ geom_text( data=vlines ,aes( x=500000 ,y=if_else(group=="All counties",.35,.5) ,label=prettyNum(median,big.mark=",",scientific=FALSE) ) ,hjust=0 )+ annotate(geom="text",x=3000000,y=.65,label="n",hjust=0)+ geom_text( data=vlines ,aes( x=3000000 ,y=if_else(group=="All counties",.35,.5) ,label=prettyNum(n,big.mark=",",scientific=FALSE) ) ,hjust=0 )+ geom_vline(data=vlines,aes(xintercept=median,color=reorder(group,desc(group))))+ scale_x_log10(breaks=c(1000,2500,5000,10000,25000,50000,100000,250000,500000,1000000),labels=scales::comma)+ facet_wrap(~level,nrow=3)+ labs( x="\nAverage Population Served in 2013:2020 (log scale)\n" ,y="Density\n" )+ guides( color=guide_legend("Group: ") ,fill=guide_legend("Group: ") ) CairoPNG(str_glue("CountySampleThresholdCompare.png",width=7,height=7,units="in",res=500) print(p1) dev.off()
Plots with extra stats at the bottom
doit<-function(var,lab,outcome,type='C',...){ library(survey) library(scales) library(Cairo) library(openxlsx) outcome.f<-paste0("~",deparse(substitute(outcome))) %>% as.formula() var.f<-paste0("~",deparse(substitute(var))) %>% as.formula() y.str<-deparse(substitute(outcome)) x.str<-deparse(substitute(var)) var<-enquo(var) outcome<-enquo(outcome) t<-theme_set(theme_bw()) t<-theme_update( axis.title.x=element_text(hjust=0.5), #axis.line=element_line(colour="black",lineend="square"), panel.grid.minor=element_blank(), #panel.grid.major.y=element_line(colour="black",size=0.1), #panel.grid.major=element_blank(), axis.ticks=element_line(colour="black",size=0.25), #axis.ticks.x=element_blank(), #strip.text=element_blank(), legend.position = "bottom", legend.direction="horizontal", legend.box="vertical", #legend.title=element_blank(), legend.text=element_text(size=8), legend.spacing=unit(0,"cm"), legend.margin=margin(b=0,t=0,unit='cm'), #axis.text.x=element_text(angle=45, hjust=1, vjust=1,size=8), axis.text.y=element_text(size=8), plot.title=element_text(size=12,margin=margin(b=10),hjust=0), plot.subtitle=element_text(size=8,color="darkslategrey",margin=margin(b=25,l=-25)), plot.caption = element_text(size=8,margin=margin(t=10),color="darkslategrey",hjust=0) ) if (type=='C'){ temp<-foranalysis %>% ungroup() %>% mutate(id=ifelse(is.na(ori),row_number(),ori)) %>% arrange(!!var) %>% mutate( pctl=row_number()/sum(is.na(!!var)==FALSE) ,quintile=case_when( pctl<=.2~1 ,pctl<=.4~2 ,pctl<=.6~3 ,pctl<=.8~4 ,pctl<=1~5 ) ) %>% group_by(quintile) dens<-temp %>% summarise(n(),den=sum(wtcomb*population)) lemas.svy<-svydesign(ids=~id,weights=~wtcomb,strata=~strat,data=temp,nest=TRUE) est<-svyby(outcome.f,~quintile,design=lemas.svy,svytotal) %>% left_join(dens,by="quintile") %>% mutate(rate=!!outcome*1000000/den,rate_se=se*1000000/den,ul=rate+1.96*rate_se,ll=rate-1.96*rate_se) print(est) forplot1<-foranalysis %>% ungroup() %>% mutate( rate=wtcomb*!!outcome*n()*1000000/sum(wtcomb*population) ,drate=wtcomb*!!outcome*1000000/(wtcomb*population) ,wt=wtcomb*population ,type=str_glue("Fatal Encounters ({y.str}) per 1M") ) forplot2<-foranalysis %>% ungroup() %>% mutate( rate=wtcomb*viol*n()*1000/sum(wtcomb*population) ,drate=wtcomb*viol*1000/(wtcomb*population) ,wt=wtcomb*population ,type="Violent Crimes per 1K" ) forplot<-bind_rows(forplot1,forplot2) fr<-forplot %>% mutate(num=case_when( type==str_glue("Fatal Encounters ({y.str}) per 1M")~wtcomb*!!outcome*1000000 ,type=="Violent Crimes per 1K"~wtcomb*viol*1000 ) ) %>% group_by(type) %>% summarise(n(),fulcrum=mean(!!var,na.rm=TRUE),rate=sum(num)/sum(wtcomb*population)) print(fr) forplot %>% group_by(type) %>% arrange(desc(rate)) %>% slice_head(n=10) %>% select(type,Year,prelink1,rate,drate,wt) %>% print() p<-forplot %>% ggplot(.)+ stat_smooth(inherit.aes=FALSE,aes(x=!!var,y=rate),formula=y~x,method="lm",color="blue",linetype=2,se=TRUE)+ geom_point(data=fr,inherit.aes=FALSE,aes(x=fulcrum,y=rate),color="white",fill="red",shape=21,size=4,alpha=0.7)+ facet_wrap(~type,nrow=1)+ labs(x=paste0("\n",lab),y="Rate\n") plotulim<-layer_scales(p)$y$range$range[[2]] plotllim<-layer_scales(p)$y$range$range[[1]] range<-plotulim-plotllim breaks.raw<-plotllim:plotulim %>% pretty(10) p<-p+ scale_y_continuous(labels=comma,breaks=breaks.raw[which(breaks.raw>=plotllim)],expand=expansion(mult = c(0.03,0.05)))+ annotation_custom(grob=grid::rectGrob(),xmin=-Inf,xmax=Inf,ymin=-Inf,ymax=plotllim)+ geom_boxplot(data=forplot,aes(x=!!var,y=plotllim-.05*range),orientation="y",width=0.05*range,outlier.alpha=0.1) } else if (type=='D'){ temp<-foranalysis %>% ungroup() %>% mutate(id=ifelse(is.na(ori),row_number(),ori)) %>% group_by(!!var) dens<-temp %>% summarise(n(),den=sum(wtcomb*population)) extargs<-c(...) l<-length(extargs) forplot1<-foranalysis %>% group_by(!!var) %>% mutate( rate=wtcomb*!!outcome*n()*1000000/sum(wtcomb*population) ,type=str_glue("Fatal Encounters ({y.str}) per 1M") ) forplot2<-foranalysis %>% group_by(!!var) %>% mutate( rate=wtcomb*viol*n()*1000/sum(wtcomb*population) ,type="Violent Crimes per 1K" ) forplot<-bind_rows(forplot1,forplot2) if (l>0 & (l %% 2==0)){ levels<-extargs[1:(l/2)] labels<-extargs[(l/2+1):l] forplot<-forplot %>% mutate(!!var:=factor(!!var,levels=levels,labels=labels)) temp<-temp %>% mutate(!!var:=factor(!!var,levels=levels,labels=labels)) dens<-dens %>% mutate(!!var:=factor(!!var,levels=levels,labels=labels)) } lemas.svy<-svydesign(ids=~id,weights=~wtcomb,strata=~strat,data=temp,nest=TRUE) est<-svyby(outcome.f,var.f,design=lemas.svy,svytotal) %>% left_join(dens,by=str_glue("{x.str}")) %>% mutate(rate=!!outcome*1000000/den,rate_se=se*1000000/den,ul=rate+1.96*rate_se,ll=rate-1.96*rate_se) print(est) means<-forplot %>% ungroup() %>% group_by(type,!!var) %>% summarise(n(),mean=mean(rate)) print(means) if (l>6){ t<-theme_update( axis.text.x=element_text(angle=45, hjust=1, vjust=1,size=8) ) } counts<-forplot %>% group_by(!!var) %>% summarise(n=n()/2) %>% mutate(nf=paste0("n=",prettyNum(n,big.mark=",",scientific=FALSE))) p<-forplot %>% ggplot(.,aes(x=factor(!!var),y=rate))+ stat_summary(fun.data="mean_cl_boot",geom="crossbar",fill="#00334d",alpha=0.3,position="dodge")+ facet_wrap(~type,nrow=1)+ labs(x=paste0("\n",lab),y="Rate\n") plotulim<-ggplot_build(p)$layout$panel_scales_y[[1]]$range$range[[2]] plotllim<-ggplot_build(p)$layout$panel_scales_y[[1]]$range$range[[1]] range<-plotulim-plotllim breaks.raw<-plotllim:plotulim %>% pretty(10) p<-p+ scale_y_continuous(labels=comma,breaks=breaks.raw[which(breaks.raw>=plotllim)],expand=expansion(mult = c(0.05,0.05)))+ annotation_custom(grob=grid::rectGrob(),xmin=-Inf,xmax=Inf,ymin=-Inf,ymax=plotllim)+ geom_text(data=counts,aes(x=!!var,y=plotllim-.05*range,label=nf)) } print(p) CairoPNG(str_glue("//Rtpnfil02/0218273_dcra/Stats/Output/Plots/LEMAS_{y.str}_{x.str}.png"),width=7,height=5,units="in",res=500) print(p) dev.off() write.xlsx(est,str_glue("//Rtpnfil02/0218273_dcra/Stats/Output/Plots/LEMAS_{y.str}_{x.str}.xlsx")) }
Imputation of Missing Months of Crime Data via a Euclidean Distance-Matching Donor Method
R code
library(tidyverse) fuzz<-1.0 #simulated 12-month crime total data with missingness simdat<-data.frame( agency=seq(1,1000,by=1) #agency id 1:1000 ) %>% rowwise() %>% mutate( lambda=rnorm(1,mean=75,sd=20) #lambda for Poisson sampling ,nrpropensity=runif(1,0,0.2) #nonresponse propensity #monthly crime totals m1-m12 ,m1=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m2=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m3=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m4=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m5=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m6=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m7=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m8=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m9=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m10=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m11=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) ,m12=ifelse(runif(1,0,1)<nrpropensity,as.numeric(NA),rpois(1,lambda)) #annualized total crime (i just used a weight adjustment) ,totcrime=sum(across(matches("m\\d+")),na.rm=TRUE)*12/sum(across(matches("m\\d+"),~ifelse(!is.na(.x),1,0))) #donor and recipient indicators (donors could be excluded based on outlier detection which i haven't done here) ,recipient=ifelse(sum(across(matches("m\\d+"),~ifelse(is.na(.x),1,0)))>0,1,0) ,donor=ifelse(sum(across(matches("m\\d+"),~ifelse(is.na(.x),1,0)))==0,1,0) ) %>% #calculate proportional ranks of totcrime arrange(totcrime) %>% ungroup() %>% mutate(q=row_number()/n()) #function for donor matching doit<-function(agency,q,...){ #subset potential donor pool based on totcrime proportional rank relative to recipient donors<-simdat %>% filter(donor==1) %>% filter(q>!!q-fuzz & q<!!q+fuzz) #potential donor IDs matchids<-donors %>% select(agency) %>% as.list() #monthly donor data as matrix donormatrix<-donors %>% select(matches("m\\d+")) %>% as.matrix() #record corresponding to recipient recipient<-simdat %>% filter(agency==!!agency) #recipient monthly data as vector - set missing months to zero (same months will be set to zero for donors) rvector<-recipient %>% select(matches("m\\d+")) %>% mutate(across(everything(),~replace_na(.,0))) %>% pivot_longer(cols=everything()) %>% pull() #indicator vector for nonmissing months in recipient fixvector<-recipient %>% select(matches("m\\d+")) %>% mutate(across(everything(),~ifelse(is.na(.x),0,1))) %>% pivot_longer(cols=everything()) %>% pull() #matrix to apply against donor matrix via element-wise multiplication... #ensures that all donors have zeros in recipient's missing months which were already set to zero above fixmatrix<-t(matrix(fixvector,length(fixvector),nrow(donormatrix))) #donor matrix with recipient's missing months set to zero donorfix<-fixmatrix*donormatrix #list of euclidean distances between recipient vector and each potential donor vector dists<-apply(donorfix,1,function(x)sqrt(sum((x-rvector)^2))) #index corresponding to donor with minimum distance relative to recipient index<-which.min(dists) #data frame with recipient and matched donor IDs temp<-data.frame( recipient_agency=agency ,donor_agency=matchids[[1]][index] ) return(temp) } #create data frame with matched recipients and donors matches<-simdat %>% filter(recipient==1) %>% #slice_head(n=10) %>% pmap_dfr(doit) %>% #join on original crime totals for recipients (*.x) and donors (*.y) left_join(simdat %>% select(agency,matches("m\\d+"),totcrime),by=c("recipient_agency"="agency")) %>% left_join(simdat %>% select(agency,matches("m\\d+"),totcrime),by=c("donor_agency"="agency"))
Download and Stack Census Shape Files
#Code
library(tidyverse) library(curl) getfiles<-function(stfips,...){ writedir<-paste0("C:/Users/gcouzens/OneDrive - Research Triangle Institute/Desktop/ShapeFiles/",...) stfips<-str_pad(stfips,width=2,pad="0") url <- paste0("https://www2.census.gov/geo/tiger/TIGER2016/",toupper(...),"/tl_2016_",stfips,"_",...,".zip") tmp<-tempfile() tryCatch( { curl_download(url,tmp) unzip(tmp,exdir=writedir,overwrite=TRUE) }, error=function(cond){ message(paste0(url," does not exist")) } ) } 1:78 %>% map(getfiles,"place") 1:78 %>% map(getfiles,"cousub") setwd("C:/Users/gcouzens/OneDrive - Research Triangle Institute/Desktop/ShapeFiles/") getshapes<-function(stfips,...){ stfips<-str_pad(stfips,width=2,pad="0") dat<-tryCatch( { st_read(paste0(...,"/tl_2016_",stfips,"_",...,".shp")) }, error=function(cond){ message(paste0("shape file does not exist for ",stfips)) } ) return(dat) } allplaces<-1:78 %>% map_dfr(getshapes,"place") allcousubs<-1:78 %>% map_dfr(getshapes,"cousub")
Sampling Distribution Simulations
library(ggplot2) library(tidyverse) library(Cairo) #fixed parameters N<-2000 #agency population size in domain pop<-10000000 #domain population served R<-250 #true crime rate in population per<-1000 #rate group size r<-R*pop/(N*per) #average number of crimes per agency #variable parameters describing four different scenarios... # 1. small sample and low element variance # 2. small sample and high element variance # 3. large sample and low element variance # 4. large sample and high element variance parms<-data.frame( ss=c(100,100,1000,1000) ,perm=c(50,125,50,125) ) #function to map over parms doit<-function(ss,perm,...){ #agency-level weight weight<-N/ss #bias in overall crime rate units converted to agency level # of crimes units rbias<-(...)*pop/(N*per) #large number of poisson sample draws (5k [sampling distribution sample size] * ss [design sample size of agencies]) #each draw utilizes lambda parameter drawn from normal dist centered on r, and shifted by bias provided in (...) dat<-rnorm(5000*ss,r-rbias,perm) %>% map_dbl(rpois,n=1) #shape into data frame - rows are sampling dist sample and cols based on agency ss samp<-data.frame(matrix(data=weight*dat,nrow=5000,ncol=ss)) #return sampling dist sample by calculating the rate estimate for each row df<-samp %>% pmap_dbl(~sum(c(...))*per/pop) %>% as.data.frame() %>% mutate(sampsize=ss,permutation=perm) colnames(df)<-c("x","ss","perm") return(df) } #plot ready df - version with zero bias nobias<-parms %>% pmap_dfr(doit,0) #plot ready df - version with some bias bias<-parms %>% pmap_dfr(doit,3) cis<-bias %>% group_by(ss,perm) %>% summarize(mean=mean(x),var=var(x),bias=abs(mean(x)-R)) %>% mutate(lb=mean-1.96*sqrt(var),ub=mean+1.96*sqrt(var)) %>% mutate(lub=mean-1.96*sqrt(var+bias^2),uub=mean+1.96*sqrt(var+bias^2)) t<-theme_set(theme_bw()) t<-theme_update( panel.grid.minor.x=element_blank(), panel.grid.major.y=element_blank(), panel.grid.minor.y=element_blank(), panel.background=element_blank(), strip.background=element_rect(fill="white"), axis.text.x=element_text(angle=0, hjust=0.5, vjust=1), axis.text.y=element_blank(), axis.ticks.y=element_blank(), legend.position="bottom", legend.direction="horizontal", legend.justification=c(0,0), legend.box.just="left", plot.title=element_text(size = rel(1.2)) ) sslabs<-c( "100"="Smaller Sample Size" ,"1000"="Larger Sample Size" ) permlabs<-c( "50"="Less Variation" ,"125"="More Variation" ) setwd("//rtpnfil02/0216153_NIBRS/05_PopulationData/VARIANCE/Lance/Plots/") plot_nb<-ggplot(nobias,aes(x=x))+ stat_density(fill="#08925D",alpha=.5,color="black")+ geom_vline(xintercept=R,linetype=2)+ facet_grid(ss~perm,labeller=labeller( ss=sslabs ,perm=permlabs )) + labs( x="\nCrime Rate" ,y="" ) CairoPNG("VarianceUnbiased.png", width=7, height=5, units="in", res=500) print(plot_nb) dev.off() plot_b<-ggplot(bias,aes(x=x))+ stat_density(fill="#08925D",alpha=.5,color="black")+ geom_vline(xintercept=R,linetype=2)+ facet_grid(ss~perm,labeller=labeller( ss=sslabs ,perm=permlabs )) + labs( x="\nCrime Rate" ,y="" ) CairoPNG("VarianceBiased.png", width=7, height=5, units="in", res=500) print(plot_b) dev.off() plot_b2<-ggplot(bias,aes(x=x))+ stat_density(fill="#08925D",alpha=.5,color="black")+ geom_vline(xintercept=R,linetype=2)+ geom_errorbar( inherit.aes=FALSE ,data=cis ,stat="identity" ,orientation="y" ,aes(xmin=lub,xmax=uub,y=.33*layer_scales(plot_b)$y$range$range[[2]]) ,width=0.1 )+ geom_errorbar( inherit.aes=FALSE ,data=cis ,stat="identity" ,orientation="y" ,aes(xmin=lb,xmax=ub,y=.67*layer_scales(plot_b)$y$range$range[[2]]) ,width=0.1 )+ facet_grid(ss~perm,labeller=labeller( ss=sslabs ,perm=permlabs )) + labs( x="\nCrime Rate" ,y="" ) CairoPNG("VarianceBiasedWithCIs.png", width=7, height=5, units="in", res=500) print(plot_b2) dev.off()
Relabel Discrete Variable Axis and Facet Panels
library(tidyverse) library(lubridate) library(scales) library(Cairo) library(openxlsx) setwd("//rtpnfil02/0216153_NIBRS/05_PopulationData/VARIANCE/Lance/Plots") dat<-read.xlsx("//rtpnfil02/0216153_NIBRS/05_PopulationData/VARIANCE/Lance/PhilMtgGraphic.xlsx",sheet=1) t<-theme_set(theme_bw()) t<-theme_update( axis.title.x=element_text(hjust=0.5), #axis.line=element_line(colour="black",lineend="square"), panel.grid.minor=element_blank(), #panel.grid.major.y=element_line(colour="black",size=0.1), #panel.grid.major=element_blank(), axis.ticks=element_line(colour="black",size=0.25), #axis.ticks.x=element_blank(), #strip.text=element_blank(), legend.position = "bottom", legend.direction="horizontal", legend.box="vertical", #legend.title=element_blank(), legend.text=element_text(size=8), legend.spacing=unit(0,"cm"), legend.margin=margin(b=0,t=0,unit='cm'), axis.text.x=element_text(angle=45, hjust=1, vjust=1,size=8), axis.text.y=element_text(size=8), plot.title=element_text(size=12,margin=margin(b=10),hjust=0), plot.subtitle=element_text(size=8,color="darkslategrey",margin=margin(b=25,l=-25)), plot.caption = element_text(size=8,margin=margin(t=10),color="darkslategrey",hjust=0) ) faclabs<-c( des="Treat Pseudo Weights as\nDesign-Based", cal="Treat Pseudo Weights as\nCalibrated", rat="Recreate Weights as N/n within\nWeighting Classes" ) dat$permutation<-factor(dat$permutation,levels=c("des","rat","cal")) p1<-ggplot(subset(dat,method!="BOOT1000"),aes(x=toc,y=rse,fill=method))+ geom_point(size=3,shape=21,color="#000000",alpha=.8)+ scale_x_discrete( limits=c( "totcrime_imp_est", "totcrime_murder_imp_est", "totcrime_manslaughter_imp_est", "totcrime_rape_imp_est", "totcrime_rob_imp_est", "totcrime_assault_imp_est", "totcrime_aggAssault_imp_est", "totcrime_simpAssault_imp_est", "totcrime_burglary_imp_est", "totcrime_larceny_imp_est", "totcrime_vhcTheft_imp_est", "totcrime_violent_imp_est", "totcrime_property_imp_est" ), labels=c( "Total Crime", "Murder", "Manslaughter", "Rape", "Robbery", "Assault", "Aggravated Assault", "Simple Assault", "Burglary", "Larceny", "Vehicle Theft", "Overall Violent", "Overall Property" ) )+ scale_fill_discrete(limits=c("BOOT50","TSL"),labels=c("Bootstrap (50 reps)","Linearized"))+ scale_y_continuous(breaks=scales::pretty_breaks(5),labels=percent)+ facet_wrap(~permutation,labeller=labeller(permutation=faclabs))+ labs( x="\nType of Crime", y="\n\nPercent Relative Standard Error\n" )+ guides( fill=guide_legend("Variance Estimator Type:",order=1,nrow=1) ) p2<-ggplot(subset(dat,method!="BOOT50"),aes(x=toc,y=rse,fill=method))+ geom_point(size=3,shape=21,color="#000000",alpha=.8)+ scale_x_discrete( limits=c( "totcrime_imp_est", "totcrime_murder_imp_est", "totcrime_manslaughter_imp_est", "totcrime_rape_imp_est", "totcrime_rob_imp_est", "totcrime_assault_imp_est", "totcrime_aggAssault_imp_est", "totcrime_simpAssault_imp_est", "totcrime_burglary_imp_est", "totcrime_larceny_imp_est", "totcrime_vhcTheft_imp_est", "totcrime_violent_imp_est", "totcrime_property_imp_est" ), labels=c( "Total Crime", "Murder", "Manslaughter", "Rape", "Robbery", "Assault", "Aggravated Assault", "Simple Assault", "Burglary", "Larceny", "Vehicle Theft", "Overall Violent", "Overall Property" ) )+ scale_fill_discrete(limits=c("BOOT1000","TSL"),labels=c("Bootstrap (1,000 reps)","Linearized"))+ scale_y_continuous(breaks=scales::pretty_breaks(5),labels=percent)+ facet_wrap(~permutation,labeller=labeller(permutation=faclabs))+ labs( x="\nType of Crime", y="\n\nPercent Relative Standard Error\n" )+ guides( fill=guide_legend("Variance Estimator Type:",order=1,nrow=1) ) CairoPNG("VarCompareBS50.png", width=10, height=5, units="in", res=500) print(p1) dev.off() CairoPNG("VarCompareBS1000.png", width=10, height=5, units="in", res=500) print(p2) dev.off()
Combine Alpha and Size Legends
library(ggplot2) library(scales) library(gridExtra) library(Cairo) library(dplyr) library(tidyr) library(viridis) t<-theme_set(theme_minimal()) t<-theme_update( axis.title.x=element_text(hjust=0.5), axis.line=element_line(colour="black",lineend="square"), panel.grid.minor=element_blank(), #panel.grid.major.y=element_line(colour="black",size=0.1), panel.grid.major=element_blank(), axis.ticks=element_line(colour="black",size=0.25), #axis.ticks.x=element_blank(), #strip.text=element_blank(), legend.position = "bottom", legend.direction="horizontal", legend.box="vertical", #legend.title=element_blank(), legend.text=element_text(size=8), legend.spacing=unit(0,"cm"), legend.margin=margin(b=0,t=0,unit='cm'), axis.text.x=element_text(angle=45, hjust=1, vjust=1,size=8), axis.text.y=element_text(size=8), plot.title=element_text(size=12,margin=margin(b=10),hjust=0), plot.subtitle=element_text(size=8,color="darkslategrey",margin=margin(b=25,l=-25)), plot.caption = element_text(size=8,margin=margin(t=10),color="darkslategrey",hjust=0) ) setwd("/rtpnfil03/rtpnfil03_vol5/NVSSP-2/ExtremeWeights/output/plots") person_sub<-subset(person,RSA>0) breaks<-pretty(person_sub$RSA_PROP,n=5) k<-length(breaks) alphas<-seq(0,1,by=1/k)[2:(k-1)] p<-ggplot(person_sub,aes(x=pctl,y=RSA,size=RSA_PROP,fill=factor(RSA_TP)))+ geom_point(color="black",shape=21,aes(alpha=RSA_PROP))+ facet_wrap(~YEAR,nrow=3,scales="free")+ scale_x_continuous(breaks=seq(0,1,by=.1),labels=percent,limits=c(0,1),expand=expand_scale(mult = c(.05, .1)))+ scale_y_continuous(breaks=scales::pretty_breaks(5),limits=c(0,NA),expand=expand_scale(mult = c(0, .1)))+ scale_fill_manual(values=c("#4daf4a","#377eb8","#e41a1c"),labels=c( "Single Incidents Only", "Series incidents Only", "Mixture of Single and Series Incidents" ))+ scale_size_continuous(breaks=breaks,labels=percent)+ scale_alpha_continuous(breaks=breaks,labels=percent)+ labs( x="\nPerson Weight Percentile",y="Total Victimizations\n" )+ guides( size=guide_legend("Percentage of National Estimate: ",nrow=1,byrow=TRUE,order=1,override.aes=list(fill="#707A8A",alpha=alphas)), alpha=FALSE, fill=guide_legend("Incident Types: ",nrow=1,byrow=TRUE,override.aes = list(size = 4),order=2) ) CairoPNG("RSA_Impact_Fixed.png", width=8, height=7, units="in", res=500) print(p) dev.off()
Extract ACS Vars and Save to XLSX
library(tidyverse) library(censusapi) library(openxlsx) workbook<-"//path/ACS_151617.xlsx" years<-c(2015,2016,2017) vars<-c("NAME","B02001_001E","B02001_002E","B19013_001E") key<-"yourkeyhere" ##get ACS place data - can do this for whole nation at once places_acs<-NULL for (year in years){ temp.df<-getCensus( name="acs/acs5", vintage=year, vars=vars, region="place:*", key=key ) temp.df$year<-year places_acs<-bind_rows(places_acs,temp.df) } ##get ACS county data - can do this for whole nation at once counties_acs<-NULL for (year in years){ temp.df<-getCensus( name="acs/acs5", vintage=year, vars=vars, region="county:*", key=key ) temp.df$year<-year counties_acs<-bind_rows(counties_acs,temp.df) } ##get ACS county sub data - have to do this state by state #first get states with PEP county sub data (these are ones with county sub as incorporated government level) pep <- read_csv("//path/sub-est2017_all.csv", locale=locale(encoding="UTF-8"), col_types = cols( SUMLEV = col_integer(), STATE = col_integer(), COUNTY = col_integer(), PLACE = col_integer(), COUSUB = col_integer(), CONCIT = col_integer(), PRIMGEO_FLAG = col_integer(), NAME = col_character(), STNAME = col_character(), CENSUS2010POP = col_character(), ESTIMATESBASE2010 = col_double(), POPESTIMATE2010 = col_double(), POPESTIMATE2011 = col_double(), POPESTIMATE2012 = col_double(), POPESTIMATE2013 = col_double(), POPESTIMATE2014 = col_double(), POPESTIMATE2015 = col_double(), POPESTIMATE2016 = col_double(), POPESTIMATE2017 = col_double() ) ) states<-filter(pep,SUMLEV==61 & !(FUNCSTAT %in% c("F","S")))$STATE %>% unique() %>% as.character() %>% str_pad(2,pad="0") #pull county sub data state by state and append cs_acs<-NULL for (year in years){ for(state in states){ currstate=paste("state:", state, sep='') currstate temp.df<-getCensus( name="acs/acs5", vintage=year, vars=vars, region="county subdivision:*", regionin=currstate, key=key ) temp.df$year<-year cs_acs<-bind_rows(cs_acs,temp.df) } } rm(temp.df) #write out to Excel wb<-createWorkbook() addWorksheet(wb,"places") addWorksheet(wb,"counties") addWorksheet(wb,"countysubs") writeData(wb,"places",places_acs,rowNames=TRUE) writeData(wb,"counties",counties_acs,rowNames=TRUE) writeData(wb,"countysubs",cs_acs,rowNames=TRUE) saveWorkbook(wb, workbook, overwrite = TRUE)
Simple Mapping of Census Data
library(tidycensus) library(tidyverse) library(viridis) library(ggplot2) library(scales) library(Cairo) library(ggthemes) census_api_key(" ") inc<-get_acs(geography = "county", variables = "B19013_001", state="36", geometry = TRUE) t<-theme_set(theme_minimal()) t<-theme_update( axis.title=element_blank(), axis.line=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), panel.grid=element_line(color="transparent"), panel.grid.major=element_line(color="transparent"), panel.spacing = unit(0,"in"), strip.background=element_blank(), strip.text=element_blank(), legend.position =c(1,0.9), legend.justification=c(0.7,1), legend.title=element_text(size=11), legend.title.align=0, legend.text=element_text(size=9), legend.key.height=unit(c(.3),"in"), legend.key.width=unit(c(.17),"in"), plot.title = element_text(size = 12, margin = margin(b = 10), hjust = 0.1), plot.subtitle = element_text(size = 8, color = "darkslategrey", margin = margin(b = 25, l = -25), hjust=0.14), plot.caption = element_text(size = 8, margin = margin(t = 10), color = "darkslategrey", hjust = 0) ) setwd(" ") p<-ggplot(inc) + geom_sf(size=0.05,color="#000000",aes(fill=as.numeric(estimate))) + coord_sf(crs = 4326) + scale_fill_viridis_c( option = "viridis", breaks=seq(0,120000,by=10000), labels=dollar )+ labs( title = "New York Income by County", subtitle = "American Community Survey 5-Year Estimates 2013-2017" )+ guides(fill=guide_colorbar("Median\nPast-Year\nHH Income")) CairoPNG('TestMap2.png',width=6,height=5,res=500,units='in') print(p) dev.off()
Simple Mapping of Census Data
library(tidycensus) library(tidyverse) library(viridis) library(ggplot2) library(scales) library(Cairo) library(ggthemes) census_api_key(" ") inc<-get_acs(geography = "county", variables = "B19013_001", geometry = TRUE, shift_geo=TRUE) state<-get_acs(geography="state",variables = "B19013_001", geometry = TRUE, shift_geo=TRUE) t<-theme_set(theme_minimal()) t<-theme_update( axis.title=element_blank(), axis.line=element_blank(), axis.text=element_blank(), axis.ticks=element_blank(), panel.grid=element_line(color="transparent"), panel.grid.major=element_line(color="transparent"), panel.spacing = unit(0,"in"), strip.background=element_blank(), strip.text=element_blank(), legend.position = "right", legend.title=element_text(size=12), legend.title.align=0, legend.text=element_text(size=9), legend.key.height=unit(c(.3),"in"), legend.key.width=unit(c(.17),"in") ) setwd(" ") p<-ggplot(inc) + geom_sf(size=0.05,color="#000000",aes(fill=as.numeric(estimate))) + geom_sf(data=state,size=0.1,color="#ffffff",fill=NA)+ coord_sf(crs = 5070) + scale_fill_viridis_c( option = "magma", breaks=seq(0,120000,by=20000), labels=c("$0","$20,000","$40,000","$60,000","$80,000","$100,000","$120,000") )+ guides(fill=guide_colorbar("Median\nPast-Year\nHH Income")) CairoPNG('TestMap.png',width=8,height=5,res=500,units='in') print(p) dev.off()
lollipops
libname in '~dir~'; data sig; set in.sig_us_state_overtime; if est eq 'VIOLENT' and dom eq 'one' and lev eq 1; run; data dat; if _n_ eq 1 then do; if 0 then set sig(keep=nstate probt); declare hash lookup(dataset:"sig(keep=nstate probt)"); lookup.definekey('nstate'); lookup.definedata(all:'yes'); lookup.definedone(); end; set in.state(where=(est eq 'VIOLENT' and dom eq 'one' and lev eq '1')) end=done; lookup.find(); year=lastyr+2000; alpha=0; sig=probt lt 0.05; output; if done then do year=2008 to 2015; geog="United States, Overall"; rate=.; probt=.; sig=.; alpha=1; output; end; keep nstate probt geog year rate alpha sig; run; proc sql; select distinct geog into :geolist separated by '~' from dat where alpha eq 0 ; quit; data natdat; if 0 then set dat(keep=geog); set in.national_3yr; if est eq 'VIOLENT' and dom eq 'one' and lev eq '1'; year=lastyr+2000; geog="United States, Overall"; alpha=1; output; %macro doit; %do i=1 %to %sysfunc(countw(&geolist,%str(~))); %let curr=%scan(&geolist,&i,%str(~)); geog="&curr"; alpha=0; output; %end; %mend; %doit run; %include '/rtpnfil02/rtpnfil02_vol6/CSDS/CENTER/GCouzens/RSAS/DriveRGrid.sas'; data _null_; %RCodeOpen(datforR=dat natdat,stripformats=0,menable=1) datalines; library(ggplot2) library(scales) library(gridExtra) library(Cairo) library(dplyr) library(tidyr) t<-theme_set(theme_minimal()) t<-theme_update( axis.title.y = element_blank(), axis.title.x=element_text(hjust=0), axis.line.x = element_line(colour="black",lineend="square"), axis.line.y = element_line(colour="black",lineend="square"), panel.grid.minor = element_blank(), panel.grid.major=element_blank(), legend.position = "bottom", legend.justification="left", legend.title=element_blank(), text = element_text(family = "Georgia"), axis.text.x=element_text(angle=45, hjust=1, vjust=1), axis.text.y = element_text(size = 7), plot.title = element_text(size = 12, margin = margin(b = 10), hjust = 0), plot.subtitle = element_text(size = 8, color = "darkslategrey", margin = margin(b = 25, l = -25)), plot.caption = element_text(size = 8, margin = margin(t = 10), color = "darkslategrey", hjust = 0.025) ) setwd("/rtpnfil02/rtpnfil02_vol7/NVSSP_0213170/0213170.009 Subnational/001_Reports/1b1 - Direct Estimates in States and MSAs/Lance/Plots/") clist<-c("#133347","#44b9ff") dat$sig<-factor(dat$sig) p<-ggplot(dat,aes(x=year,y=rate,label=format(round(rate,1),nsmall=1)))+ geom_segment(data=natdat,aes(x=year,y=0,xend=year,yend=rate),color="black",linetype=2)+ geom_segment(aes(x=year,y=0,xend=year,yend=rate),color="black")+ geom_point(data=natdat,size=8.,color="black")+ geom_text(data=natdat,color="white",size=2,aes(alpha=alpha))+ geom_point(size=7,aes(color=sig))+ geom_text(color="white",size=2)+ scale_x_continuous(limits=c(2007.5,2015.5),breaks=seq(2008,2015,by=1))+ scale_y_continuous(limits=c(0,42),breaks=seq(0,40,by=10),expand=c(0,0))+ scale_alpha(range=c(0,1))+ scale_color_manual(values=clist,labels=c( "State Trend not Significantly Different than National Trend", "State Trend Significantly Differs from Nation", "" ) )+ facet_wrap(~reorder(geog,1-alpha),nrow=3,strip.position="bottom")+ labs( title = "Violent Victimizations among all Persons", subtitle = "11 Largest U.S. States and the Nation, Overall\nFigures Shown are Rates of Victimization per 1,000 Persons", caption="National Crime Victimization Survey 2008-2015", x="", y="\nVictimizations per 1,000 Persons" )+ guides( alpha=FALSE ) CairoPNG('TestStateLPsV1.png', height=8, width=10, units="in", res=500) print(p) dev.off() ; %RCodeClose
##some useful theme stuff
%include '/rtpnfil02/rtpnfil02_vol6/CSDS/CENTER/GCouzens/RSAS/DriveRGrid.sas'; data _null_; %RCodeOpen(datforR=overall NHW NHB Hisp,stripformats=0) datalines; library(ggplot2) library(scales) library(gridExtra) library(Cairo) t<-theme_set(theme_minimal()) t<-theme_update( axis.title.x=element_text(hjust=0.5), axis.line=element_line(colour="black",lineend="square"), panel.grid.minor=element_blank(), panel.grid.major.x=element_blank(), legend.position = "bottom", legend.direction="horizontal", legend.box="vertical", legend.title=element_text(size=8), legend.text=element_text(size=7), legend.spacing=unit(0,"cm"), legend.margin=margin(b=0,t=0,unit='cm'), axis.text.y=element_text(size=7), plot.title=element_text(size=12,margin=margin(b=10),hjust=0), plot.subtitle=element_text(size=8,color="darkslategrey",margin=margin(b=25,l=-25)), plot.caption = element_text(size=8,margin=margin(t=10),color="darkslategrey",hjust=0) ) setwd("/sasdata/rtpnfil02/rtpnfil02_vol6/ECHO-DAC/Users/Lance/RAMP/Analysis/") po<-ggplot(overall,aes(x=ls,y=rrisk,color=reorder(allocation,prop)))+ geom_point(aes(size=n),alpha=.7)+ scale_x_continuous(breaks=seq(1,8,by=1),labels=c("Full-term\nBirths","After birth\nto 1 year", "1 to < 2\nyears","2 to < 4\nyears","4 to < 6\nyears","6 to < 8\nyears","8 to < 12\nyears","12 to < 19\nyears"))+ scale_y_continuous(limits=c(0,3.5),breaks=seq(0,3.5,by=.5))+ scale_size_continuous(labels=comma)+ labs(x="\nLife course stage",y="RR for high BMI-for-age\n")+ guides( color=guide_legend("Proportion exposed:",order=1,nrow=1), size=guide_legend("Total sample size:",order=2,) ) CairoPNG('Overall_PointsOnly_Alt2.png', width=5.5, height=4, units="in", res=500) print(po) dev.off() ; %RCodeClose
%include '/rtpnfil02/rtpnfil02_vol6/CSDS/CENTER/GCouzens/RSAS/DriveRGrid.sas'; data _null_; %RCodeOpen(datforR=cbsas,stripformats=0,menable=1) datalines; library(ggplot2) library(scales) library(gridExtra) library(Cairo) library(dplyr) library(tidyr) t<-theme_set(theme_minimal()) t<-theme_update( axis.title.y = element_blank(), axis.title.x=element_text(hjust=0), axis.line = element_line(colour="black",lineend="square"), panel.grid.minor = element_blank(), panel.grid.major=element_blank(), legend.position = c(.65,.1), legend.title=element_blank(), text = element_text(family = "Georgia"), axis.text.y = element_text(size = 7), plot.title = element_text(size = 12, margin = margin(b = 10), hjust = 0), plot.subtitle = element_text(size = 8, color = "darkslategrey", margin = margin(b = 25, l = -25)), plot.caption = element_text(size = 8, margin = margin(t = 10), color = "darkslategrey", hjust = 0) ) setwd("/rtpnfil02/rtpnfil02_vol7/NVSSP_0213170/0213170.009 Subnational/001_Reports/1b1 - Direct Estimates in States and MSAs/Lance/Plots/") list<-c("#000000","#133347","#44b9ff","#999999") cbsas$level<-factor(cbsas$level) p<-ggplot(cbsas,aes(x=rate,y=reorder(geog,rate),label=paste0(round(rate,1),"")))+ geom_vline(xintercept=&natrate)+ geom_segment(aes(alpha=nation,x=stop,xend=0,y=reorder(geog,rate),yend=reorder(geog,rate)),linetype="longdash",color="black")+ geom_segment(aes(x=&natrate,y=reorder(geog,rate),xend=rate,yend=reorder(geog,rate)),color="black",alpha=.5) + geom_point(size=7,aes(color=level))+ geom_text(color="white",size=2)+ scale_x_continuous(expand=c(0,0),limits=c(0,80),breaks=seq(0,60,by=20))+ scale_y_discrete(expand=c(.05,.05))+ scale_color_manual(values=list,labels=c("National rate","CBSA rate significantly lower than national","CBSA rate significantly higher than national","CBSA rate not statistically different from national"))+ scale_alpha(range=c(.1,1))+ labs( title = "Violent Victimizations among all Persons", subtitle = "51 Largest Core-Based Statistical Areas (CBSAs)", caption="National Crime Victimization Survey 2011-2015", x="\nVictimizations per 1,000 Persons" )+ guides( alpha=FALSE ) CairoPNG("test2legend.png", height=10, width=8, units="in", res=500) print(p) dev.off() ; %RcodeClose
Size-Ordered Comparison Bars with Labels
%macro doit(var,name); proc sql; create table &name.overall as select ver, sum(&var) as &var from check group by ver ; create table &name.state as select ver, d_state as state, sum(&var) as &var from check where state in(&statelist) group by ver, state ; create table &name.statelabs as select state, sum(&var*(ver='Alternative')) as num, sum(&var*(ver='Standard ')) as den, (calculated num-calculated den)/(calculated den) as lab format=percent8.1, 'Alternative' as ver, max(calculated num,calculated den) as ypos from &name.state group by state ; quit; %mend; %doit(q1b,q1) %doit(q2e,q2) %doit(q3i,q3) %doit(q4,q4) %macro plotit(var,name,lab); &name.sub<-subset(check,&var._f==4,); p1<-ggplot(&name.sub,aes(x=&var,color=ver))+ stat_ecdf()+ scale_x_log10(labels=comma)+ xlab("\n&lab")+ ylab("Cumulative Density\n")+ guides( color=guide_legend("Imputation Version:",keyheight=1,keywidth=2) ); CairoPNG("&name.ECDF.png", width=6, height=6, units="in", res=500); print(p1); dev.off(); &name.state<-transform(&name.state,state=reorder(state,&var)); p2<-ggplot(&name.state,aes(x=state,y=&var,fill=ver))+ geom_bar(position="dodge",stat="identity",color="black")+ geom_text(data=&name.statelabs,aes(x=state,y=ypos+10000,label=lab))+ scale_y_continuous(labels=comma)+ xlab("\nState")+ ylab("&lab\n")+ guides( fill=guide_legend("Imputation Version:",keyheight=1,keywidth=2) ); CairoPNG("&name.StateBars.png", width=10, height=5, units="in", res=500); print(p2); dev.off(); %mend; data _null_; %RCodeOpen( datforR=check q1state q1statelabs q2state q2statelabs q3state q3statelabs q4state q4statelabs meths, stripformats=0, menable=1 ) datalines; library(ggplot2) library(scales) library(gridExtra) library(Cairo) t<-theme_set(theme_bw()) t<-theme_update( panel.grid.minor.x=element_blank(), panel.grid.minor.y=element_blank(), panel.background=element_blank(), axis.text.x=element_text(angle=45, hjust=1, vjust=1), strip.background=element_rect(fill="white"), legend.position="bottom", legend.direction="horizontal", legend.justification=c(0,0), legend.box.just="left", plot.title=element_text(size = rel(1.2)) ) setwd("/rtpnfil03/rtpnfil03_vol4/ASPP/Imputation_Weighting/Programs/2016/impccompare") %plotit(q1b,q1,January 1 Population) %plotit(q2e,q2,Entries) %plotit(q3i,q3,Exits) %plotit(q4,q4,December 31 Population) pmeth<-ggplot(meths,aes(x=D_State,fill=entmeth))+ geom_bar(color="black",position="dodge")+ scale_y_continuous(breaks=seq(0,16,by=2))+ xlab("\nState")+ ylab("Method Frequency\n")+ guides( fill=guide_legend("Standard Imp Approach:",keyheight=1,keywidth=2) ) CairoPNG("Methods.png", width=10, height=5, units="in", res=500) print(pmeth) dev.off() ; %RCodeClose