library(httk)
library(survey)
library(data.table)
library(ggplot2)
all.reth <- levels(nhanes_mec_svy$variables[, ridreth1])
all.gendr <- levels(nhanes_mec_svy$variables[, riagendr])
Plot serum creatinine vs. age with spline fits.
par(mfcol=c(5,2),
mar=c(2,2,2.5,2)+0.1,
oma=c(4,4,4,0),
mgp=c(1,1,0))
for (gendr in all.gendr){
for (r in all.reth){
grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & !is.na(ridexagm) & !is.na(loglbxscr))
svyplot(loglbxscr~ridexagm, grsub, style="transparent",
basecol="gray50", xlab=NA, ylab=NA, cex.axis=1.5)
lines(sort(unique(grsub$variables[,ridexagm])), predict(spline_serumcreat[gender==gendr & reth==r,
sc_spline[[1]]],
sort(unique(grsub$variables[,ridexagm])))$y)
title(paste(gendr, r), line=0.5, cex.main=1.5)
}
}
title(xlab="Age, months",
ylab="Log serum creatinine, log g/dL",
outer=TRUE,
line=2, cex.lab=2)
title(main="Serum creatinine vs. age, with spline fits",
outer=TRUE,
line=1, cex.main=2)
Plot log serum creatinine residuals vs. age.
par(mfcol=c(5,2),
mar=c(2,2,2.5,2)+0.1,
oma=c(4,4,4,0),
mgp=c(1,1,0))
for (gendr in all.gendr){
for (r in all.reth){
grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & !is.na(ridexagm) & !is.na(loglbxscr))
grsub <- update(grsub, loglbxscr_resid=loglbxscr-
predict(spline_serumcreat[gender==gendr &
reth==r,
sc_spline[[1]]],
grsub$variables[,ridexagm])$y)
svyplot(loglbxscr_resid~ridexagm, grsub, style="transparent",
xlab=NA, ylab=NA, cex.axis=1.5)
title(paste(gendr, r), line=0.5, cex.main=1.5)
}
}
title(xlab="Age, months",
ylab="Log serum creatinine residual, log g/dL",
outer=TRUE,
line=2, cex.lab=2)
title(main = "Serum creatinine residuals vs. age",
outer=TRUE, line=1, cex.main=2)
Make QQ plots of log serum creatinine residual quantiles vs. normal quantiles.
par(mfcol=c(5,2),
mar=c(2,2,2.5,2)+0.1,
oma=c(4,4,4,0),
mgp=c(1,1,0))
for (gendr in all.gendr){
for (r in all.reth){
grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & !is.na(ridexagm) & !is.na(loglbxscr))
grsub <- update(grsub, logscresid=loglbxscr-
predict(spline_serumcreat[gender==gendr &
reth==r,
sc_spline[[1]]],
grsub$variables[,ridexagm])$y)
qprobs <- ppoints(n=length(grsub$prob))
norm.q <- qnorm(p=qprobs)
logscresid.q <- svyquantile(~logscresid,
design=grsub,
quantiles=qprobs)
plot(y=logscresid.q,
x=norm.q, #Plot weighted points
type='p',
col="gray70",
xlab=NA, ylab=NA)
#Add qqline: passing through first and third quartiles
x <- qnorm(p=c(0.25, 0.75))
y <- as.vector(svyquantile(~logscresid,
design=grsub,
quantiles=c(0.25, 0.75)))
m <- diff(y)/diff(x)
intercept <- -m*x[1]+y[1]
abline(a=intercept, b=m)
title(paste(gendr, r), line=0.5, cex.main=1.5)
}
}
title(xlab='Normal quantiles',
ylab='log SC resid quantiles',
outer=TRUE,
line=2,
cex.lab=2)
Clearly, the log residuals aren’t normally distributed for most combinations of race and gender. For this reason, I decided to use a kernel density estimate of the residual distribution as a way to non-parametrically estimate the residual distribution, rather than assuming a particular theoretical parametric distribution.
Here are the KDEs for the log serum creatinine residuals for each gender and race/ethnicity combination.
par(mfcol=c(5,2),
mar=c(2,2,2.5,2)+0.1,
oma=c(4,4,4,0),
mgp=c(1,1,0))
for (gendr in all.gendr){
for (r in all.reth){
grsub <- subset(nhanes_mec_svy, riagendr==gendr & ridreth1==r & !is.na(ridexagm) & !is.na(loglbxscr))
grsub <- update(grsub, logscresid=loglbxscr-
predict(spline_serumcreat[gender==gendr &
reth==r,
sc_spline[[1]]],
grsub$variables[,ridexagm])$y)
tmp.kde <- ks::kde(x=grsub$variables[, logscresid],
w=grsub$variables[, wtmec6yr])
plot(tmp.kde, xlab=NA, ylab=NA, xlim=c(-1.72,2.7), drawpoints=TRUE, col.pt="black")
title(paste(gendr, r), line=0.5, cex.main=1.5)
}
}
title(xlab='Log serum creatinine residual',
ylab='KDE density',
outer=TRUE,
line=2,
cex.lab=2)