Wambaugh et al. (2018): Creating All Figures

John Wambaugh

2018-01-23

This vignette provides the code used to generate the figures in Wambaugh et al. (2018) “Evaluating In Vitro-In Vivo Extrapolation of Toxicokinetics” from Toxicological Sciences

To use the code in this vignette, you’ll first need to load a few packages.

library(scales)
library(ggplot2)
library(data.table)
library(gdata)
library(gtools)
library(httk)
library(RColorBrewer)
library(grid)
library(gplots)

The data we analyze were originally generated with the R package invivoPKfit, but all the outputs of that analysis are provided with httk >v1.8.

Plot functions

These functions, which have been cobbled togethe from the internet, are reused in multiple figure to make things look nice.

# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL, widths=unit(rep_len(1, cols), "null")) {

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout),widths=widths)))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
                                                                
scientific_10 <- function(x) {                                  
  out <- gsub("1e", "10^", scientific_format()(x))              
  out <- gsub("\\+","",out)                                     
  out <- gsub("10\\^01","10",out)                               
  out <- parse(text=gsub("10\\^00","1",out))                    
}  

 
lm_R2 <- function(m,prefix=NULL){
    out <- substitute("MSE = "~mse*","~~italic(R)^2~"="~r2, 
         list(mse = signif(mean(m$residuals^2),3),
              r2 = format(summary(m)$adj.r.squared, digits = 2)))
    paste(prefix,as.character(as.expression(out)))                 
}
  
heatmap.3 <- function(x,
                      Rowv = TRUE, Colv = if (symm) "Rowv" else TRUE,
                      distfun = dist,
                      hclustfun = hclust,
                      dendrogram = c("both","row", "column", "none"),
                      symm = FALSE,
                      scale = c("none","row", "column"),
                      na.rm = TRUE,
                      revC = identical(Colv,"Rowv"),
                      add.expr,
                      breaks,
                      symbreaks = max(x < 0, na.rm = TRUE) || scale != "none",
                      col = "heat.colors",
                      colsep,
                      rowsep,
                      sepcolor = "white",
                      sepwidth = c(0.05, 0.05),
                      cellnote,
                      notecex = 1,
                      notecol = "cyan",
                      na.color = par("bg"),
                      trace = c("none", "column","row", "both"),
                      tracecol = "cyan",
                      hline = median(breaks),
                      vline = median(breaks),
                      linecol = tracecol,
                      margins = c(5,5),
                      ColSideColors,
                      RowSideColors,
                      side.height.fraction=0.3,
                      cexRow = 0.2 + 1/log10(nr),
                      cexCol = 0.2 + 1/log10(nc),
                      labRow = NULL,
                      labCol = NULL,
                      key = TRUE,
                      keysize = 1.5,
                      density.info = c("none", "histogram", "density"),
                      denscol = tracecol,
                      symkey = max(x < 0, na.rm = TRUE) || symbreaks,
                      densadj = 0.25,
                      main = NULL,
                      xlab = NULL,
                      ylab = NULL,
                      lmat = NULL,
                      lhei = NULL,
                      lwid = NULL,
                      ColSideColorsSize = 1,
                      RowSideColorsSize = 1,
                      KeyValueName="Value",...){

    invalid <- function (x) {
      if (missing(x) || is.null(x) || length(x) == 0)
          return(TRUE)
      if (is.list(x))
          return(all(sapply(x, invalid)))
      else if (is.vector(x))
          return(all(is.na(x)))
      else return(FALSE)
    }

    x <- as.matrix(x)
    scale01 <- function(x, low = min(x), high = max(x)) {
        x <- (x - low)/(high - low)
        x
    }
    retval <- list()
    scale <- if (symm && missing(scale))
        "none"
    else match.arg(scale)
    dendrogram <- match.arg(dendrogram)
    trace <- match.arg(trace)
    density.info <- match.arg(density.info)
    if (length(col) == 1 && is.character(col))
        col <- get(col, mode = "function")
    if (!missing(breaks) && (scale != "none"))
        warning("Using scale=\"row\" or scale=\"column\" when breaks are",
            "specified can produce unpredictable results.", "Please consider using only one or the other.")
    if (is.null(Rowv) || is.na(Rowv))
        Rowv <- FALSE
    if (is.null(Colv) || is.na(Colv))
        Colv <- FALSE
    else if (Colv == "Rowv" && !isTRUE(Rowv))
        Colv <- FALSE
    if (length(di <- dim(x)) != 2 || !is.numeric(x))
        stop("`x' must be a numeric matrix")
    nr <- di[1]
    nc <- di[2]
    if (nr <= 1 || nc <= 1)
        stop("`x' must have at least 2 rows and 2 columns")
    if (!is.numeric(margins) || length(margins) != 2)
        stop("`margins' must be a numeric vector of length 2")
    if (missing(cellnote))
        cellnote <- matrix("", ncol = ncol(x), nrow = nrow(x))
    if (!inherits(Rowv, "dendrogram")) {
        if (((!isTRUE(Rowv)) || (is.null(Rowv))) && (dendrogram %in%
            c("both", "row"))) {
            if (is.logical(Colv) && (Colv))
                dendrogram <- "column"
            else dedrogram <- "none"
            warning("Discrepancy: Rowv is FALSE, while dendrogram is `",
                dendrogram, "'. Omitting row dendogram.")
        }
    }
    if (!inherits(Colv, "dendrogram")) {
        if (((!isTRUE(Colv)) || (is.null(Colv))) && (dendrogram %in%
            c("both", "column"))) {
            if (is.logical(Rowv) && (Rowv))
                dendrogram <- "row"
            else dendrogram <- "none"
            warning("Discrepancy: Colv is FALSE, while dendrogram is `",
                dendrogram, "'. Omitting column dendogram.")
        }
    }
    if (inherits(Rowv, "dendrogram")) {
        ddr <- Rowv
        rowInd <- order.dendrogram(ddr)
    }
    else if (is.integer(Rowv)) {
        hcr <- hclustfun(distfun(x))
        ddr <- as.dendrogram(hcr)
        ddr <- reorder(ddr, Rowv)
        rowInd <- order.dendrogram(ddr)
        if (nr != length(rowInd))
            stop("row dendrogram ordering gave index of wrong length")
    }
    else if (isTRUE(Rowv)) {
        Rowv <- rowMeans(x, na.rm = na.rm)
        hcr <- hclustfun(distfun(x))
        ddr <- as.dendrogram(hcr)
        ddr <- reorder(ddr, Rowv)
        rowInd <- order.dendrogram(ddr)
        if (nr != length(rowInd))
            stop("row dendrogram ordering gave index of wrong length")
    }
    else {
        rowInd <- nr:1
    }
    if (inherits(Colv, "dendrogram")) {
        ddc <- Colv
        colInd <- order.dendrogram(ddc)
    }
    else if (identical(Colv, "Rowv")) {
        if (nr != nc)
            stop("Colv = \"Rowv\" but nrow(x) != ncol(x)")
        if (exists("ddr")) {
            ddc <- ddr
            colInd <- order.dendrogram(ddc)
        }
        else colInd <- rowInd
    }
    else if (is.integer(Colv)) {
        hcc <- hclustfun(distfun(if (symm)
            x
        else t(x)))
        ddc <- as.dendrogram(hcc)
        ddc <- reorder(ddc, Colv)
        colInd <- order.dendrogram(ddc)
        if (nc != length(colInd))
            stop("column dendrogram ordering gave index of wrong length")
    }
    else if (isTRUE(Colv)) {
        Colv <- colMeans(x, na.rm = na.rm)
        hcc <- hclustfun(distfun(if (symm)
            x
        else t(x)))
        ddc <- as.dendrogram(hcc)
        ddc <- reorder(ddc, Colv)
        colInd <- order.dendrogram(ddc)
        if (nc != length(colInd))
            stop("column dendrogram ordering gave index of wrong length")
    }
    else {
        colInd <- 1:nc
    }
    retval$rowInd <- rowInd
    retval$colInd <- colInd
    retval$call <- match.call()
    x <- x[rowInd, colInd]
    x.unscaled <- x
    cellnote <- cellnote[rowInd, colInd]
    if (is.null(labRow))
        labRow <- if (is.null(rownames(x)))
            (1:nr)[rowInd]
        else rownames(x)
    else labRow <- labRow[rowInd]
    if (is.null(labCol))
        labCol <- if (is.null(colnames(x)))
            (1:nc)[colInd]
        else colnames(x)
    else labCol <- labCol[colInd]
    if (scale == "row") {
        retval$rowMeans <- rm <- rowMeans(x, na.rm = na.rm)
        x <- sweep(x, 1, rm)
        retval$rowSDs <- sx <- apply(x, 1, sd, na.rm = na.rm)
        x <- sweep(x, 1, sx, "/")
    }
    else if (scale == "column") {
        retval$colMeans <- rm <- colMeans(x, na.rm = na.rm)
        x <- sweep(x, 2, rm)
        retval$colSDs <- sx <- apply(x, 2, sd, na.rm = na.rm)
        x <- sweep(x, 2, sx, "/")
    }
    if (missing(breaks) || is.null(breaks) || length(breaks) < 1) {
        if (missing(col) || is.function(col))
            breaks <- 16
        else breaks <- length(col) + 1
    }
    if (length(breaks) == 1) {
        if (!symbreaks)
            breaks <- seq(min(x, na.rm = na.rm), max(x, na.rm = na.rm),
                length = breaks)
        else {
            extreme <- max(abs(x), na.rm = TRUE)
            breaks <- seq(-extreme, extreme, length = breaks)
        }
    }
    nbr <- length(breaks)
    ncol <- length(breaks) - 1
    if (class(col) == "function")
        col <- col(ncol)
    min.breaks <- min(breaks)
    max.breaks <- max(breaks)
    x[x < min.breaks] <- min.breaks
    x[x > max.breaks] <- max.breaks
    if (missing(lhei) || is.null(lhei))
        lhei <- c(keysize, 4)
    if (missing(lwid) || is.null(lwid))
        lwid <- c(keysize, 4)
    if (missing(lmat) || is.null(lmat)) {
        lmat <- rbind(4:3, 2:1)

        if (!missing(ColSideColors)) {
           #if (!is.matrix(ColSideColors))
           #stop("'ColSideColors' must be a matrix")
            if (!is.character(ColSideColors) || nrow(ColSideColors) != nc)
                stop("'ColSideColors' must be a matrix of nrow(x) rows")
            lmat <- rbind(lmat[1, ] + 1, c(NA, 1), lmat[2, ] + 1)
            #lhei <- c(lhei[1], 0.2, lhei[2])
             lhei=c(lhei[1], side.height.fraction*ColSideColorsSize/2, lhei[2])
        }

        if (!missing(RowSideColors)) {
            #if (!is.matrix(RowSideColors))
            #stop("'RowSideColors' must be a matrix")
            if (!is.character(RowSideColors) || ncol(RowSideColors) != nr)
                stop("'RowSideColors' must be a matrix of ncol(x) columns")
            lmat <- cbind(lmat[, 1] + 1, c(rep(NA, nrow(lmat) - 1), 1), lmat[,2] + 1)
            #lwid <- c(lwid[1], 0.2, lwid[2])
            lwid <- c(lwid[1], side.height.fraction*RowSideColorsSize/2, lwid[2])
        }
        lmat[is.na(lmat)] <- 0
    }

    if (length(lhei) != nrow(lmat))
        stop("lhei must have length = nrow(lmat) = ", nrow(lmat))
    if (length(lwid) != ncol(lmat))
        stop("lwid must have length = ncol(lmat) =", ncol(lmat))
    op <- par(no.readonly = TRUE)
    on.exit(par(op))

    layout(lmat, widths = lwid, heights = lhei, respect = FALSE)

    if (!missing(RowSideColors)) {
        if (!is.matrix(RowSideColors)){
                par(mar = c(margins[1], 0, 0, 0.5))
                image(rbind(1:nr), col = RowSideColors[rowInd], axes = FALSE)
        } else {
            par(mar = c(margins[1], 0, 0, 0.5))
            rsc = t(RowSideColors[,rowInd, drop=F])
            rsc.colors = matrix()
            rsc.names = names(table(rsc))
            rsc.i = 1
            for (rsc.name in rsc.names) {
                rsc.colors[rsc.i] = rsc.name
                rsc[rsc == rsc.name] = rsc.i
                rsc.i = rsc.i + 1
            }
            rsc = matrix(as.numeric(rsc), nrow = dim(rsc)[1])
            image(t(rsc), col = as.vector(rsc.colors), axes = FALSE)
            if (length(rownames(RowSideColors)) > 0) {
                axis(1, 0:(dim(rsc)[2] - 1)/max(1,(dim(rsc)[2] - 1)), rownames(RowSideColors), las = 2, tick = FALSE)
            }
        }
    }

    if (!missing(ColSideColors)) {

        if (!is.matrix(ColSideColors)){
            par(mar = c(0.5, 0, 0, margins[2]))
            image(cbind(1:nc), col = ColSideColors[colInd], axes = FALSE)
        } else {
            par(mar = c(0.5, 0, 0, margins[2]))
            csc = ColSideColors[colInd, , drop=F]
            csc.colors = matrix()
            csc.names = names(table(csc))
            csc.i = 1
            for (csc.name in csc.names) {
                csc.colors[csc.i] = csc.name
                csc[csc == csc.name] = csc.i
                csc.i = csc.i + 1
            }
            csc = matrix(as.numeric(csc), nrow = dim(csc)[1])
            image(csc, col = as.vector(csc.colors), axes = FALSE)
            if (length(colnames(ColSideColors)) > 0) {
                axis(2, 0:(dim(csc)[2] - 1)/max(1,(dim(csc)[2] - 1)), colnames(ColSideColors), las = 2, tick = FALSE)
            }
        }
    }

    par(mar = c(margins[1], 0, 0, margins[2]))
    x <- t(x)
    cellnote <- t(cellnote)
    if (revC) {
        iy <- nr:1
        if (exists("ddr"))
            ddr <- rev(ddr)
        x <- x[, iy]
        cellnote <- cellnote[, iy]
    }
    else iy <- 1:nr
    image(1:nc, 1:nr, x, xlim = 0.5 + c(0, nc), ylim = 0.5 + c(0, nr), axes = FALSE, xlab = "", ylab = "", col = col, breaks = breaks, ...)
    retval$carpet <- x
    if (exists("ddr"))
        retval$rowDendrogram <- ddr
    if (exists("ddc"))
        retval$colDendrogram <- ddc
    retval$breaks <- breaks
    retval$col <- col
    if (!invalid(na.color) & any(is.na(x))) { # load library(gplots)
        mmat <- ifelse(is.na(x), 1, NA)
        image(1:nc, 1:nr, mmat, axes = FALSE, xlab = "", ylab = "",
            col = na.color, add = TRUE)
    }
    axis(1, 1:nc, labels = labCol, las = 2, line = -0.5, tick = 0,
        cex.axis = cexCol)
    if (!is.null(xlab))
        mtext(xlab, side = 1, line = margins[1] - 1.25)
    axis(4, iy, labels = labRow, las = 2, line = -0.5, tick = 0,
        cex.axis = cexRow)
    if (!is.null(ylab))
        mtext(ylab, side = 4, line = margins[2] - 1.25)
    if (!missing(add.expr))
        eval(substitute(add.expr))
    if (!missing(colsep))
        for (csep in colsep) rect(xleft = csep + 0.5, ybottom = rep(0, length(csep)), xright = csep + 0.5 + sepwidth[1], ytop = rep(ncol(x) + 1, csep), lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
    if (!missing(rowsep))
        for (rsep in rowsep) rect(xleft = 0, ybottom = (ncol(x) + 1 - rsep) - 0.5, xright = nrow(x) + 1, ytop = (ncol(x) + 1 - rsep) - 0.5 - sepwidth[2], lty = 1, lwd = 1, col = sepcolor, border = sepcolor)
    min.scale <- min(breaks)
    max.scale <- max(breaks)
    x.scaled <- scale01(t(x), min.scale, max.scale)
    if (trace %in% c("both", "column")) {
        retval$vline <- vline
        vline.vals <- scale01(vline, min.scale, max.scale)
        for (i in colInd) {
            if (!is.null(vline)) {
                abline(v = i - 0.5 + vline.vals, col = linecol,
                  lty = 2)
            }
            xv <- rep(i, nrow(x.scaled)) + x.scaled[, i] - 0.5
            xv <- c(xv[1], xv)
            yv <- 1:length(xv) - 0.5
            lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
        }
    }
    if (trace %in% c("both", "row")) {
        retval$hline <- hline
        hline.vals <- scale01(hline, min.scale, max.scale)
        for (i in rowInd) {
            if (!is.null(hline)) {
                abline(h = i + hline, col = linecol, lty = 2)
            }
            yv <- rep(i, ncol(x.scaled)) + x.scaled[i, ] - 0.5
            yv <- rev(c(yv[1], yv))
            xv <- length(yv):1 - 0.5
            lines(x = xv, y = yv, lwd = 1, col = tracecol, type = "s")
        }
    }
    if (!missing(cellnote))
        text(x = c(row(cellnote)), y = c(col(cellnote)), labels = c(cellnote),
            col = notecol, cex = notecex)
    par(mar = c(margins[1], 0, 0, 0))
    if (dendrogram %in% c("both", "row")) {
        plot(ddr, horiz = TRUE, axes = FALSE, yaxs = "i", leaflab = "none")
    }
    else plot.new()
    par(mar = c(0, 0, if (!is.null(main)) 5 else 0, margins[2]))
    if (dendrogram %in% c("both", "column")) {
        plot(ddc, axes = FALSE, xaxs = "i", leaflab = "none")
    }
    else plot.new()
    if (!is.null(main))
        title(main, cex.main = 1.5 * op[["cex.main"]])
    if (key) {
        par(mar = c(5, 4, 2, 1), cex = 0.75)
        tmpbreaks <- breaks
        if (symkey) {
            max.raw <- max(abs(c(x, breaks)), na.rm = TRUE)
            min.raw <- -max.raw
            tmpbreaks[1] <- -max(abs(x), na.rm = TRUE)
            tmpbreaks[length(tmpbreaks)] <- max(abs(x), na.rm = TRUE)
        }
        else {
            min.raw <- min(x, na.rm = TRUE)
            max.raw <- max(x, na.rm = TRUE)
        }

        z <- seq(min.raw, max.raw, length = length(col))
        image(z = matrix(z, ncol = 1), col = col, breaks = tmpbreaks,
            xaxt = "n", yaxt = "n")
        par(usr = c(0, 1, 0, 1))
        lv <- pretty(breaks)
        xv <- scale01(as.numeric(lv), min.raw, max.raw)
        axis(1, at = xv, labels = lv)
        if (scale == "row")
            mtext(side = 1, "Row Z-Score", line = 2)
        else if (scale == "column")
            mtext(side = 1, "Column Z-Score", line = 2)
        else mtext(side = 1, KeyValueName, line = 2)
        if (density.info == "density") {
            dens <- density(x, adjust = densadj, na.rm = TRUE)
            omit <- dens$x < min(breaks) | dens$x > max(breaks)
            dens$x <- dens$x[-omit]
            dens$y <- dens$y[-omit]
            dens$x <- scale01(dens$x, min.raw, max.raw)
            lines(dens$x, dens$y/max(dens$y) * 0.95, col = denscol,
                lwd = 1)
            axis(2, at = pretty(dens$y)/max(dens$y) * 0.95, pretty(dens$y))
            title("Color Key\nand Density Plot")
            par(cex = 0.5)
            mtext(side = 2, "Density", line = 2)
        }
        else if (density.info == "histogram") {
            h <- hist(x, plot = FALSE, breaks = breaks)
            hx <- scale01(breaks, min.raw, max.raw)
            hy <- c(h$counts, h$counts[length(h$counts)])
            lines(hx, hy/max(hy) * 0.95, lwd = 1, type = "s",
                col = denscol)
            axis(2, at = pretty(hy)/max(hy) * 0.95, pretty(hy))
            title("Color Key\nand Histogram")
            par(cex = 0.5)
            mtext(side = 2, "Count", line = 2)
        }
        else title("Color Key")
    }
    else plot.new()
    retval$colorTable <- data.frame(low = retval$breaks[-length(retval$breaks)],
        high = retval$breaks[-1], color = retval$col)
    invisible(retval)
}

myclust <- function(c)
{
  hclust(c,method="average")
}

na.dist <- function(x,method="euclidian",...) 
{
  t.dist <- dist(x,...)
  t.dist <- as.matrix(t.dist)
  t.limit <- 1.1*max(t.dist,na.rm=T)
  t.dist[is.na(t.dist)] <- t.limit
  t.dist <- as.dist(t.dist)
  return(t.dist)
}

Figure Three

A “heatmap” of physico-chemical properties, in vitro TK parameters (Wetmore, et al., 2013), and TK parameters estimated from in vivo plasma concentration. Rows (chemicals) are clustered by Euclidean distance so that adjacent rows are more similar to each other. Each column (chemical properties) was scaled by the standard deviation of the column and the mean value was subtracted, such that a value of 0 corresponds to the mean and values of -1 or 1 correspond to values one standard deviation above or below the chemicals, respectively. In some cases, TK parameters could not be estimated (e.g., no oral data available for estimating Fbio and kgutabs). Fraction of the compound that is neutral, positively, or negatively ionized at pH 7.4 is indicated by “neutral ph74”, “positive ph74” and “negative ph74”. The color bar at the left-hand side indicates pharmaceuticals in grey and other chemicals in black.

physprop.matrix <- chem.invivo.PK.aggregate.data[,c("MW",
                                    "logP",
                                    "WaterSol",
                                    "Neutral.pH74",
                                    "Positive.pH74",
                                    "Negative.pH74",
                                    "Clint",
                                    "Fup",
                                    "Vdist",
                                    "kelim",
                                    "kgutabs",
                                    "Fbio")]
rnames <- chem.invivo.PK.aggregate.data$Compound
rnames[rnames=="Bensulide"] <- c("Bensulide.Joint","Bensulide.NHEERL","Bensulide.RTI")
rnames[rnames=="Propyzamide"] <- c("Propyzamide.Joint","Propyzamide.NHEERL","Propyzamide.RTI")
rownames(physprop.matrix) <- rnames

row.side.cols <- rep("Black",dim(physprop.matrix)[1])
row.side.cols[sapply(chem.invivo.PK.aggregate.data$CAS,is.pharma)] <- "Grey"

apply(physprop.matrix,2,function(x) mean(x,na.rm=T))
for (this.col in c(c(1,3,7,9:11),c(4:6,8,12))) physprop.matrix[,this.col] <- log10(10^-6+physprop.matrix[,this.col])


physprop.matrix <- apply(physprop.matrix,2,function(x) (x-mean(x,na.rm=T))/sd(x,na.rm=T))

dev.new(width=8,height=8)
main_title="in vivo Toxicokinetic Parameters"
heatmap.2(physprop.matrix, 
          hclustfun=myclust,
          Colv=FALSE,
          distfun=na.dist, 
          na.rm = TRUE, 
          scale="none",
          trace="none",
          RowSideColors=row.side.cols,
          col=brewer.pal(11,"Spectral"),
          margins=c(8,10)) 
# The following has to be adjusted manually:
rect(0.65, 0.01, 0.87, 0.78, border="blue") 
text(0.75,0.81,expression(paste("Estimated from ",italic("In Vivo")," Data")))

The original scripts used a data.table called FitData that contains a lot of stuff we don’t need to make these figures. We recreate it from the httk object chem.invivo.PK.aggregate.data:

# Use joint analysis data from both labs:
FitData <- subset(chem.invivo.PK.aggregate.data,Compound!="Bensulide"|Source=="Wambaugh et al. (2018), NHEERL/RTI")
FitData <- subset(FitData,Compound!="Propyzamide"|Source=="Wambaugh et al. (2018), NHEERL/RTI")
# Rename some columns to match original data files:
FitData$Compound.abbrev <- FitData$Abbrev
# Add a column indicating chemical type:
FitData$Chemical <- "Other"
FitData[sapply(FitData$CAS,is.pharma),"Chemical"] <- "Pharmaceutical"

Figure Four

Comparison of measured volumes of distribution (Vd) with predictions based upon in vitro data and in silico methods (Pearce, et al., 2017a; Schmitt, 2008). The solid line in each panel indicates the identity line (1:1, perfect predictions). Chemicals can be identified by their chemical abbreviation given in Table 1.

# Get rid of chemicals with Fup < LOD:
FitData.Fup <- subset(FitData,!(CAS%in%subset(chem.physical_and_invitro.data,Human.Funbound.plasma==0)$CAS))

# Rename some columns to match original data files:
FitData.Fup$SelectedVdist <- FitData.Fup$Vdist
FitData.Fup$SelectedVdist.sd <- FitData.Fup$Vdist.sd

#Predict volume of Distribution
FitData.Fup$Vdist.1comp.pred <- sapply(FitData.Fup$CAS,function(x) calc_vdist(chem.cas=x,species="Rat",default.to.human = T))
FitData.Fup$Vdist.1comp.pred.nocal <- sapply(FitData.Fup$CAS,function(x) calc_vdist(chem.cas=x,regression=F,species="Rat",default.to.human = T))


FigVdista.fit <- lm(log(SelectedVdist)~log(Vdist.1comp.pred),data=subset(FitData.Fup,!is.na(SelectedVdist)&!is.na(Vdist.1comp.pred)))
summary(FigVdista.fit)

FigVdista.fit.weighted <- lm(log(SelectedVdist)~log(Vdist.1comp.pred),data=subset(FitData.Fup,!is.na(SelectedVdist)&!is.na(Vdist.1comp.pred)),weights=1/SelectedVdist.sd^2)
summary(FigVdista.fit)

FigVdista.fit.pharm <- lm(log(SelectedVdist)~log(Vdist.1comp.pred),data=subset(FitData.Fup,Chemical!="Other"&!is.na(Vdist.1comp.pred)))
summary(FigVdista.fit.pharm)

FigVdista.fit.other <- lm(log(SelectedVdist)~log(Vdist.1comp.pred),data=subset(FitData.Fup,Chemical=="Other"&!is.na(Vdist.1comp.pred)))
summary(FigVdista.fit.other)

dim(subset(FitData.Fup,!is.na(SelectedVdist.sd)&!is.na(Vdist.1comp.pred)))[1]

FigVdista <- ggplot(data=subset(FitData.Fup,!is.na(Vdist.1comp.pred))) +
    geom_segment(color="Grey",aes(x=Vdist.1comp.pred,y=exp(log(SelectedVdist)-SelectedVdist.sd),xend=Vdist.1comp.pred,yend=exp(log(SelectedVdist)+SelectedVdist.sd)))+
    geom_text(aes_string(y="SelectedVdist",
                         x="Vdist.1comp.pred",
                         label="Compound.abbrev",
                         color="Chemical"))+ 
   scale_y_log10(label=scientific_10,limits = c(5*10^-2, 10^2)) +
   scale_x_log10(label=scientific_10,limits = c(5*10^-2, 10^2)) +
   annotation_logticks() + 
   geom_abline(slope=1, intercept=0) + 
   annotate("text", x = 1*10^-1, y = 3*10^1, label = "A",size=6)  +
   ylab(expression(paste(italic("In vivo")," estimated Volume of Distribution (L/kg)"))) + 
   xlab(expression(paste("Calibrated ",italic("In vitro")," predicted ",V[d]," (L/kg)"))) +
   theme_bw()  +
   theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))
   
FigVdistb.fit <- lm(log(SelectedVdist)~log(Vdist.1comp.pred.nocal),data=subset(FitData.Fup,!is.na(Vdist.1comp.pred)))
summary(FigVdistb.fit)

FigVdistb.fit.weighted <- lm(log(SelectedVdist)~log(Vdist.1comp.pred.nocal),data=subset(FitData.Fup,!is.na(Vdist.1comp.pred)),weights=1/SelectedVdist.sd^2)
summary(FigVdistb.fit)

FigVdistb.fit.pharm <- lm(log(SelectedVdist)~log(Vdist.1comp.pred.nocal),data=subset(FitData.Fup,Chemical!="Other"&!is.na(Vdist.1comp.pred)))
summary(FigVdistb.fit.pharm)

FigVdistb.fit.other<- lm(log(SelectedVdist)~log(Vdist.1comp.pred.nocal),data=subset(FitData.Fup,Chemical=="Other"&!is.na(Vdist.1comp.pred)))
summary(FigVdistb.fit.other)

FigVdistb <- ggplot(data=subset(FitData.Fup,!is.na(Vdist.1comp.pred.nocal))) +
   geom_segment(color="Grey",aes(x=Vdist.1comp.pred.nocal,y=exp(log(SelectedVdist)-SelectedVdist.sd),xend=Vdist.1comp.pred.nocal,yend=exp(log(SelectedVdist)+SelectedVdist.sd)))+
   geom_text(aes_string(y="SelectedVdist",
                         x="Vdist.1comp.pred.nocal",
                         label="Compound.abbrev",
                         color="Chemical"))+ 
   scale_y_log10(label=scientific_10,limits = c(5*10^-2, 10^2)) +
   scale_x_log10(label=scientific_10,limits = c(5*10^-2, 10^2)) +
   annotation_logticks() + 
   geom_abline(slope=1, intercept=0) + 
   annotate("text", x = 1*10^-1, y = 3*10^1, label = "B",size=6) +
   ylab(expression(paste(italic("In vivo")," estimated Volume of Distribution (L/kg)"))) + 
   xlab(expression(paste("Original ",italic("In vitro")," predicted ",V[d]," (L/kg)"))) +
   theme_bw()  +
   theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))



dev.new(width=10,height=6)
multiplot(FigVdista,FigVdistb,cols=2,widths=c(1.75,1.75))


Fig4a.data <- subset(FitData.Fup,!is.na(SelectedVdist)&!is.na(Vdist.1comp.pred))
Fig4a.MSE <- mean((log(Fig4a.data$Vdist.1comp.pred)-log(Fig4a.data$SelectedVdist))^2)


Fig4b.data <- subset(FitData.Fup,!is.na(SelectedVdist)&!is.na(Vdist.1comp.pred.nocal))
Fig4b.MSE <- mean((log(Fig4b.data$Vdist.1comp.pred.nocal)-log(Fig4b.data$SelectedVdist))^2)

Figure Five

Comparison of the TK clearance estimated from the in vivo data with the clearance predictions made using HTTK data. The more likely empirical TK model (either one or two-compartments, as in Figure 2) is selected for each chemical using the AIC (Akaike, 1974). A non-compartmental estimate of clearance is used if neither model is selected. Estimated standard deviation is indicated by a vertical line, and is often smaller than the plot point. The solid line indicates the identity line (1:1, perfect predictions), while the dotted and dashed lines indicate the linear regression (log-scale) trend-lines for pharmaceuticals and other chemicals, respectively. Chemicals can be identified by their chemical abbreviation given in Table 1.

#FitData.Fup$CLtot.1comp <- FitData.Fup$Vdist.1comp.meas*FitData.Fup$kelim.1comp.meas
#FitData.Fup$CLtot.1comp.sd <- (FitData.Fup$Vdist.1comp.meas.sd^2+FitData.Fup$kelim.1comp.meas.sd^2)^(1/2)
#FitData.Fup[is.na(CLtot.1comp.sd)]$CLtot.1comp.sd <- Inf

#FitData.Fup[is.na(SelectedCLtot.sd)]$SelectedClearance.sd <- Inf

FitData.Fup$SelectedCLtot <- FitData.Fup$Vdist*FitData.Fup$kelim 
FitData.Fup$SelectedCLtot.sd <- (FitData.Fup$Vdist.sd^2+FitData.Fup$kelim.sd^2)^(1/2)
FitData.Fup$CLtot.1comp.pred <- sapply(FitData.Fup$CAS,function(x) calc_total_clearance(chem.cas=x,species="Rat",default.to.human = T))

Fig.SelectedClearance.fit <- lm(log(SelectedCLtot)~log(CLtot.1comp.pred),data=subset(FitData.Fup,!is.na(SelectedCLtot.sd)))
summary(Fig.SelectedClearance.fit)
Fig.SelectedClearance.fit.weighted <- lm(log(SelectedCLtot)~log(CLtot.1comp.pred),data=subset(FitData.Fup,!is.na(SelectedCLtot.sd)),weights=1/SelectedCLtot.sd^2)
summary(Fig.SelectedClearance.fit.weighted)
Fig.SelectedClearance.fit.pharma <- lm(log(SelectedCLtot)~log(CLtot.1comp.pred),data=subset(FitData.Fup,Chemical!="Other"&!is.na(SelectedCLtot.sd)),weights=1/SelectedCLtot.sd^2)
summary(Fig.SelectedClearance.fit.pharma)
Fig.SelectedClearance.fit.other<- lm(log(SelectedCLtot)~log(CLtot.1comp.pred),data=subset(FitData.Fup,Chemical=="Other"&!is.na(SelectedCLtot.sd)),weights=1/SelectedCLtot.sd^2)
summary(Fig.SelectedClearance.fit.other)
Fig.SelectedClearance.fit.pharma <- lm(log(SelectedCLtot)~log(CLtot.1comp.pred),data=subset(FitData.Fup,Chemical!="Other"))
summary(Fig.SelectedClearance.fit.pharma)
Fig.SelectedClearance.fit.other<- lm(log(SelectedCLtot)~log(CLtot.1comp.pred),data=subset(FitData.Fup,Chemical=="Other"))
summary(Fig.SelectedClearance.fit.other)

Fig.SelectedClearance<- ggplot(data=FitData.Fup,aes(y=SelectedCLtot,x=CLtot.1comp.pred)) +
    geom_segment(data=subset(FitData.Fup,is.finite(SelectedCLtot.sd)),color="grey",aes(x=CLtot.1comp.pred,y=exp(log(SelectedCLtot)-SelectedCLtot.sd),xend=CLtot.1comp.pred,yend=exp(log(SelectedCLtot)+SelectedCLtot.sd)))+
  geom_text(aes_string(y="SelectedCLtot",
                         x="CLtot.1comp.pred",
                         label="Compound.abbrev",
                         color="Chemical"))+ 
#    geom_point(data=twocomp.data,color="White",size=2,aes(shape=Source))+ 
   scale_y_log10(label=scientific_10,limits = c(10^-4, 10^3)) +
   scale_x_log10(label=scientific_10,limits = c(10^-4, 10^3)) +
    annotation_logticks() + 
    geom_abline(slope=1, intercept=0) + 
     geom_line(aes(x = CLtot.1comp.pred, y = exp(Fig.SelectedClearance.fit.pharma$coefficients[1]+Fig.SelectedClearance.fit.pharma$coefficients[2]*log(CLtot.1comp.pred))), colour = "darkturquoise", linetype="dotted", size=1.5) +
  geom_line(aes(x = CLtot.1comp.pred, y = exp(Fig.SelectedClearance.fit.other$coefficients[1]+Fig.SelectedClearance.fit.other$coefficients[2]*log(CLtot.1comp.pred))), colour = "red", linetype="dashed", size=1.5) +
     ylab(expression(paste(italic("In vivo")," estimated clearance (mg/L/h)"))) + 
    xlab(expression(paste(italic("In vitro")," predicted clearance (mg/L/h)"))) +
    theme_bw()   +
    theme(legend.position="bottom")+
    annotate("text",x = 10^2/3/3/3, y = 3*3*10^-4,size=4, label = lm_R2(Fig.SelectedClearance.fit.pharma,prefix="Pharmaceutical:"),parse=T)+ 
    annotate("text",x = 10^2/3/3/3, y = 3*3*3*10^-5,size=4, label = lm_R2(Fig.SelectedClearance.fit.other,prefix="Other:"),parse=T)+
    theme(plot.margin = unit(c(1,1,1,1), "cm"))+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))


dev.new(width=6,height=6)
print(Fig.SelectedClearance)

paste("with",with(FitData.Fup,sum(SelectedCLtot<CLtot.1comp.pred,na.rm=T)),"chemicals over-estimated and",with(FitData.Fup,sum(SelectedCLtot>CLtot.1comp.pred,na.rm=T)),"under-estimated")

Figure Six

Distribution of Gut Absorption Rate for Environmental and Pharmaceutical Chemicals.

FitData$Selectedkgutabs <- FitData$kgutabs
# Cutoff from optimizer is 1000:
Figkgutabs <- ggplot(subset(FitData,Selectedkgutabs<999), aes(Selectedkgutabs, fill = Chemical)) +
  geom_histogram(binwidth=0.5) +
     scale_x_log10(label=scientific_10) +
     ylab("Number of Chemicals")+
     xlab("Absorption Rate from Gut (1/h)")+
     theme_bw() +
        theme(legend.position="bottom")
     
dev.new(width=6,height=6)
print(Figkgutabs) 

mean(subset(FitData,Selectedkgutabs<999)$Selectedkgutabs)
min(subset(FitData,Selectedkgutabs<999)$Selectedkgutabs)
max(subset(FitData,Selectedkgutabs<999)$Selectedkgutabs)
median(subset(FitData,Selectedkgutabs<999)$Selectedkgutabs)

Figure Seven

In panel A, the observed fraction of oral dose absorbed from the gut is compared with predictions made using Gastroplus (Simulations Plus, 2017). The solid line indicates the identity line (1:1, perfect predictions). These fractions are distrusted from nearly zero to effectively 100% (Panel B). Chemicals can be identified by their chemical abbreviation given in Table 1.

FitData$SelectedFbio <- FitData$Fbio

FigFbioa <- ggplot(data=FitData) +
    geom_text(aes_string(y="SelectedFbio",
                         x="Fbio.pred",
                         label="Compound.abbrev",
                         color="Chemical"))+ 
   scale_y_log10(label=scientific_10,limits=c(10^-3,1)) +
   scale_x_log10(label=scientific_10,limits=c(10^-3,1)) +
#    xlim(0, 1)+
#    ylim(0, 1)+
    annotate("text", x = .015/10, y = 0.55, label = "A",size=6) +
    annotation_logticks() + 
    geom_abline(slope=1, intercept=0) + 
    ylab(expression(paste(italic("In vivo")," estimated Fraction Bioavailable"))) + 
    xlab(expression(paste(italic("In silico")," predicted ",F[bio]))) +
#    scale_color_brewer(palette="Set2") + 
    theme_bw()  +
    theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))


FigFbiob <- ggplot(FitData, aes(SelectedFbio, fill = Chemical)) +
  geom_histogram(binwidth=0.5) +
    scale_x_log10(label=scientific_10,limits=c(10^-3,3)) +
    annotate("text", x = .001, y = 18, label = "B",size=6) +
    ylab("Number of Chemicals")+
    xlab(expression(paste(italic("In vivo")," estimated Fraction Bioavailable"))) + 
    theme_bw()  +
    theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),fill=guide_legend(nrow=2,byrow=TRUE))

     

dev.new(width=10,height=6)
multiplot(FigFbioa,FigFbiob,cols=2,widths=c(1.75,1.75))

summary(lm(log(SelectedFbio)~log(Fbio.pred),data=FitData))

paste("Fbio ranged from",signif(min(FitData[FitData$Chemical=="Pharmaceutical","SelectedFbio"],na.rm=T),2),"to",signif(max(FitData[FitData$Chemical=="Pharmaceutical","SelectedFbio"],na.rm=T),2),"for pharmaceutical compounds, but was observed to be as low as", signif(min(FitData[FitData$Chemical=="Other","SelectedFbio"],na.rm=T),3),"for other compounds.")

min(FitData$SelectedFbio,na.rm=T)                       

Figure Eight

Rat in vivo data were collected for diverse environmental chemicals to evaluate the predictive ability of HTTK, especially with respect to predicting steady-state serum concentration (Css). Chemicals can be identified by their chemical abbreviation given in Table 1. The abbreviations are centered at the measured and predicted values. The solid line indicates the identity line (1:1, perfect predictions). In panel A, the one-compartment model is parameterized with a predicted volume of distribution and clearance, based upon in vitro measured parameters. Fbio is assumed to be 100%. In panel B, the in vivo measured Fbio is used to reduce the amount of the oral dose absorbed in the one-compartment model, to illustrate the improvement possible if Fbio could be predicted accurately.

FitData$SelectedCss <- FitData$Css
FitData$SelectedCss.sd <- FitData$Css.sd
FitData$Css.1comp.pred <- sapply(FitData$CAS,function(x) calc_analytic_css(chem.cas=x,species="Rat",default.to.human=T))

Fig.1compCss.fit <- lm(log(SelectedCss)~log(Css.1comp.pred),data=FitData,na.rm=T)
Fig.1compCss.fit.weighted <- lm(log(SelectedCss)~log(Css.1comp.pred),data=FitData,na.rm=T,weights=1/SelectedCss.sd^2)
Fig.1compCss.fit.pharma <- lm(log(SelectedCss)~log(Css.1comp.pred),data=subset(FitData,Chemical!="Other"),na.rm=T)
Fig.1compCss.fit.other <- lm(log(SelectedCss)~log(Css.1comp.pred),data=subset(FitData,Chemical=="Other"),na.rm=T)

summary(Fig.1compCss.fit)
summary(Fig.1compCss.fit.pharma)
summary(Fig.1compCss.fit.other)


textx <- 10^-3
texty <- 1*10^1

Fig.Cssa <- ggplot(data=FitData) +
  geom_segment(color="grey",aes(x=Css.1comp.pred,y=exp(log(SelectedCss)-SelectedCss.sd),xend=Css.1comp.pred,yend=exp(log(SelectedCss)+SelectedCss.sd)))+
  geom_text(size=4,aes_string(y="SelectedCss", x="Css.1comp.pred",label="Compound.abbrev",color="Chemical"))+ 
  scale_y_log10(label=scientific_10,limits = c(5*10^-6, 110)) +
  scale_x_log10(label=scientific_10,limits = c(5*10^-6, 110)) +
  annotation_logticks() + 
  geom_abline(slope=1, intercept=0) + 
  ylab(expression(paste(italic("In vivo")," estimated ",C[ss]," (mg/L)"))) + 
 xlab(expression(paste(italic("In vitro")," predicted ",C[ss]," (mg/L)"))) +
#  scale_color_brewer(palette="Set2") + 
  theme_bw()  +
  theme(legend.position="bottom")+
    annotate("text", x = textx/15, y = 5*texty, label = "A",size=6) +
  annotate("text",x = textx, y = texty,size=6, label = lm_R2(Fig.1compCss.fit),parse=T)+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE)) 
#  annotate("text",x = textx/3, y = texty/3,size=4, label = lm_R2(Fig.1compCss.fit.weighted,prefix="Weighted:"),parse=T) 
#  annotate("text",x = textx, y = texty,size=4, label = lm_R2(Fig.1compCss.fit.pharma,prefix="Pharmaceutical:"),parse=T)
#  annotate("text",x = textx, y = texty/3,size=4, label = lm_R2(Fig.1compCss.fit.other,prefix="Other:"),parse=T) 

FitData$Css.Bio <- FitData$Css.1comp.pred*FitData$SelectedFbio
Fig.1compCssb.fit <- lm(log(SelectedCss)~log(Css.Bio),data=FitData,na.rm=T)
Fig.1compCssb.fit.weighted <- lm(log(SelectedCss)~log(Css.Bio),data=FitData,na.rm=T,weights=1/SelectedCss.sd^2)
Fig.1compCssb.fit.pharma <- lm(log(SelectedCss)~log(Css.Bio),data=subset(FitData,Chemical!="Other"),na.rm=T)
Fig.1compCssb.fit.other <- lm(log(SelectedCss)~log(Css.Bio),data=subset(FitData,Chemical=="Other"),na.rm=T)

Fig.Cssb <- ggplot(data=FitData) +
  geom_segment(color="grey",aes(x=Css.Bio,y=exp(log(SelectedCss)-SelectedCss.sd),xend=Css.Bio,yend=exp(log(SelectedCss)+SelectedCss.sd)))+
  geom_text(size=4,aes_string(y="SelectedCss", x="Css.Bio",label="Compound.abbrev",color="Chemical"))+ 
  scale_y_log10(label=scientific_10,limits = c(5*10^-6, 110)) +
  scale_x_log10(label=scientific_10,limits = c(5*10^-6, 110)) +
  annotation_logticks() + 
  geom_abline(slope=1, intercept=0) + 
  ylab(expression(paste(italic("In vivo")," estimated ",C[ss]," (mg/L)"))) + 
 xlab(expression(paste(italic("In vitro")," predicted ",C[ss]," (mg/L) Using Measured ", F[bio]))) + 
 # scale_color_brewer(palette="Set2") + 
  theme_bw()  +
  theme(legend.position="bottom")+
    annotate("text", x = textx/15, y = 5*texty, label = "B",size=6) +
  annotate("text",x = textx, y = texty,size=6, label = lm_R2(Fig.1compCssb.fit),parse=T)+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE)) 

dev.new(width=10,height=6)
multiplot(Fig.Cssa,Fig.Cssb,cols=2,widths=c(1.75,1.75))

                        

Figure Nine

Evaluation of Wambaugh et al. (2015) classification scheme for predicting errors made when using HTTK data to predict in vivo steady state plasma concentration (Css). Chemicals were placed into categories, depending upon the size of the error. “On the Order” represented the best case, where errors were within 3.2 times over or under the true value. Some chemicals were determined to be problematic due to limitations in the HTTK methods (e.g., plasma protein binding estimation) or failure to come to steady state. Wherever the chemical names overlap the vertical, gray bands, the observed errors are consistent with predicted error. Chemicals can be identified by their chemical abbreviation given in Table 1.

FitData$TriageCat <- FitData$Triage.Call
FitData$CssResidual <- log10(FitData$Css.1comp.pred/FitData$Css)
FitData[FitData$TriageCat%in%c("Does Not Reach Steady State","Plasma Binding Assay Failed"),"TriageCat"]<-"Problem"
FitData$TriageCat <- factor(FitData$TriageCat,levels=c(">10x Underestimated",">3.2x Underestimated","On the Order",">3.2x Overestimated",">10x Overestimated","Problem"))


Fig.Triage <- ggplot(data=subset(FitData,!is.na(TriageCat)&is.finite(CssResidual))) +
  geom_text(size=4,aes_string(y="CssResidual", x="TriageCat",label="Compound.abbrev",color="Chemical"))+ 
  geom_segment(aes(x = factor(">10x Underestimated",levels=levels(FitData$TriageCat)), y = log10(1/10), xend = factor(">10x Underestimated",levels=levels(FitData$TriageCat)), yend = log10(1/30)), size=3,colour = "grey") +
 # geom_segment(aes(x = factor(">3.2x Underestimated",levels=levels(FitData$TriageCat)), y = log10(1/10), xend = factor(">3.2x Underestimated",levels=levels(FitData$TriageCat)), yend = log10(1/3.2)), size=3,colour = "grey") +
  geom_segment(aes(x = factor("On the Order",levels=levels(FitData$TriageCat)), y = log10(1/3.2), xend = factor("On the Order",levels=levels(FitData$TriageCat)), yend = log10(3.2)), size=3,colour = "grey") +
  geom_segment(aes(x = factor(">3.2x Overestimated",levels=levels(FitData$TriageCat)), y = log10(3.2), xend = factor(">3.2x Overestimated",levels=levels(FitData$TriageCat)), yend = log10(10)), size=3,colour = "grey") +
  geom_segment(aes(x = factor(">10x Overestimated",levels=levels(FitData$TriageCat)), y = log10(10), xend = factor(">10x Overestimated",levels=levels(FitData$TriageCat)), yend = log10(500)), size=3,colour = "grey") +
  geom_hline(yintercept=0)+
  geom_hline(yintercept=log10(1/10),linetype="dashed")+
    geom_hline(yintercept=log10(1/3.2),linetype="dashed")+
  geom_hline(yintercept=log10(3.2),linetype="dashed")+
  geom_hline(yintercept=log10(10),linetype="dashed")+
  geom_text(size=4,aes_string(y="CssResidual", x="TriageCat",label="Compound.abbrev",color="Chemical"))+ 
  scale_y_continuous(name=expression(paste("Actual error in ",C[ss]," prediction")),breaks=c(log10(1/10),log10(1/3.2),0,log10(3.2),1),labels=c(">10x Under",">3.2x Under","On the Order",">3.2x Over",">10x Over")) + 
  xlab("Predicted Error") +
  theme_bw()  +
  theme(legend.position="bottom")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))


dev.new(width=6,height=6)
print(Fig.Triage)

                        

Figure Ten

Evaluation of the ability of in vitro HTTK data, coupled with a one-compartment model, to predict important TK statistics like the maximum plasma concentration (Cmax) for characterizing tissue exposure. A single point is plotted for each combination of chemical, dose amount, and dose route, either intravenous (iv) or per oral (po). In panel A, the one-compartment model is parameterized with a predicted volume of distribution and clearance, based upon in vitro measured parameters. Fbio is assumed to be 100%. In panel B, the in vivo measured Fbio is used to reduce the amount of the oral dose absorbed in the one-compartment model, to illustrate the improvement possible if Fbio could be predicted accurately.

PKstats <- chem.invivo.PK.summary.data
# Add a column indicating chemical type:
PKstats$Chemical <- "Other"
PKstats[sapply(PKstats$CAS,is.pharma),"Chemical"] <- "Pharmaceutical"

FigCmaxa.fit <- lm(log(Cmax)~log(Cmax.pred),data=subset(PKstats,is.finite(Cmax)))
summary(FigCmaxa.fit)
                    
FigCmaxa <- ggplot(data=subset(PKstats,is.finite(Cmax))) +
    geom_point(size=3,aes_string(y="Cmax",
                         x="Cmax.pred",
                         shape="Route",
                         color="Chemical"))+ 
   scale_y_log10(label=scientific_10,limits = c(10^-4, 10^4)) +
   scale_x_log10(label=scientific_10,limits = c(10^-4, 10^4)) +
    annotation_logticks() + 
    geom_abline(slope=1, intercept=0) + 
    ylab(expression(paste(italic("In vivo")," estimated ",C[max]))) + 
    xlab(expression(paste(italic("In vitro")," predicted ",C[max]))) +
#    scale_color_brewer(palette="Set2") +
    annotate("text", x = 10^-3, y = 300, label = "A",size=6) +
    annotate("text",x = 3*10^1, y = 10^-3, label = lm_R2(FigCmaxa.fit),parse=T,size=6) + 
    theme_bw()  +
    theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))




FigCmaxb.fit <- lm(log(Cmax)~log(Cmax.pred.Fbio),data=subset(PKstats,is.finite(Cmax)))
summary(FigCmaxb.fit)

FigCmaxb <- ggplot(data=subset(PKstats,is.finite(Cmax))) +
    geom_point(size=3,aes_string(y="Cmax",
                         x="Cmax.pred.Fbio",
                         shape="Route",
                         color="Chemical"))+ 
   scale_y_log10(label=scientific_10,limits = c(10^-4, 10^4)) +
   scale_x_log10(label=scientific_10,limits = c(10^-4, 10^4)) +
    annotation_logticks() + 
    geom_abline(slope=1, intercept=0) + 
    ylab(expression(paste(italic("In vivo")," estimated ",C[max]))) + 
    xlab(expression(paste("Predicted ", C[max], " Using Measured ", F[bio ]))) + 
    annotate("text", x = 10^-3, y = 300, label = "B",size=6) +
    annotate("text",x = 3*10^1, y = 10^-3, label = lm_R2(FigCmaxb.fit),parse=T,size=6) + 
#    scale_color_brewer(palette="Set2") + 
    theme_bw()  +
    theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))

dev.new(width=10,height=6)
multiplot(FigCmaxa,FigCmaxb,cols=2,widths=c(1.75,1.75))
                        

Figure Eleven

Evaluation of predictions for the time integrated plasma concentration (i.e., area under the curve or AUC). A single point is plotted for each combination of chemical, dose amount, and dose route, either intravenous (iv) or per oral (po). In panel A, the one-compartment model is parameterized with a predicted volume of distribution and clearance, based upon in vitro measured parameters. Fbio is assumed to be 100%. In panel B, the in vivo measured Fbio is used to reduce the amount of the oral dose absorbed in the one-compartment model, to illustrate the improvement possible if Fbio could be predicted accurately. The solid line in each panel indicates the identity line (1:1, perfect predictions).

FigAUCa.fit <- lm(log(AUC)~log(AUC.pred),data=subset(PKstats,is.finite(log(AUC))))
FigAUCb.fit <- lm(log(AUC)~log(AUC.pred.Fbio),data=subset(PKstats,is.finite(log(AUC))))
summary(FigAUCa.fit)
summary(FigAUCb.fit)
                                              
FigAUCa <- ggplot(data=subset(PKstats,is.finite(log(AUC)))) +
    geom_point(size=3,aes_string(y="AUC",
                         x="AUC.pred",
                         shape="Route",
                         color="Chemical"))+ 
   scale_y_log10(label=scientific_10,limits = c(10^-3, 10^4)) +
   scale_x_log10(label=scientific_10,limits = c(10^-3, 10^4)) +
    annotation_logticks() + 
    geom_abline(slope=1, intercept=0) + 
    ylab(expression(paste(italic("In vivo")," estimated AUC (mg*h/L)"))) + 
    xlab(expression(paste(italic("In vitro")," predicted Area under the Curve (AUC, mg*h/L)"))) +
    annotate("text", x = 10^-2, y = 3000, label = "A",size=6) +
    annotate("text",x = 3*10^1, y = 10^-2, label = lm_R2(FigAUCa.fit),parse=T,size=6) + 
    theme_bw()  +
    theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))


FigAUCb <- ggplot(data=subset(PKstats,is.finite(log(AUC)))) +
    geom_point(size=3,aes_string(y="AUC",
                         x="AUC.pred.Fbio",
                         shape="Route",
                         color="Chemical"))+ 
   scale_y_log10(label=scientific_10,limits = c(10^-3, 10^4)) +
   scale_x_log10(label=scientific_10,limits = c(10^-3, 10^4)) +
    annotation_logticks() + 
    geom_abline(slope=1, intercept=0) + 
    ylab(expression(paste(italic("In vivo")," estimated AUC (mg*h/L)"))) + 
    xlab(expression(paste("Predicted AUC (mg*h/L) Using Measured ", F[bio]))) + 
    annotate("text", x = 10^-2, y = 3000, label = "B",size=6) +
    annotate("text",x = 3*10^1, y = 10^-2, label = lm_R2(FigAUCb.fit),parse=T,size=6) + 
    theme_bw()  +
    theme(legend.position="bottom")+
    guides(shape=guide_legend(nrow=2,byrow=TRUE),color=guide_legend(nrow=2,byrow=TRUE))


dev.new(width=10,height=6)
multiplot(FigAUCa,FigAUCb,cols=2,widths=c(1.75,1.75))