traineR Package

2019-10-04

The traineR package seeks to unify the different ways of creating predictive models and their different predictive formats. It includes methods such as K-Nearest Neighbors, Decision Trees, ADA Boosting, Extreme Gradient Boosting, Random Forest, Neural Networks, Deep Learning, Support Vector Machines, Bayesian Methods and Logistic Regression.

The main idea of the package is that all predictions can be executed using a standard syntax, and that all predictive methods can be used in the same way by default. For example, all packages use classification in their default invocation, and all methods use a formula to determine the predictor variables (independent variables) and the response variable.

Examples:

For the following examples we will use the Puromycin dataset:

conc rate state
0.02 76 treated
0.02 47 treated
0.06 97 treated
0.06 107 treated
0.11 123 treated
0.11 139 treated
0.22 159 treated
0.22 152 treated
0.56 191 treated
0.56 201 treated
n <- seq_len(nrow(Puromycin))
.sample <- sample(n, length(n) * 0.7)
data.train <- Puromycin[.sample,]
data.test  <- Puromycin[-.sample,]

Logistic Regression

Modeling:

model <- train.glm(state~., data.train)
model
#>
#> Call:  glm(formula = state ~ ., family = binomial, data = data.train)
#>
#> Coefficients:
#> (Intercept)         conc         rate
#>     3.39624      3.42996     -0.03899
#>
#> Degrees of Freedom: 15 Total (i.e. Null);  13 Residual
#> Null Deviance:       21.93
#> Residual Deviance: 18.1  AIC: 24.1

Prediction as probability:

Note: the result is always a matrix.

prediction <- predict(model, data.test , type = "prob")
prediction
#>        treated  untreated
#> [1,] 0.6388459 0.36115413
#> [2,] 0.7355117 0.26448826
#> [3,] 0.9255903 0.07440971
#> [4,] 0.2989682 0.70103183
#> [5,] 0.6705812 0.32941877
#> [6,] 0.7226067 0.27739331
#> [7,] 0.6993258 0.30067423

Prediction as classification:

Note: the result is always a factor.

prediction <- predict(model, data.test , type = "class")
prediction
#> [1] treated   treated   treated   untreated treated   treated   treated
#> Levels: treated untreated

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>            prediction
#> real        treated untreated
#>   treated         3         0
#>   untreated       3         1

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>            prediction
#> real        treated untreated
#>   treated         3         0
#>   untreated       3         1
#>
#> Overall Accuracy: 0.5714
#> Overall Error:    0.4286
#>
#> Category Accuracy:
#>
#>       treated    untreated
#>      1.000000     0.250000

ADA Boosting

Modeling:

model <- train.ada(state~., data.train, iter = 200)
model
#> Call:
#> ada(state ~ ., data = data.train, iter = 200)
#>
#> Loss: exponential Method: discrete   Iteration: 200
#>
#> Final Confusion Matrix for Data:
#>            Final Prediction
#> True value  treated
#>   treated         9
#>   untreated       7
#>
#> Train Error: 0.438
#>
#> Out-Of-Bag Error:  0.438  iteration= 6
#>
#> Additional Estimates of number of iterations:
#>
#> train.err1 train.kap1
#>          1          1

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>      treated untreated
#> [1,]  0.5625    0.4375
#> [2,]  0.5625    0.4375
#> [3,]  0.5625    0.4375
#> [4,]  0.5625    0.4375
#> [5,]  0.5625    0.4375
#> [6,]  0.5625    0.4375
#> [7,]  0.5625    0.4375

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#> [1] treated treated treated treated treated treated treated
#> Levels: treated untreated

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>            prediction
#> real        treated untreated
#>   treated         3         0
#>   untreated       4         0

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>            prediction
#> real        treated untreated
#>   treated         3         0
#>   untreated       4         0
#>
#> Overall Accuracy: 0.4286
#> Overall Error:    0.5714
#>
#> Category Accuracy:
#>
#>       treated    untreated
#>      1.000000     0.000000

For the following examples we will use the iris dataset:

Sepal.Length Sepal.Width Petal.Length Petal.Width Species
5.1 3.5 1.4 0.2 setosa
4.9 3.0 1.4 0.2 setosa
4.7 3.2 1.3 0.2 setosa
4.6 3.1 1.5 0.2 setosa
5.0 3.6 1.4 0.2 setosa
5.4 3.9 1.7 0.4 setosa
4.6 3.4 1.4 0.3 setosa
5.0 3.4 1.5 0.2 setosa
4.4 2.9 1.4 0.2 setosa
4.9 3.1 1.5 0.1 setosa
data("iris")
n <- seq_len(nrow(iris))
.sample <- sample(n, length(n) * 0.75)
data.train <- iris[.sample,]
data.test <- iris[-.sample,]

Decision Trees

Modeling:

model <- train.rpart(Species~., data.train)
model
#> n= 112
#>
#> node), split, n, loss, yval, (yprob)
#>       * denotes terminal node
#>
#> 1) root 112 70 virginica (0.34821429 0.27678571 0.37500000)
#>   2) Petal.Length< 2.6 39  0 setosa (1.00000000 0.00000000 0.00000000) *
#>   3) Petal.Length>=2.6 73 31 virginica (0.00000000 0.42465753 0.57534247)
#>     6) Petal.Length< 4.85 32  2 versicolor (0.00000000 0.93750000 0.06250000) *
#>     7) Petal.Length>=4.85 41  1 virginica (0.00000000 0.02439024 0.97560976) *

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>     setosa versicolor virginica
#> 4        1 0.00000000 0.0000000
#> 6        1 0.00000000 0.0000000
#> 8        1 0.00000000 0.0000000
#> 10       1 0.00000000 0.0000000
#> 15       1 0.00000000 0.0000000
#> 25       1 0.00000000 0.0000000
#> 28       1 0.00000000 0.0000000
#> 35       1 0.00000000 0.0000000
#> 40       1 0.00000000 0.0000000
#> 41       1 0.00000000 0.0000000
#> 44       1 0.00000000 0.0000000
#> 53       0 0.02439024 0.9756098
#> 54       0 0.93750000 0.0625000
#> 59       0 0.93750000 0.0625000
#> 65       0 0.93750000 0.0625000
#> 66       0 0.93750000 0.0625000
#> 69       0 0.93750000 0.0625000
#> 73       0 0.02439024 0.9756098
#> 75       0 0.93750000 0.0625000
#> 76       0 0.93750000 0.0625000
#> 77       0 0.93750000 0.0625000
#> 78       0 0.02439024 0.9756098
#> 83       0 0.93750000 0.0625000
#> 86       0 0.93750000 0.0625000
#> 88       0 0.93750000 0.0625000
#> 92       0 0.93750000 0.0625000
#> 93       0 0.93750000 0.0625000
#> 96       0 0.93750000 0.0625000
#> 97       0 0.93750000 0.0625000
#> 99       0 0.93750000 0.0625000
#> 101      0 0.02439024 0.9756098
#> 113      0 0.02439024 0.9756098
#> 120      0 0.02439024 0.9756098
#> 121      0 0.02439024 0.9756098
#> 130      0 0.02439024 0.9756098
#> 133      0 0.02439024 0.9756098
#> 139      0 0.93750000 0.0625000
#> 142      0 0.02439024 0.9756098

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     virginica
#> [13] versicolor versicolor versicolor versicolor versicolor virginica
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] versicolor versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  virginica  virginica  virginica  virginica
#> [37] versicolor virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         16         3
#>   virginica       0          1         7

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         16         3
#>   virginica       0          1         7
#>
#> Overall Accuracy: 0.8947
#> Overall Error:    0.1053
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.842105     0.875000

The model still supports the functions of the original package.

library(rpart.plot)
prp(model, extra = 104, branch.type = 2,
box.col = c("pink", "palegreen3", "cyan")[model$frame$yval])

Bayesian Method

Modeling:

model <- train.bayes(Species~., data.train)
model
#>
#> Naive Bayes Classifier for Discrete Predictors
#>
#> Call:
#> naiveBayes.default(x = X, y = Y, laplace = laplace)
#>
#> A-priori probabilities:
#> Y
#>     setosa versicolor  virginica
#>  0.3482143  0.2767857  0.3750000
#>
#> Conditional probabilities:
#>             Sepal.Length
#> Y                [,1]      [,2]
#>   setosa     4.989744 0.3633143
#>   versicolor 5.806452 0.4857540
#>   virginica  6.592857 0.6696897
#>
#>             Sepal.Width
#> Y                [,1]      [,2]
#>   setosa     3.423077 0.4022709
#>   versicolor 2.754839 0.3150013
#>   virginica  2.978571 0.3234989
#>
#>             Petal.Length
#> Y                [,1]      [,2]
#>   setosa     1.446154 0.1699011
#>   versicolor 4.219355 0.4621828
#>   virginica  5.573810 0.5746938
#>
#>             Petal.Width
#> Y                 [,1]       [,2]
#>   setosa     0.2435897 0.09677666
#>   versicolor 1.3032258 0.22133515
#>   virginica  2.0238095 0.26021522

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>              setosa   versicolor    virginica
#>  [1,]  1.000000e+00 1.071383e-15 6.701222e-25
#>  [2,]  1.000000e+00 4.268729e-13 5.801983e-21
#>  [3,]  1.000000e+00 5.391183e-16 6.217552e-25
#>  [4,]  1.000000e+00 6.141601e-16 2.275240e-25
#>  [5,]  1.000000e+00 1.142206e-17 1.640288e-25
#>  [6,]  1.000000e+00 1.080282e-12 1.230312e-21
#>  [7,]  1.000000e+00 5.851351e-16 9.265819e-25
#>  [8,]  1.000000e+00 2.380378e-15 1.333151e-24
#>  [9,]  1.000000e+00 7.775698e-16 9.180135e-25
#> [10,]  1.000000e+00 2.457791e-16 6.513699e-25
#> [11,]  1.000000e+00 1.728229e-09 2.278467e-17
#> [12,] 5.871962e-131 2.139680e-01 7.860320e-01
#> [13,]  2.774835e-76 9.999635e-01 3.645541e-05
#> [14,] 2.220755e-104 9.816160e-01 1.838404e-02
#> [15,]  4.334317e-61 9.999637e-01 3.627346e-05
#> [16,] 2.344627e-100 9.526729e-01 4.732711e-02
#> [17,] 6.409147e-110 9.918752e-01 8.124817e-03
#> [18,] 3.664958e-129 8.812189e-01 1.187811e-01
#> [19,]  3.229216e-90 9.974054e-01 2.594644e-03
#> [20,] 3.473957e-100 9.727787e-01 2.722126e-02
#> [21,] 3.836698e-120 8.048875e-01 1.951125e-01
#> [22,] 2.526170e-148 3.500945e-02 9.649906e-01
#> [23,]  6.105094e-68 9.999691e-01 3.087840e-05
#> [24,] 5.599902e-113 8.139719e-01 1.860281e-01
#> [25,]  3.947529e-95 9.991235e-01 8.764975e-04
#> [26,] 1.615086e-107 9.834664e-01 1.653357e-02
#> [27,]  6.343141e-72 9.999639e-01 3.613649e-05
#> [28,]  5.981069e-79 9.998411e-01 1.589346e-04
#> [29,]  7.084951e-84 9.996427e-01 3.572576e-04
#> [30,]  7.558802e-34 9.999997e-01 2.919161e-07
#> [31,] 2.052543e-275 6.921915e-10 1.000000e+00
#> [32,] 9.672565e-209 5.090873e-06 9.999949e-01
#> [33,] 1.239089e-133 9.418073e-01 5.819272e-02
#> [34,] 1.830530e-239 2.628877e-08 1.000000e+00
#> [35,] 2.270355e-192 1.331016e-04 9.998669e-01
#> [36,] 8.909747e-222 3.444607e-06 9.999966e-01
#> [37,] 1.106930e-141 1.685430e-01 8.314570e-01
#> [38,] 8.843208e-204 1.255829e-06 9.999987e-01

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     virginica
#> [13] versicolor versicolor versicolor versicolor versicolor versicolor
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] versicolor versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  versicolor virginica  virginica  virginica
#> [37] virginica  virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          1         7

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          1         7
#>
#> Overall Accuracy: 0.9211
#> Overall Error:    0.0789
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.894737     0.875000

Random Forest

Modeling:

model <- train.randomForest(Species~., data.train)
model
#>
#> Call:
#>  randomForest(formula = Species ~ ., data = data.train, importance = TRUE)
#>                Type of random forest: classification
#>                      Number of trees: 500
#> No. of variables tried at each split: 2
#>
#>         OOB estimate of  error rate: 5.36%
#> Confusion matrix:
#>            setosa versicolor virginica class.error
#> setosa         39          0         0  0.00000000
#> versicolor      0         29         2  0.06451613
#> virginica       0          4        38  0.09523810

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>     setosa versicolor virginica
#> 4    1.000      0.000     0.000
#> 6    1.000      0.000     0.000
#> 8    1.000      0.000     0.000
#> 10   1.000      0.000     0.000
#> 15   0.926      0.074     0.000
#> 25   1.000      0.000     0.000
#> 28   1.000      0.000     0.000
#> 35   1.000      0.000     0.000
#> 40   1.000      0.000     0.000
#> 41   1.000      0.000     0.000
#> 44   1.000      0.000     0.000
#> 53   0.000      0.538     0.462
#> 54   0.000      1.000     0.000
#> 59   0.000      0.980     0.020
#> 65   0.002      0.996     0.002
#> 66   0.000      0.988     0.012
#> 69   0.000      0.900     0.100
#> 73   0.000      0.466     0.534
#> 75   0.000      0.968     0.032
#> 76   0.000      0.980     0.020
#> 77   0.000      0.722     0.278
#> 78   0.000      0.176     0.824
#> 83   0.000      1.000     0.000
#> 86   0.004      0.994     0.002
#> 88   0.000      0.916     0.084
#> 92   0.000      0.992     0.008
#> 93   0.000      0.992     0.008
#> 96   0.002      0.998     0.000
#> 97   0.002      0.998     0.000
#> 99   0.002      0.986     0.012
#> 101  0.000      0.002     0.998
#> 113  0.000      0.000     1.000
#> 120  0.000      0.608     0.392
#> 121  0.000      0.000     1.000
#> 130  0.000      0.288     0.712
#> 133  0.000      0.000     1.000
#> 139  0.000      0.488     0.512
#> 142  0.000      0.000     1.000

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     versicolor
#> [13] versicolor versicolor versicolor versicolor versicolor virginica
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] versicolor versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  versicolor virginica  virginica  virginica
#> [37] virginica  virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          1         7

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          1         7
#>
#> Overall Accuracy: 0.9211
#> Overall Error:    0.0789
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.894737     0.875000

The model still supports the functions of the original package.

library(randomForest)
varImpPlot(model)

K-Nearest Neighbors

Modeling:

model <- train.knn(Species~., data.train)
model
#>
#> Call:
#> kknn::train.kknn(formula = Species ~ ., data = data.train)
#>
#> Type of response variable: nominal
#> Minimal misclassification: 0.04464286
#> Best kernel: optimal
#> Best k: 6

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>       setosa versicolor  virginica
#>  [1,]      1 0.00000000 0.00000000
#>  [2,]      1 0.00000000 0.00000000
#>  [3,]      1 0.00000000 0.00000000
#>  [4,]      1 0.00000000 0.00000000
#>  [5,]      1 0.00000000 0.00000000
#>  [6,]      1 0.00000000 0.00000000
#>  [7,]      1 0.00000000 0.00000000
#>  [8,]      1 0.00000000 0.00000000
#>  [9,]      1 0.00000000 0.00000000
#> [10,]      1 0.00000000 0.00000000
#> [11,]      1 0.00000000 0.00000000
#> [12,]      0 0.79289322 0.20710678
#> [13,]      0 1.00000000 0.00000000
#> [14,]      0 0.88155533 0.11844467
#> [15,]      0 1.00000000 0.00000000
#> [16,]      0 1.00000000 0.00000000
#> [17,]      0 0.54957247 0.45042753
#> [18,]      0 0.11844467 0.88155533
#> [19,]      0 1.00000000 0.00000000
#> [20,]      0 1.00000000 0.00000000
#> [21,]      0 0.68160686 0.31839314
#> [22,]      0 0.43112780 0.56887220
#> [23,]      0 1.00000000 0.00000000
#> [24,]      0 0.91133789 0.08866211
#> [25,]      0 0.57102402 0.42897598
#> [26,]      0 0.97854845 0.02145155
#> [27,]      0 1.00000000 0.00000000
#> [28,]      0 1.00000000 0.00000000
#> [29,]      0 1.00000000 0.00000000
#> [30,]      0 1.00000000 0.00000000
#> [31,]      0 0.00000000 1.00000000
#> [32,]      0 0.00000000 1.00000000
#> [33,]      0 0.45257934 0.54742066
#> [34,]      0 0.00000000 1.00000000
#> [35,]      0 0.02145155 0.97854845
#> [36,]      0 0.00000000 1.00000000
#> [37,]      0 0.31768962 0.68231038
#> [38,]      0 0.00000000 1.00000000

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     versicolor
#> [13] versicolor versicolor versicolor versicolor versicolor virginica
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] versicolor versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  virginica  virginica  virginica  virginica
#> [37] virginica  virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          0         8

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          0         8
#>
#> Overall Accuracy: 0.9474
#> Overall Error:    0.0526
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.894737     1.000000

Neural Networks (nnet)

Modeling:

model <- train.nnet(Species~., data.train, size = 20)
#> # weights:  163
#> initial  value 153.617515
#> iter  10 value 28.819486
#> iter  20 value 4.924200
#> iter  30 value 3.615155
#> iter  40 value 1.854831
#> iter  50 value 0.011083
#> iter  60 value 0.001094
#> final  value 0.000061
#> converged
model
#> a 4-20-3 network with 163 weights
#> inputs: Sepal.Length Sepal.Width Petal.Length Petal.Width
#> output(s): Species
#> options were - softmax modelling

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>           setosa   versicolor     virginica
#> 4   1.000000e+00 1.546687e-56 2.336245e-198
#> 6   1.000000e+00 3.332658e-47 7.387809e-160
#> 8   1.000000e+00 8.605065e-58 4.171322e-196
#> 10  1.000000e+00 5.396825e-58 9.345036e-196
#> 15  1.000000e+00 8.739341e-47 1.322881e-160
#> 25  1.000000e+00 1.407381e-57 6.035234e-197
#> 28  1.000000e+00 7.577839e-58 2.347442e-194
#> 35  1.000000e+00 9.066583e-58 3.704699e-196
#> 40  1.000000e+00 4.940023e-58 1.283180e-195
#> 41  1.000000e+00 1.971711e-44 8.388773e-165
#> 44  1.000000e+00 9.797117e-46 1.776559e-162
#> 53  2.476404e-37 3.742841e-01  6.257159e-01
#> 54  6.584845e-13 1.000000e+00  2.519266e-43
#> 59  9.153057e-51 1.000000e+00 3.024217e-130
#> 65  8.654427e-43 1.000000e+00 2.648430e-146
#> 66  5.301057e-53 1.000000e+00 2.866997e-128
#> 69  4.478239e-37 1.609650e-03  9.983904e-01
#> 73  1.467830e-55 6.683108e-13  1.000000e+00
#> 75  2.324727e-50 1.000000e+00 1.190386e-130
#> 76  1.603854e-52 1.000000e+00 1.273821e-127
#> 77  1.927115e-24 1.000000e+00  4.748771e-19
#> 78  6.278086e-80 7.828374e-30  1.000000e+00
#> 83  7.224007e-42 1.000000e+00 1.621730e-155
#> 86  3.968306e-52 1.000000e+00 3.269074e-122
#> 88  4.987657e-24 1.000000e+00  8.733128e-20
#> 92  3.737283e-47 1.000000e+00 3.505326e-139
#> 93  1.074203e-39 1.000000e+00 9.119432e-156
#> 96  3.783578e-19 1.000000e+00 2.592248e-174
#> 97  1.769415e-21 1.000000e+00 9.181676e-173
#> 99  1.450102e-38 1.000000e+00 1.149523e-148
#> 101 3.122249e-59 1.469631e-16  1.000000e+00
#> 113 4.837125e-92 1.854271e-37  1.000000e+00
#> 120 1.817907e-56 1.814376e-13  1.000000e+00
#> 121 1.251625e-91 4.226529e-37  1.000000e+00
#> 130 2.514084e-57 5.207645e-14  1.000000e+00
#> 133 9.245415e-87 3.670472e-34  1.000000e+00
#> 139 7.890998e-52 3.090833e-09  1.000000e+00
#> 142 8.108284e-67 6.326458e-18  1.000000e+00

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     virginica
#> [13] versicolor versicolor versicolor versicolor virginica  virginica
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] versicolor versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  virginica  virginica  virginica  virginica
#> [37] virginica  virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         15         4
#>   virginica       0          0         8

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         15         4
#>   virginica       0          0         8
#>
#> Overall Accuracy: 0.8947
#> Overall Error:    0.1053
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.789474     1.000000

Neural Networks (neuralnet)

Modeling:

model <- train.neuralnet(Species~., data.train, hidden = c(5, 7, 6),
linear.output = FALSE, threshold = 0.01, stepmax = 1e+06)
summary(model)
#>                     Length Class      Mode
#> call                  7    -none-     call
#> response            336    -none-     logical
#> covariate           448    -none-     numeric
#> model.list            2    -none-     list
#> err.fct               1    -none-     function
#> act.fct               1    -none-     function
#> linear.output         1    -none-     logical
#> data                  5    data.frame list
#> exclude               0    -none-     NULL
#> net.result            1    -none-     list
#> weights               1    -none-     list
#> generalized.weights   1    -none-     list
#> startweights          1    -none-     list
#> result.matrix       139    -none-     numeric
#> prmdt                 4    -none-     list

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>            setosa   versicolor    virginica
#> 4    1.000000e+00 7.520966e-09 2.151651e-73
#> 6    1.000000e+00 3.186572e-09 3.872734e-74
#> 8    1.000000e+00 4.956146e-09 1.176603e-73
#> 10   1.000000e+00 1.111382e-08 7.268415e-73
#> 15   1.000000e+00 2.738011e-09 5.357574e-74
#> 25   1.000000e+00 6.052141e-09 9.680225e-74
#> 28   1.000000e+00 4.624278e-09 1.233391e-73
#> 35   1.000000e+00 1.020539e-08 6.916276e-73
#> 40   1.000000e+00 5.304083e-09 1.620110e-73
#> 41   1.000000e+00 3.747112e-09 6.508743e-74
#> 44   1.000000e+00 4.535698e-09 9.850490e-74
#> 53   3.131123e-27 1.000000e+00 1.870941e-11
#> 54   2.152842e-28 1.000000e+00 4.846579e-07
#> 59   1.048216e-26 1.000000e+00 6.407027e-16
#> 65   2.006298e-17 9.943706e-01 1.676224e-49
#> 66   1.677645e-22 1.000000e+00 1.099708e-30
#> 69  3.524108e-136 1.191678e-07 9.999999e-01
#> 73  5.622027e-136 3.895389e-03 9.960149e-01
#> 75   4.675819e-24 1.000000e+00 6.456487e-26
#> 76   5.885650e-24 1.000000e+00 3.982887e-25
#> 77   2.224245e-28 1.000000e+00 4.511630e-06
#> 78   2.168082e-88 1.304623e-01 9.996563e-01
#> 83   9.023581e-23 1.000000e+00 1.608341e-31
#> 86   1.581528e-19 9.998680e-01 2.179492e-41
#> 88  1.362631e-133 8.461852e-07 9.999987e-01
#> 92   2.411740e-26 1.000000e+00 1.204970e-17
#> 93   1.700887e-25 1.000000e+00 1.305427e-21
#> 96   5.009343e-21 1.000000e+00 1.153501e-38
#> 97   2.655415e-23 1.000000e+00 7.947050e-30
#> 99   1.082483e-16 9.967580e-01 5.176147e-51
#> 101 2.652424e-136 2.131374e-10 1.000000e+00
#> 113 2.547936e-136 8.710763e-11 1.000000e+00
#> 120 4.832810e-136 1.347726e-04 9.998669e-01
#> 121 2.276329e-136 7.082123e-12 1.000000e+00
#> 130 7.604880e-136 7.652724e-01 2.182817e-01
#> 133 2.838645e-136 9.654628e-10 1.000000e+00
#> 139 2.946490e-129 1.024946e-07 9.999999e-01
#> 142 2.265475e-136 9.694502e-13 1.000000e+00

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     versicolor
#> [13] versicolor versicolor versicolor versicolor virginica  virginica
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] virginica  versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  virginica  virginica  versicolor virginica
#> [37] virginica  virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         15         4
#>   virginica       0          1         7

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         15         4
#>   virginica       0          1         7
#>
#> Overall Accuracy: 0.8684
#> Overall Error:    0.1316
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.789474     0.875000

Support Vector Machines

Modeling:

model <- train.svm(Species~., data.train)
model
#>
#> Call:
#> svm(formula = Species ~ ., data = data.train, probability = TRUE)
#>
#>
#> Parameters:
#>    SVM-Type:  C-classification
#>        cost:  1
#>
#> Number of Support Vectors:  43

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>          setosa  versicolor   virginica
#> 4   0.968608365 0.019105447 0.012286189
#> 6   0.968623363 0.019772682 0.011603956
#> 8   0.974328946 0.014908117 0.010762937
#> 10  0.970821088 0.017625544 0.011553368
#> 15  0.958780614 0.024078859 0.017140527
#> 25  0.968897255 0.018498886 0.012603859
#> 28  0.973350694 0.015725435 0.010923871
#> 35  0.970035447 0.018778221 0.011186332
#> 40  0.973868646 0.015324132 0.010807223
#> 41  0.974743448 0.014651355 0.010605197
#> 44  0.965648099 0.021950011 0.012401889
#> 53  0.020467167 0.743553672 0.235979162
#> 54  0.012058435 0.934863781 0.053077784
#> 59  0.016593078 0.933555711 0.049851211
#> 65  0.031766804 0.959771890 0.008461306
#> 66  0.019852828 0.948222406 0.031924766
#> 69  0.030624405 0.552459945 0.416915650
#> 73  0.018364613 0.360298305 0.621337083
#> 75  0.015741538 0.965869276 0.018389186
#> 76  0.016958438 0.944192767 0.038848795
#> 77  0.021172928 0.716342128 0.262484944
#> 78  0.014931436 0.299819051 0.685249513
#> 83  0.015327806 0.977456080 0.007216114
#> 86  0.048804186 0.919170196 0.032025618
#> 88  0.027634621 0.790585931 0.181779448
#> 92  0.014418541 0.952965013 0.032616447
#> 93  0.013014844 0.973713457 0.013271699
#> 96  0.027471974 0.965509793 0.007018232
#> 97  0.017498103 0.973630889 0.008871009
#> 99  0.053956219 0.927767038 0.018276743
#> 101 0.015500247 0.006789848 0.977709905
#> 113 0.008833892 0.009111355 0.982054753
#> 120 0.025820297 0.385502876 0.588676827
#> 121 0.009298254 0.007023832 0.983677914
#> 130 0.016066083 0.106711423 0.877222494
#> 133 0.009122860 0.002691522 0.988185619
#> 139 0.014484964 0.395409668 0.590105368
#> 142 0.011044964 0.012890575 0.976064461

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     versicolor
#> [13] versicolor versicolor versicolor versicolor versicolor virginica
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] versicolor versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  virginica  virginica  virginica  virginica
#> [37] virginica  virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          0         8

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         17         2
#>   virginica       0          0         8
#>
#> Overall Accuracy: 0.9474
#> Overall Error:    0.0526
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.894737     1.000000

Extreme Gradient Boosting

Modeling:

model <- train.xgboost(Species~., data.train, nrounds = 79, maximize = FALSE, verbose = 0)
model
#> ##### xgb.Booster
#> raw: 61.3 Kb
#> call:
#>   xgb.train(params = params, data = train_aux, nrounds = nrounds,
#>     watchlist = watchlist, obj = obj, feval = feval, verbose = verbose,
#>     print_every_n = print_every_n, early_stopping_rounds = early_stopping_rounds,
#>     maximize = maximize, save_period = save_period, save_name = save_name,
#>     xgb_model = xgb_model, callbacks = callbacks, eval_metric = "mlogloss")
#> params (as set within xgb.train):
#>   booster = "gbtree", objective = "multi:softprob", eta = "0.3", gamma = "0", max_depth = "6", min_child_weight = "1", subsample = "1", colsample_bytree = "1", num_class = "3", eval_metric = "mlogloss", silent = "1"
#> xgb.attributes:
#>   niter
#> callbacks:
#>   cb.evaluation.log()
#> # of features: 4
#> niter: 79
#> nfeatures : 4
#> evaluation_log:
#>     iter train_mlogloss
#>        1       0.740732
#>        2       0.527475
#> ---
#>       78       0.013840
#>       79       0.013824

Prediction as probability:

prediction <- predict(model, data.test , type = "prob")
prediction
#>             setosa  versicolor    virginica
#>  [1,] 0.9937019348 0.004694931 0.0016031280
#>  [2,] 0.9937019348 0.004694931 0.0016031280
#>  [3,] 0.9937019348 0.004694931 0.0016031280
#>  [4,] 0.9937019348 0.004694931 0.0016031280
#>  [5,] 0.9809110165 0.017506495 0.0015824926
#>  [6,] 0.9937019348 0.004694931 0.0016031280
#>  [7,] 0.9937019348 0.004694931 0.0016031280
#>  [8,] 0.9937019348 0.004694931 0.0016031280
#>  [9,] 0.9937019348 0.004694931 0.0016031280
#> [10,] 0.9937019348 0.004694931 0.0016031280
#> [11,] 0.9937019348 0.004694931 0.0016031280
#> [12,] 0.0083601316 0.096496999 0.8951427937
#> [13,] 0.0058255759 0.990090132 0.0040843207
#> [14,] 0.0043704547 0.985747635 0.0098818727
#> [15,] 0.0051471349 0.992852747 0.0020001004
#> [16,] 0.0043704547 0.985747635 0.0098818727
#> [17,] 0.0036267822 0.978951156 0.0174220894
#> [18,] 0.0082334196 0.110191204 0.8815754056
#> [19,] 0.0043704547 0.985747635 0.0098818727
#> [20,] 0.0043704547 0.985747635 0.0098818727
#> [21,] 0.0053602201 0.958572268 0.0360674523
#> [22,] 0.0006793801 0.001405758 0.9979148507
#> [23,] 0.0037102066 0.993688643 0.0026012317
#> [24,] 0.0010788182 0.998502016 0.0004192127
#> [25,] 0.0045832088 0.976719618 0.0186972171
#> [26,] 0.0021105944 0.993117213 0.0047721863
#> [27,] 0.0037102066 0.993688643 0.0026012317
#> [28,] 0.0045003397 0.993750870 0.0017487663
#> [29,] 0.0045003397 0.993750870 0.0017487663
#> [30,] 0.0159253646 0.974438965 0.0096356086
#> [31,] 0.0011289724 0.002253027 0.9966179729
#> [32,] 0.0006796400 0.001023654 0.9982966781
#> [33,] 0.0121437991 0.882153332 0.1057028919
#> [34,] 0.0009805845 0.001956897 0.9970625043
#> [35,] 0.0086283339 0.067511708 0.9238599539
#> [36,] 0.0006206841 0.000934856 0.9984444976
#> [37,] 0.0104090199 0.574849725 0.4147412181
#> [38,] 0.0006794436 0.001312412 0.9980081916

Prediction as classification:

prediction <- predict(model, data.test , type = "class")
prediction
#>  [1] setosa     setosa     setosa     setosa     setosa     setosa
#>  [7] setosa     setosa     setosa     setosa     setosa     virginica
#> [13] versicolor versicolor versicolor versicolor versicolor virginica
#> [19] versicolor versicolor versicolor virginica  versicolor versicolor
#> [25] versicolor versicolor versicolor versicolor versicolor versicolor
#> [31] virginica  virginica  versicolor virginica  virginica  virginica
#> [37] versicolor virginica
#> Levels: setosa versicolor virginica

Confusion Matrix:

mc <- confusion.matrix(data.test, prediction)
mc
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         16         3
#>   virginica       0          2         6

Some Rates:

general.indexes(mc = mc)
#>
#> Confusion Matrix:
#>             prediction
#> real         setosa versicolor virginica
#>   setosa         11          0         0
#>   versicolor      0         16         3
#>   virginica       0          2         6
#>
#> Overall Accuracy: 0.8684
#> Overall Error:    0.1316
#>
#> Category Accuracy:
#>
#>        setosa   versicolor    virginica
#>      1.000000     0.842105     0.750000