The function that performs linear regression is lm
. To see how
it works, let’s use this dataset as an example.
library(datasets)
head(mtcars) # this prints the first 6 observations of the dataset
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
If we want to regress mpg
on a constant and
wt
, we write this.
regressionFit = lm(mpg ~ wt, data = mtcars)
We saved the return value of lm
to
regressionFit
. It is a complicated object:
str(regressionFit)
## List of 12
## $ coefficients : Named num [1:2] 37.29 -5.34
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "wt"
## $ residuals : Named num [1:32] -2.28 -0.92 -2.09 1.3 -0.2 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ effects : Named num [1:32] -113.65 -29.116 -1.661 1.631 0.111 ...
## ..- attr(*, "names")= chr [1:32] "(Intercept)" "wt" "" "" ...
## $ rank : int 2
## $ fitted.values: Named num [1:32] 23.3 21.9 24.9 20.1 18.9 ...
## ..- attr(*, "names")= chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ assign : int [1:2] 0 1
## $ qr :List of 5
## ..$ qr : num [1:32, 1:2] -5.657 0.177 0.177 0.177 0.177 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:32] "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## .. .. ..$ : chr [1:2] "(Intercept)" "wt"
## .. ..- attr(*, "assign")= int [1:2] 0 1
## ..$ qraux: num [1:2] 1.18 1.05
## ..$ pivot: int [1:2] 1 2
## ..$ tol : num 1e-07
## ..$ rank : int 2
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 30
## $ xlevels : Named list()
## $ call : language lm(formula = mpg ~ wt, data = mtcars)
## $ terms :Classes 'terms', 'formula' language mpg ~ wt
## .. ..- attr(*, "variables")= language list(mpg, wt)
## .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. ..$ : chr "wt"
## .. ..- attr(*, "term.labels")= chr "wt"
## .. ..- attr(*, "order")= int 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## $ model :'data.frame': 32 obs. of 2 variables:
## ..$ mpg: num [1:32] 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## ..$ wt : num [1:32] 2.62 2.88 2.32 3.21 3.44 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language mpg ~ wt
## .. .. ..- attr(*, "variables")= language list(mpg, wt)
## .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:2] "mpg" "wt"
## .. .. .. .. ..$ : chr "wt"
## .. .. ..- attr(*, "term.labels")= chr "wt"
## .. .. ..- attr(*, "order")= int 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(mpg, wt)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:2] "mpg" "wt"
## - attr(*, "class")= chr "lm"
But we see familiar names such as coefficients
,
residuals
, and fitted.values
. We can access
these in the way that we access subvariables in a list.
regressionFit$coefficients
## (Intercept) wt
## 37.285126 -5.344472
regressionFit$residuals
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -2.2826106 -0.9197704 -2.0859521 1.2973499
## Hornet Sportabout Valiant Duster 360 Merc 240D
## -0.2001440 -0.6932545 -3.9053627 4.1637381
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 2.3499593 0.2998560 -1.1001440 0.8668731
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## -0.0502472 -1.8830236 1.1733496 2.1032876
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 5.9810744 6.8727113 1.7461954 6.4219792
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## -2.6110037 -2.9725862 -3.7268663 -3.4623553
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 2.4643670 0.3564263 0.1520430 1.2010593
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -4.5431513 -2.7809399 -3.2053627 -1.0274952
regressionFit$fitted.values
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## 23.282611 21.919770 24.885952 20.102650
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 18.900144 18.793255 18.205363 20.236262
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 20.450041 18.900144 18.900144 15.533127
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 17.350247 17.083024 9.226650 8.296712
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 8.718926 25.527289 28.653805 27.478021
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 24.111004 18.472586 18.926866 16.762355
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 16.735633 26.943574 25.847957 29.198941
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## 20.343151 22.480940 18.205363 22.427495
To see the usual results that we get from other languages, type these:
regressionFit
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
print(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Coefficients:
## (Intercept) wt
## 37.285 -5.344
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
To run a regression without a constant, do the following.
regFitWithoutConst = lm(mpg ~ -1 + wt, data=mtcars)
summary(regFitWithoutConst)
##
## Call:
## lm(formula = mpg ~ -1 + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.3018 -3.3177 0.7468 7.7538 24.1899
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## wt 5.2916 0.5932 8.921 4.55e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.27 on 31 degrees of freedom
## Multiple R-squared: 0.7197, Adjusted R-squared: 0.7106
## F-statistic: 79.58 on 1 and 31 DF, p-value: 4.553e-10
We can also add other regressors.
regressionFit = lm(mpg ~ wt + cyl + disp, data=mtcars)
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt + cyl + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4035 -1.4028 -0.4955 1.3387 6.0722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.107678 2.842426 14.462 1.62e-14 ***
## wt -3.635677 1.040138 -3.495 0.00160 **
## cyl -1.784944 0.607110 -2.940 0.00651 **
## disp 0.007473 0.011845 0.631 0.53322
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.595 on 28 degrees of freedom
## Multiple R-squared: 0.8326, Adjusted R-squared: 0.8147
## F-statistic: 46.42 on 3 and 28 DF, p-value: 5.399e-11
As we have seen in the ggplot2
tutorial, the variable
cyl
has only three values: 4
, 6,
8
. We may want to treat cyl
as a categorical
variable and not a continuous variable. To do this, so that we regress
mpg
on indicators of cyl
, we use the
factor
function.
regressionFit = lm(mpg ~ wt + factor(cyl) + disp, data=mtcars)
summary(regressionFit)
##
## Call:
## lm(formula = mpg ~ wt + factor(cyl) + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5965 -1.2361 -0.4855 1.4740 5.8043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.041673 1.963039 17.341 3.66e-16 ***
## wt -3.306751 1.105083 -2.992 0.00586 **
## factor(cyl)6 -4.305559 1.464760 -2.939 0.00666 **
## factor(cyl)8 -6.322786 2.598416 -2.433 0.02186 *
## disp 0.001715 0.013481 0.127 0.89972
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.603 on 27 degrees of freedom
## Multiple R-squared: 0.8375, Adjusted R-squared: 0.8135
## F-statistic: 34.8 on 4 and 27 DF, p-value: 2.726e-10
If you want to use wt^2
as a regressor, there are two
ways to do it. One way is to create another column in the
data.frame
.
mtcars$wt2 = mtcars$wt^2 # this creates the column wt2 in the dataframe and assigns the values.
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb wt2
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 6.864400
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 8.265625
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 5.382400
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 10.336225
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 11.833600
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 11.971600
summary(lm(mpg ~ wt + wt2, data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + wt2, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.483 -1.998 -0.773 1.462 6.238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.9308 4.2113 11.856 1.21e-12 ***
## wt -13.3803 2.5140 -5.322 1.04e-05 ***
## wt2 1.1711 0.3594 3.258 0.00286 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.651 on 29 degrees of freedom
## Multiple R-squared: 0.8191, Adjusted R-squared: 0.8066
## F-statistic: 65.64 on 2 and 29 DF, p-value: 1.715e-11
Another way that does not involve creating a column is the following.
summary(lm(mpg ~ wt + I(wt^2), data=mtcars))
##
## Call:
## lm(formula = mpg ~ wt + I(wt^2), data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.483 -1.998 -0.773 1.462 6.238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.9308 4.2113 11.856 1.21e-12 ***
## wt -13.3803 2.5140 -5.322 1.04e-05 ***
## I(wt^2) 1.1711 0.3594 3.258 0.00286 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.651 on 29 degrees of freedom
## Multiple R-squared: 0.8191, Adjusted R-squared: 0.8066
## F-statistic: 65.64 on 2 and 29 DF, p-value: 1.715e-11
Sums work similarly: e.g., I(cyl+disp)
.
What is the function I()
? The answer is related to what
is the nature of the first argument of lm
.
We have been omitting the label for the first argument. The first
argument of the lm
function is formula
:
lm(formula = mpg ~ wt + disp, data=mtcars)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Coefficients:
## (Intercept) wt disp
## 34.96055 -3.35083 -0.01772
formula
is a special object that interprets an
“expression”. Note that we don’t need to specify
mpg ~ wt + disp
as a string; if we did, we would write
"mpg ~ wt + disp"
. In this “expression”, the operators like
~
and +
work differently from the usual way.
For example, +
in the formula is not an arithmetic
operator, but an operator that says we have multiple regressors.
The function I
tells R to read operators like
+
and ^
inside I
as arithmetic
operators. So the operator ^
in I(wt^2)
is
interpreted as a power operator. Similarly, I(cyl+disp)
interprets +
as an arithmetic operator and not the operator
that says we have two regressors cyl
and
disp
.
The lm
function computes standard errors under the
constant-variance assumption. We can account for heteroskedasticity, for
which we use the sandwich
and the lmtest
packages.
Let’s first load the packages.
library(sandwich)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
We use the following regression as an example.
lfit = lm(formula = mpg ~ wt + disp, data=mtcars)
summary(lfit)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4087 -2.3243 -0.7683 1.7721 6.3484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.96055 2.16454 16.151 4.91e-16 ***
## wt -3.35082 1.16413 -2.878 0.00743 **
## disp -0.01773 0.00919 -1.929 0.06362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
## F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
As mentioned earlier, the standard errors in the above regression are
based on the constant-variance assumption. Now we compute the
heteroskedasticity-robust standard error. We use the vcovHC
function in the sandwich
package:
vcHC = vcovHC(lfit, type = "HC0")
Note that we use the result of lm
function as the first
argument of vcovHC
.
The above command computes the variance-covariance matrix using the estimator
\[ vcHC = (X'X)^{-1}X'\Omega X(X'X)^{-1} \]
where \(X\) is the matrix of regressors and \[ \Omega = \text{diag}(u_1^2, \ldots, u_N^2) \] where \(u_i\) is the residual for individual \(i\).
For the value of the argument type
, we can instead write
HC1
, HC2
, HC3
. These are all
finite-sample corrections to the above estimator.
Now, to generate the analog of summary(lfit)
, we use the
coeftest
function in the lmtest
package:
coeftest(lfit, vcov. = vcHC)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.9605540 2.2538319 15.5116 1.41e-15 ***
## wt -3.3508253 1.0491411 -3.1939 0.003371 **
## disp -0.0177247 0.0080406 -2.2044 0.035585 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Compare the above result with the result of the lm
function. We can see that the standard errors are corrected in the
above.
summary(lfit)
##
## Call:
## lm(formula = mpg ~ wt + disp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4087 -2.3243 -0.7683 1.7721 6.3484
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.96055 2.16454 16.151 4.91e-16 ***
## wt -3.35082 1.16413 -2.878 0.00743 **
## disp -0.01773 0.00919 -1.929 0.06362 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.917 on 29 degrees of freedom
## Multiple R-squared: 0.7809, Adjusted R-squared: 0.7658
## F-statistic: 51.69 on 2 and 29 DF, p-value: 2.744e-10
We can also compute clustered standard errors using the
sandwich
package. However, to do so, we need to tell
vcovHC
which variables represent the group and the
time indices. We need the plm
package for this.
library(plm)
The plm
function in the plm
package allows us
to do several kinds of panel data regressions. However, at this time, we
use plm
to do exactly what lm
does (i.e. OLS),
except that we store information on what are the group and the time
indices.
To discuss how to use plm
, we use the following dataset
as an example.
data("Grunfeld", package = "plm")
head(Grunfeld)
## firm year inv value capital
## 1 1 1935 317.6 3078.5 2.8
## 2 1 1936 391.8 4661.7 52.6
## 3 1 1937 410.6 5387.1 156.9
## 4 1 1938 257.7 2792.2 209.2
## 5 1 1939 330.8 4313.2 203.4
## 6 1 1940 461.2 4643.9 207.2
Consider the following OLS.
pfitl = lm(inv ~ value + capital, data=Grunfeld)
summary(pfitl)
##
## Call:
## lm(formula = inv ~ value + capital, data = Grunfeld)
##
## Residuals:
## Min 1Q Median 3Q Max
## -291.68 -30.01 5.30 34.83 369.45
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.714369 9.511676 -4.491 1.21e-05 ***
## value 0.115562 0.005836 19.803 < 2e-16 ***
## capital 0.230678 0.025476 9.055 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 94.41 on 197 degrees of freedom
## Multiple R-squared: 0.8124, Adjusted R-squared: 0.8105
## F-statistic: 426.6 on 2 and 197 DF, p-value: < 2.2e-16
The following code produces exactly the same result:
pfit = plm(inv ~ value + capital, model="pooling", index="firm", data=Grunfeld)
summary(pfit)
## Pooling Model
##
## Call:
## plm(formula = inv ~ value + capital, data = Grunfeld, model = "pooling",
## index = "firm")
##
## Balanced Panel: n = 10, T = 20, N = 200
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -291.6757 -30.0137 5.3033 34.8293 369.4464
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) -42.7143694 9.5116760 -4.4907 1.207e-05 ***
## value 0.1155622 0.0058357 19.8026 < 2.2e-16 ***
## capital 0.2306785 0.0254758 9.0548 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 9359900
## Residual Sum of Squares: 1755900
## R-Squared: 0.81241
## Adj. R-Squared: 0.8105
## F-statistic: 426.576 on 2 and 197 DF, p-value: < 2.22e-16
The option model="pooling"
makes plm
run
OLS. The option index="firm"
tells plm
that the
observations are grouped according to the firm
variable.
Now we create clustered standard errors by the following code.
vcHCcluster = vcovHC(pfit, type = "HC0", cluster = "group")
Note that the value for the argument cluster
, which is
"group"
, is not the name of the variable in the dataset
Grunfeld
. The option cluster = "group"
tells vcovHC
that we use the group index saved in pfit
as the
cluster.
The above command computes the estimator
\[ vcHCcluster = \left(\sum_{k=1}^K X_k'X_k\right)^{-1} \sum_{k=1}^K X_k'U_kU_k'X_k \left(\sum_{k=1}^K X_k'X_k\right)^{-1} \]
where \(X_k\) is the matrix of regressors for the \(k\)th cluster and \(U_k\) is the residual vector for the \(k\)th cluster.
To obtain results with the corrected standard errors, we again use the
coeftest
function.
coeftest(pfit, vcov. = vcHCcluster)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.714369 19.279431 -2.2155 0.027868 *
## value 0.115562 0.015003 7.7027 6.35e-13 ***
## capital 0.230678 0.080201 2.8763 0.004467 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
For probit and logit, we use glm
. Using it is very
similar to using lm
.
# recall:
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb wt2
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 6.864400
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 8.265625
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 5.382400
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 10.336225
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 11.833600
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 11.971600
# let's run probit with an arbitrary formula.
probitFit = glm(am ~ mpg + disp, family = binomial(link="probit"), data = mtcars)
probitFit
##
## Call: glm(formula = am ~ mpg + disp, family = binomial(link = "probit"),
## data = mtcars)
##
## Coefficients:
## (Intercept) mpg disp
## -1.439509 0.104365 -0.004225
##
## Degrees of Freedom: 31 Total (i.e. Null); 29 Residual
## Null Deviance: 43.23
## Residual Deviance: 28.6 AIC: 34.6
summary(probitFit)
##
## Call:
## glm(formula = am ~ mpg + disp, family = binomial(link = "probit"),
## data = mtcars)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5240 -0.6536 -0.3211 0.5906 2.1394
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.439509 2.659683 -0.541 0.588
## mpg 0.104365 0.092554 1.128 0.259
## disp -0.004225 0.004314 -0.979 0.327
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.230 on 31 degrees of freedom
## Residual deviance: 28.601 on 29 degrees of freedom
## AIC: 34.601
##
## Number of Fisher Scoring iterations: 6
# let's run logit.
logitFit = glm(am ~ mpg + disp, family = binomial(link="logit"), data = mtcars)
logitFit
##
## Call: glm(formula = am ~ mpg + disp, family = binomial(link = "logit"),
## data = mtcars)
##
## Coefficients:
## (Intercept) mpg disp
## -2.256714 0.169978 -0.007615
##
## Degrees of Freedom: 31 Total (i.e. Null); 29 Residual
## Null Deviance: 43.23
## Residual Deviance: 28.61 AIC: 34.61
summary(logitFit)
##
## Call:
## glm(formula = am ~ mpg + disp, family = binomial(link = "logit"),
## data = mtcars)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5186 -0.6261 -0.3316 0.5960 2.1655
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.256714 4.760079 -0.474 0.635
## mpg 0.169978 0.168373 1.010 0.313
## disp -0.007615 0.007811 -0.975 0.330
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 43.230 on 31 degrees of freedom
## Residual deviance: 28.606 on 29 degrees of freedom
## AIC: 34.606
##
## Number of Fisher Scoring iterations: 5