24.4 自然流产和人工流产后的不育

help(infert)

head(infert)
##   education age parity induced case spontaneous stratum pooled.stratum
## 1    0-5yrs  26      6       1    1           2       1              3
## 2    0-5yrs  42      1       1    1           0       2              1
## 3    0-5yrs  39      6       2    1           0       3              4
## 4    0-5yrs  34      4       2    1           0       4              2
## 5   6-11yrs  35      3       1    1           1       5             32
## 6   6-11yrs  36      4       2    1           1       6             36
str(infert)
## 'data.frame':    248 obs. of  8 variables:
##  $ education     : Factor w/ 3 levels "0-5yrs","6-11yrs",..: 1 1 1 1 2 2 2 2 2 2 ...
##  $ age           : num  26 42 39 34 35 36 23 32 21 28 ...
##  $ parity        : num  6 1 6 4 3 4 1 2 1 2 ...
##  $ induced       : num  1 1 2 2 1 2 0 0 0 0 ...
##  $ case          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ spontaneous   : num  2 0 0 0 1 1 0 0 1 0 ...
##  $ stratum       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ pooled.stratum: num  3 1 4 2 32 36 6 22 5 19 ...

存在无序分类变量

infert_glm_1 <- glm(case ~ spontaneous + induced,
  data = infert, family = binomial()
)
summary(infert_glm_1)
## 
## Call:
## glm(formula = case ~ spontaneous + induced, family = binomial(), 
##     data = infert)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6678  -0.8360  -0.5772   0.9030   1.9362  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.7079     0.2677  -6.380 1.78e-10 ***
## spontaneous   1.1972     0.2116   5.657 1.54e-08 ***
## induced       0.4181     0.2056   2.033    0.042 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 316.17  on 247  degrees of freedom
## Residual deviance: 279.61  on 245  degrees of freedom
## AIC: 285.61
## 
## Number of Fisher Scoring iterations: 4

考虑其他潜在的因素

infert_glm_2 <- glm(case ~ age + parity + education + spontaneous + induced,
  data = infert, family = binomial()
)
summary(infert_glm_2)
## 
## Call:
## glm(formula = case ~ age + parity + education + spontaneous + 
##     induced, family = binomial(), data = infert)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7603  -0.8162  -0.4956   0.8349   2.6536  
## 
## Coefficients:
##                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.14924    1.41220  -0.814   0.4158    
## age               0.03958    0.03120   1.269   0.2046    
## parity           -0.82828    0.19649  -4.215 2.49e-05 ***
## education6-11yrs -1.04424    0.79255  -1.318   0.1876    
## education12+ yrs -1.40321    0.83416  -1.682   0.0925 .  
## spontaneous       2.04591    0.31016   6.596 4.21e-11 ***
## induced           1.28876    0.30146   4.275 1.91e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 316.17  on 247  degrees of freedom
## Residual deviance: 257.80  on 241  degrees of freedom
## AIC: 271.8
## 
## Number of Fisher Scoring iterations: 4

实际上应该使用条件逻辑回归, 调用 survival

library(survival)
infert_glm_3 <- clogit(case ~ spontaneous + induced + strata(stratum),
  data = infert
)
summary(infert_glm_3)
## Call:
## coxph(formula = Surv(rep(1, 248L), case) ~ spontaneous + induced + 
##     strata(stratum), data = infert, method = "exact")
## 
##   n= 248, number of events= 83 
## 
##               coef exp(coef) se(coef)     z Pr(>|z|)    
## spontaneous 1.9859    7.2854   0.3524 5.635 1.75e-08 ***
## induced     1.4090    4.0919   0.3607 3.906 9.38e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## spontaneous     7.285     0.1373     3.651    14.536
## induced         4.092     0.2444     2.018     8.298
## 
## Concordance= 0.776  (se = 0.044 )
## Likelihood ratio test= 53.15  on 2 df,   p=3e-12
## Wald test            = 31.84  on 2 df,   p=1e-07
## Score (logrank) test = 48.44  on 2 df,   p=3e-11