返回

logistic回归模型的数组R环

发布时间:2022-07-26 07:40:05 255
# git# 数据

我有以下数据:

ID <- c("A", "B", "C", "D", "E", "F")
age <- c(54, 61, 65, 55, 60, 60)
sex <- c(0, 0, 1, 1, 1, 0)
Q1 <- c(0, 0, 0, 0, 0, 0)
Q2 <- c(0, 1, 0, 0, 0, 1) 
Q3 <- c(0, 1, 1, 0, 0, 1) 
Q4 <- c(0, 1, 1, 1, 0, 1) 
Q5 <- c(0, 1, 1, 1, 0, 1)
E1 <- c(2, 1, 0, 0, 0, 0)
E2 <- c(0, 1, 2, 0, 1, 0)
E3 <- c(0, 0, 1, 0, 1, 1)
E4 <- c(1, 0, 0, 0, 0, 0)
E5 <- c(0, 0, 0, 0, 2, 2)
Sint <- c(4, 3, 4, 1, 0, 2)
surv1 <- c(1, 1, 1, 1, 1, 1)
surv2 <- c(1, 1, 0, 1, 1, 1)
surv3 <- c(1, 1, 0, 1, 1, 1)
surv4 <- c(1, 1, 0, 1, 1, 0)
surv5 <- c(1, 1, 0, 1, 0, 0)
surv6 <- c(1, 1, 0, 1, 0, 0)


dta <- data.frame(ID, age, sex, Q1, Q2, Q3, Q4, Q5, E1, E2, E3, E4, E5, Sint,
                  surv1, surv2, surv3, surv4, surv5, surv6)

我创建了以下阵列:

surv_wave <- c("surv1", "surv2", "surv3", "surv4", "surv5", "surv6")
var_num <- c("age", "sex")
Wave2 <- c("age", "sex", "Q1", "E1", "Sint")
Wave3 <- c("age", "sex", "Q1", "Q2", "E1", "E2", "Sint")
Wave4 <- c("age", "sex", "Q1", "Q2", "Q3", "E1", "E2", "E3", "Sint")
Wave5 <- c("age", "sex", "Q1", "Q2", "Q3", "Q4", "E1", "E2", "E3", "E4", "Sint")
Wave6 <- c("age", "sex", "Q1", "Q2", "Q3", "Q4", "Q5", "E1", "E2", "E3", "E4", "E5", "Sint")
Waves <- c("Wave2", "Wave3", "Wave4", "Wave5", "Wave6")

我想迭代数组,根据数组中的变量预测生存概率:

# Probability variables that will be predicted
dta$wsd2 <- NA
dta$wsd3 <- NA
dta$wsd4 <- NA
dta$wsd5 <- NA
dta$wsd6 <- NA

# vector of variables that will be predicted
wsurv_den <- c("wsd2", "wsd3", "wsd4", "wsd5", "wsd6")

# iterate all waves
for(i in 2:6) {
  
  # subset people who survived in the previous wave
  Subset <- subset(dta, dta[[surv_wave[i-1]]] == 1)
  
  # logistic regression
  f <- as.formula(
    paste(surv_wave[i], 
          paste(Waves[i], collapse = " + "),
          sep = " ~ "))
  Den_surv_s <- glm(f,  family = binomial(link = "logit"),  
                    data = Subset)
  
  # predict probabilities of survival based on logistic regression
  Den_surv_p_s <- predict(Den_surv_s, type = "response")
  
  # Add predicted values to original dataset
  dta[dta[[surv_wave[i-1]]] == 1,][[wsurv_den[i-1]]]<-Den_surv_p_s
  
}

我一直收到一条错误消息:model.frame中出错。默认值(formula=f,data=Subset,drop.unused.levels=TRUE):变量长度不同(为“Wave3”找到)

我考虑了可能的解决方案,但我没有NA值和唯一的“;Wave3“;环境中的变量是数组。我做错了什么?

特别声明:以上内容(图片及文字)均为互联网收集或者用户上传发布,本站仅提供信息存储服务!如有侵权或有涉及法律问题请联系我们。
举报
评论区(2)
按点赞数排序
用户头像