ich möchte einen for loop über mehrere numerische Variablen laufen lassen, und mit diesem Loop für jede dieser Variablen eine neue kategorielle Variable erstellen. Die Anzahl der Kategorien ist jeweils abhängig von der Anzahl der Beobachtungen > 0 der jeweiligen Variable. Bisher habe ich für jede der Variablen eine eigene if ... else Bedingung verwendet. Da es sich um mehr als 30 Variablen handelt, sollte es mit einem Loop vereinfacht werden. Die Namen meiner Variablen sind nicht numerisch, sondern characters (die Variablen heissen "ABH", "ADH", "AIK", etc.).
Die if... else Bedingungen sehen derzeit so aus:
Code: Alles auswählen
# transform ABH into categorical variable
if (nrow(subset(Step1.1, Step1.1$ABH != 0)) > 10000) {ABH_Kat <- cut(Step1.1$ABH, breaks=c(0, quantile(subset(Step1.1, Step1.1$ABH != 0)$ABH, probs=seq(from=0, to=1, by=0.1))+1),labels=c("0","1","2","3","4","5","6","7","8","9","10"), right = FALSE)} else if (nrow(subset(Step1.1, Step1.1$ABH != 0)) > 1000) {ABH_Kat <- cut(Step1.1$ABH, breaks=c(0, quantile(subset(Step1.1, Step1.1$ABH != 0)$ABH, probs=seq(from=0, to=1, by=0.25))+1),labels=c("0", "1","2","3","4"), right = FALSE)} else {ABH_Kat <- cut(Step1.1$ABH, breaks=c(0, quantile(subset(Step1.1, Step1.1$ABH != 0)$ABH, probs=seq(from=0, to=1, by=0.5))+1),labels=c("0","1","2"), right = FALSE)}
ABH_Kat[is.na(ABH_Kat)] <- 0
# transform ADH into categorical variable
if (nrow(subset(Step1.1, Step1.1$ADH != 0)) > 10000) {ADH_Kat <- cut(Step1.1$ADH, breaks=c(0, quantile(subset(Step1.1, Step1.1$ADH != 0)$ADH, probs=seq(from=0, to=1, by=0.1))+1),labels=c("0","1","2","3","4","5","6","7","8","9","10"), right = FALSE)} else if (nrow(subset(Step1.1, Step1.1$ADH != 0)) > 1000) {ADH_Kat <- cut(Step1.1$ADH, breaks=c(0, quantile(subset(Step1.1, Step1.1$ADH != 0)$ADH, probs=seq(from=0, to=1, by=0.25))+1),labels=c("0","1","2","3","4"), right = FALSE)} else {ADH_Kat <- cut(Step1.1$ADH, breaks=c(0, quantile(subset(Step1.1, Step1.1$ADH != 0)$ADH, probs=seq(from=0, to=1, by=0.5))+1),labels=c("0","1","2"), right = FALSE)}
ADH_Kat[is.na(ADH_Kat)] <- 0
# transform AIK into categorical variable
if (nrow(subset(Step1.1, Step1.1$AIK != 0)) > 10000) {AIK_Kat <- cut(Step1.1$AIK, breaks=c(0, quantile(subset(Step1.1, Step1.1$AIK != 0)$AIK, probs=seq(from=0, to=1, by=0.1))+1),labels=c("0","1","2","3","4","5","6","7","8","9","10"), right = FALSE)} else if (nrow(subset(Step1.1, Step1.1$AIK != 0)) > 1000) {AIK_Kat <- cut(Step1.1$AIK, breaks=c(0, quantile(subset(Step1.1, Step1.1$AIK != 0)$AIK, probs=seq(from=0, to=1, by=0.25))+1),labels=c("0","1","2","3","4"), right = FALSE)} else {AIK_Kat <- cut(Step1.1$AIK, breaks=c(0, quantile(subset(Step1.1, Step1.1$AIK != 0)$AIK, probs=seq(from=0, to=1, by=0.5))+1),labels=c("0","1","2"), right = FALSE)}
AIK_Kat[is.na(AIK_Kat)] <- 0
Wie kann ich daraus einen Loop machen? Das funktioniert aber leider nicht.
Probiert habe ich es so:
Code: Alles auswählen
PCGs <- c("ABH", "ADH", "AIK", "ALZ", "AST", "BSR", "CAR", "COP", "DEP", "DM1", "DM2", "EPI", "GLA", "HCH", "HIV", "hyp", "KHO", "KRE", "KRK", "MCR", "MSK", "NIE", "PAH", "PAR", "PSO", "PSY", "RHE", "SMC", "SMN", "THY", "TRA", "WAS", "ZFP", "ZNS")
for (PCG in PCGs){
if (nrow(subset(Step1.1, Step1.1$PCG != 0)) > 10000) {
Kat[PCG] <- cut(Step1.1$PCG, breaks=c(0, quantile(subset(Step1.1, Step1.1$PCG != 0)$PCG, probs=seq(from=0, to=1, by=0.1))+1),labels=c("0","1","2","3","4","5","6","7","8","9","10"), right = FALSE)
} else if (nrow(subset(Step1.1, Step1.1$PCG != 0)) > 1000) {
Kat[PCG] <- cut(Step1.1$PCG, breaks=c(0, quantile(subset(Step1.1, Step1.1$PCG != 0)$PCG, probs=seq(from=0, to=1, by=0.25))+1),labels=c("0", "1","2","3","4"), right = FALSE)
} else {
Kat[PCG] <- cut(Step1.1$PCG, breaks=c(0, quantile(subset(Step1.1, Step1.1$PCG != 0)$PCG, probs=seq(from=0, to=1, by=0.5))+1),labels=c("0","1","2"), right = FALSE)}
Kat[PCG][is.na(PCG_Kat)] <- 0
}
JuFl