In einem der oben von mir verlinkten Threads war das so: "Nein" war viel häufiger und es gab keine unabhängige Variable, die beides gut getrennt hätte.
Du kannst über "ALLES AUSWÄHLEN" und anschließendes Copy & Paste mal folgende Daten in R kopieren:
Code: Alles auswählen
# Recreate the example data set `beispiel` from its dput() representation:
# a data.frame with 200 rows, numeric columns x and y and a logical column z.
# The structure() literal is assigned directly; calling dput(beispiel) here
# would fail in a fresh session because `beispiel` does not exist yet.
beispiel <-
structure(list(x = c(594.827461289242, 989.85280143097, 615.4084878508,
273.077666992322, 63.7068431824446, 72.303275577724, 873.511214042082,
362.82499670051, 945.622149156407, 558.318939991295, 512.661150889471,
947.7716635447, 109.801595564932, 818.907550536096, 377.342106774449,
556.641047820449, 313.871186692268, 82.3428803123534, 779.937542276457,
193.234728416428, 655.585467582569, 435.147053329274, 197.148326551542,
994.660422205925, 446.742866421118, 31.4266642089933, 937.016558134928,
870.983954286203, 601.246283389628, 299.015456344932, 809.346275171265,
952.142385067418, 852.053971262649, 127.309735864401, 968.882349785417,
557.101008715108, 5.36444736644626, 135.763919679448, 5.84710156545043,
816.874062176794, 782.163587864488, 931.066285818815, 368.689882336184,
974.454874172807, 823.743681889027, 787.448863266036, 622.82583466731,
967.354928841814, 499.91849064827, 893.695599632338, 401.043246965855,
447.541609639302, 935.77908212319, 683.332197368145, 18.1631301529706,
165.526717668399, 495.834498899058, 426.535228034481, 992.053120629862,
888.950123917311, 345.755371265113, 705.844125011936, 908.724687062204,
676.292887190357, 639.10027127713, 149.604527046904, 601.125390036032,
272.751235868782, 457.853371044621, 222.253093263134, 254.921516869217,
653.687727171928, 471.993161132559, 160.963035654277, 272.080334601924,
712.956093018875, 613.353652181104, 132.121966220438, 70.0072532054037,
966.265012975782, 368.27027797699, 677.454084623605, 845.951730385423,
241.650385316461, 35.1509910542518, 595.240236725658, 836.191128008068,
362.249210244045, 332.188533386216, 644.73633072339, 958.615136798471,
596.203628461808, 510.280017973855, 155.264135915786, 845.851005520672,
214.380425633863, 669.873244827613, 617.75645124726, 49.9997769948095,
949.398775584996, 582.552031613886, 860.954337054864, 518.46724934876,
266.127483453602, 597.377663478255, 795.576084172353, 689.286037581041,
480.631644604728, 465.929388767108, 618.412335170433, 449.270514305681,
602.707532700151, 4.60897409357131, 994.994512526318, 406.462448416278,
117.951637133956, 869.53909532167, 540.192956570536, 881.59766793251,
37.5398390460759, 881.43396214582, 722.349669784307, 769.810296595097,
863.593708956614, 500.742103205994, 925.864024320617, 208.074670284986,
27.1055938210338, 315.793072106317, 660.112249664962, 314.69130073674,
944.461387814954, 778.970579849556, 339.431246276945, 481.646264670417,
415.994798764586, 776.333899470046, 485.427005914971, 15.29910415411,
642.451295163482, 234.829984838143, 747.025883058086, 295.607374515384,
906.218191608787, 853.324017953128, 461.963776964694, 71.3482482824475,
577.803724445403, 579.579334007576, 993.576776934788, 335.274414392188,
666.801774175838, 938.556001987308, 574.856834951788, 691.711413906887,
89.7585870698094, 866.459244629368, 501.139396801591, 223.256284138188,
412.419241620228, 886.306279571727, 495.593437226489, 672.753727762029,
282.504437956959, 817.058302927762, 246.0994545836, 369.646193925291,
235.524395015091, 935.75692595914, 415.076828328893, 137.449405388907,
914.364723023027, 347.641376079991, 852.930875727907, 142.635178752244,
359.432870987803, 245.505067519844, 45.9059465210885, 784.547902178019,
199.171734508127, 391.850244253874, 323.562244651839, 602.105905069038,
541.271679569036, 990.823732689023, 253.79104912281, 735.269893193617,
977.608161047101, 558.046241756529, 392.708159517497, 824.68925928697,
112.384460866451, 535.777904093266, 443.831584881991, 273.957212688401,
157.357535790652, 871.838073711842, 562.044856371358, 550.689758267254,
592.365285614505), y = c(727.812862722203, 638.925284612924,
614.450164837763, 52.4168505799025, 565.157752949744, 587.341380305588,
628.56707512401, 621.890645008534, 82.270042039454, 77.6426121592522,
687.094689812511, 830.303592374548, 145.909875165671, 783.530072309077,
486.450079130009, 989.265105454251, 499.104954535142, 562.260535079986,
378.257020609453, 518.637197325006, 74.2837921716273, 538.472344167531,
961.061000823975, 527.810139581561, 437.159451888874, 32.2669965680689,
37.734180688858, 47.348293941468, 160.959035623819, 469.097393332049,
805.92397483997, 280.597516102716, 754.783985204995, 521.825814852491,
288.947193184868, 17.3772014677525, 668.349529849365, 66.9759646989405,
836.386426817626, 135.343155823648, 655.878108460456, 837.947728810832,
257.647996069863, 135.995285119861, 763.488414231688, 211.996233323589,
267.96959922649, 531.831194646657, 474.116867408156, 373.443664982915,
775.349928531796, 558.203970082104, 155.236621154472, 989.418448414654,
244.44643035531, 130.378512199968, 212.681135395542, 380.004248348996,
359.621168812737, 302.662048954517, 44.3997136317194, 934.783352771774,
568.717029877007, 220.802745083347, 877.961175283417, 467.288806336001,
542.34981467016, 784.39817391336, 647.870272397995, 144.893147051334,
321.637377142906, 951.055582612753, 188.198490999639, 750.404248479754,
459.158475277945, 141.365048941225, 205.193076049909, 675.293499603868,
138.030210509896, 415.543700801209, 451.247031800449, 622.946569696069,
251.174297416583, 464.153463719413, 165.509229060262, 857.806453481317,
156.551897292957, 88.3316337130964, 893.417771905661, 300.620508380234,
752.086671069264, 401.606647996232, 448.558106087148, 159.673980204388,
144.515851279721, 149.180392036214, 514.434259850532, 492.827306268737,
616.342768538743, 447.422890691087, 55.6767152156681, 5.39630954153836,
221.834201132879, 850.963223958388, 267.346206121147, 598.600273253396,
608.599697239697, 992.158440407366, 191.189960809425, 753.390584606677,
242.387337144464, 327.452220022678, 3.53549467399716, 997.424143599346,
438.059504376724, 997.688744915649, 196.719169151038, 111.476795747876,
250.736450543627, 680.476603098214, 577.254296746105, 416.347296210006,
729.181243572384, 574.189733015373, 577.6351490058, 589.999127434567,
222.928340081125, 103.492298629135, 736.506026936695, 612.403756240383,
951.212360523641, 959.875865606591, 744.836050085723, 434.830145677552,
174.775137333199, 435.288068838418, 762.458187760785, 919.029311044142,
906.697582919151, 815.512828994542, 199.752942658961, 49.625585321337,
478.948525618762, 590.393368853256, 96.6202358249575, 950.890234904364,
162.743138615042, 921.506262617186, 127.775072818622, 662.739590508863,
784.041927428916, 764.613161794841, 672.570310533047, 115.665902383626,
851.301487535238, 617.961594834924, 836.411555996165, 169.928224757314,
10.2812200784683, 323.469731258228, 269.682724494487, 825.261356076226,
933.661737013608, 127.791483653709, 913.337606238201, 605.233947746456,
400.285166455433, 665.795303415507, 767.599277198315, 521.895700134337,
394.507517106831, 965.786833548918, 48.9855406340212, 153.308271663263,
696.839065523818, 727.785873226821, 108.379415469244, 464.568545576185,
642.125536222011, 397.110937861726, 990.430257050321, 678.708809893578,
315.318457549438, 153.769260039553, 301.251735771075, 94.5155306253582,
26.8755068536848, 310.483304318041, 742.438868619502, 960.092212306336,
415.802662726492, 522.235407261178, 67.5197287928313, 629.841805435717,
353.413787204772, 424.71465212293, 963.768790708855, 680.998486932367,
718.463884433731, 170.629765372723), z = c(FALSE, FALSE, FALSE,
FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE,
TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE,
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE)), .Names = c("x", "y", "z"), row.names = c(NA, -200L),
class = "data.frame")
Das ist dann ein Datensatz `beispiel` mit zwei unabhängigen Variablen x und y und einer abhängigen z:
Wie Du siehst, sind TRUE und FALSE in deutlich ungleichem Zahlenverhältnis vorhanden. FALSE ist viel häufiger und doch erkennt rpart sehr schnell eine Abhängigkeit von x:
Auch wenn TRUE und FALSE stark unterschiedlich häufig im Datensatz vorkommen, kommt rpart ganz gut zurecht. Wichtiger ist die Frage, wie stark die Abhängigkeit von den Prädiktoren ist.