Survey
* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project
* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project
Machine Learning Methods Applied to DNA Microarray Data Can Improve the Diagnosis of Cancer Eric Bair Robert Tibshirani Dept. of Statistics Stanford University Stanford, CA 94305-4065 Depts. of Health, Research, & Policy, and Statistics Stanford University Stanford, CA 94305-4065 [email protected] [email protected] ABSTRACT !#"$%&%'" % ')( *+, -'.%/ %*02130&$'4)5 %"67'.%%*8. 9' :+;<=%'=91> / &'.0?&@8%'" 0%ABB>+&C1*0 %D$EF(A &% && (A G H2%'. '. :2IKJMLONP0!6. ' QR4)%!G '.%8S* AS'.*%T '.SUV&C1 '.'.% W*0&"6 *K=AS' 3' =XY& 97( ' Q)"65> T'.S &'.'Z[S*%*\S'.B] 9, '.S!8(A &%)XY& ' :^#4 "6!Q6%&6. 2#<1% *%* '8P*0( +$S8>&[S '2 G *0%"6&*%E[&4_A'.&" / % ' Q9%(#3H $"6 9%A3*7!%*%%(K9 *0'` [ %*a4)&E11*0%P7'.S!V:)b]2 '.&%>2"6 *a19 &/ S*%*0 dc 'Z='.Se6&] 9. '.fP3=3 'g'.S '.'Z/ ,S*%*& & = *%%%*%*%8 *% " 9(A&&%)XY&& '%8 & 1% 9' :h0'19 S 'a`1> 90 *9#> Ai#1>4j/ !.[S*k9A*Y[K(AA'.%(73F. %( !: Keywords NP%&A. ' Q>'.Se6&+ &$. 0' Q3&*'.'.l30 1. OVERVIEW b\ TE13% 970'70 (AA'.B4)%T &Qj" %AS' *%%% * 1&&'U]S'. WB'.'. '.'5V%'.eW 8& 'Z'.0'5 3 +%F)13% 9:h^M4 "A&Q '.1%MS&AS' " & ' %ml3 *0kQaAS8%*%%<]U&&%8%'.e]) 0%< %'-&C$. *F*%0Y:jRS 'j3# 11> #0%'Z%( S%'.3*% S3&M8%& '. A1>87P3"68'Z0 *%*]XY& 9M!XY &' PM13% 9: ;n5 'P*% (@> oe$4)\35 !5%'UD(A &%]%'. '.A: $S' QY%K0'M A *P>&*0% "A53# '.8XY&& 'M0U &*0%0 *jAS A7M !D>+&C1*%m9]XY& ' %WG(A&&%]1 l*0G =VS:OpM, .S3&*%9Q#S9%* & 9*9Q3ASK%*%57%&&*5A'.&"6g( &%=7 e6&S1 H8S4j'#!C.& *7*0%Y: %'H%'h3 (A%(Q94i "6&Q4)2j "6&$h>IKJMLB%&A./ ' :5NP0!6. '2 m'.%2S* &AS'.*E '.S!C1 'Z/ '.% F*% "A *%'i >AS' 3'`k(A 'i%7 (6%'.5:H$S' Q6& "6#K %*0<F8! &)%XY! &'j>&4i 7S ' *% S*0g*0&"6 *q:0'g%'%*%*0S'Z.E0Era%(ASPsA:2pM3&K %& '. A1> Q94iM1> 'hY*%1A7# 11>h>j0 9/ *q:H^M4 "A&Q(A &C1 '.'.%A+1l3*%%(8&"6 *%'j-4i S j<1>&')g !S3 *%*P%'Z%&#)KA*% S*0-*% "6 *q: %*0<20 9,7'.S!F'.S( S1'i3 'j%1>. 9j%1*%/ SIGKDD Explorations. ra%(ASKsAtaIMJKL\NP%&A. '` 0&$2XY& & 'h>&4i S'3PV P& ! *%GS&+m%&A'.&A1>A:up#'Z/ %(# A$"6 9%A *9%& '. A10 3*'.%' Q H*%1A7& *%*0'h% ( S1'hLT 8v] 11> hM>` 9% *q:`NP%&6. 'H3 *'.%' '.4)'g32XY& 9=(A '2F &%"6F ]%3&%"6%] '. 4iU( S1' Qa0%0(53=&V 1&'. 92%'Z0&8%'.'. '.S1> ' : 0 '-[ #=0(A '.%' P. 9- H !:#wS11>A'. A7'.S<1> ) &8%'2*%%e6 *]P& 'Z'.0?&F4)& '2 / &K'.S<1>%'# :M=13% 9'-4)F"6=7%(AU%'.e5 &'Z '.%'24 S*0D ]G>7.@ (A( '.'.%"6 *9QH4)&' 8 &g130&$'M AS*G>2( 0"A ]F*% '.'g0$" '.%"6=. 9 x 8+. 9=*0*y!:U;n!F%'=P4 ]U0'Z%(AS%'. >&<4 7&'.M'.S1> ' Q$*%*k1% 9'i4iAS*07 =>M( %"6 7(A( '.'.%"6+. 9:P^#4 "6!Qa%'=0'=%( *]S3 '../ *%AQ$> S'.M&S. 9H. 9'h[j &Q$'.S!F 'i'.S(A!. R! & 199Q"6h&C$. *M'.&"6&H'.h&XY &' : x ;<Mq&Q '.Ai &H130&$'h"Aj%8 'H- '.S*`> &1$9: y ;<4j S*02'.S& '.'Z[S*%*%20 9,gj1% 9'4)M%( 8%'.e Y! 'Z '.%'H 2kQA4j&AS*02( %"6 zj11 10 . 9a4)%*%'.13%(K&h1% 9'R, d`C0 S''.0 &XY !'))'.S!P. 94iAS*0P&$%*{: %'j%'j '.'. 90 *%*+8 *0'.'.%l% U1A*% 5:H|M%"6 +8$S8>& [S ' x ( !C1 '.'.%A*% "A *%'y!Q 4i4)0'.=M1%&4)0 1># !#%'j1 '. 9)%581% 9:`N5 9F7 %M*%/ %(#19 S 'R3"6H> 8 "6&*0 1>[ %'R1>H 31A*0&5: x w&AQ, -&C 1*%AQR} ~3n: y pM[.S3 *9Qh '.+&C0'Z%(]7 %+*%%(G19 S&' j>K &*F 11*%%F=%&6.F$:HK$S8>& #[S '7%'&C$. *D*0(AU 13 @V+S2>&2 A'.&" %A' QRS'.%(+A'ZM7 %=*%0(P19 S 'M q%*{:MNP "6&QYM%'#01>. 9)0 9,+F0 9%P4)0 Volume 5,Issue 2 - Page 48 ( &'=2>&'ZK1 %& 'M HS #1>A:MpM'.%(8&C$/ 1 '.'.%AG*% "6 *`AS'3'K iXY& 9M(A 'M77 e687/ (AA'.%'8%'g1&% *H4)]&C%'Z%(+&!A*% ( x 3V7 "6&#>['.%*%y!:#;<h4gG0 9%U'.7 *%*'.S'.!M`1&/ %&%"6(A ' Qa4i AS*ES'.&'.(A '=+ %'.F 9%>$% ' '.S *%)[%2SA'Z%%(:L#*&%"6 *9Q9`7 2>)1>A'.'./ *%2P "6 *% 1E-JKL)/< '.V(AA'Z% 'Z'KS'.0(PHi/<i ig: v` PR '.M 9S ' Q$4&"6&Q +A*%F>M 11*%0 F8 &*%"6 *]'.7*%*`$S8>&Kj(A& ' :+$S' QR%g%'g%1> .$ 0 9%74)%!P(A '-& '.'.71>!.[ '.S!P8 *0'.'.%/ l% : 2. PAM: A TOOL FOR CLASSIFYING TUMORS BASED ON MICROARRAY DATA - &$. 1'=G *0 '.'.BS '=S'.%(G%&6. B "62S'.E'Z0'Z%*i&$'8} $H$h` E.%l 0 *` S * &4i e$'} {:=bm8 '.!0> G *&%"6&$53M1>&./ , '-4 *%*a V4)08" %&<5 H1A*% ' :);n-0'M *%'.F 'Z5 S3&'Z P +%9&1!: YLMN0'g '.GAD7 !09Se$4)E 'Vc< 'Z='.Se6 & 9.A0' : fubm2%*%*%S'Z.S0*%Gi%'&$G9G1/ 1*%(5UP%&6. mP'.&g)'.7 *%*hASE*0S *%* S ' x w$Mvj )y2%*09$z} {:VE'.&F '.%'Z' i '.S& 9'#H8( =&C1&'.'.0 E*% "A *a A7(A ' A +13% 9' :r SgXY& 9K1> 'M iS '-4&2&1/ &'. 9kt)v`Se$.#*1A7 x vay!QY4)%(7'&A7 x hbowy!Q S *0 'Z 7 x JKvy!Qa3G YA86A'&A7 x KNUwy!:a S ')%"$0 '.K13% 9'%$8Ag. %%(2'. ') P &'Z '.&' :pM'.%(-&S *9&4i e$' Q&M *0 '.'.l3gh 'Z / '.!"A0 'j4)]s 6 S&9:` -$&*kS'.P 2(A& ' : %'. 9&a*q:j}sAl'ZR 3*?&K%'Rj'.&RS'.%()j ./ &'Zg 9.A0V *0 '.'.l3&: x rgP '.&%1%AG `%'M 0$SAQ '.&} {: yLo 'Z) &$. 0+ *0 '.'.l&- *% S* 'jM%'Z >!4& ]7(A%"6&V&'Z' 1*0 x 13% 9!y)7= *0 '.' 9. !E `8,ASg *0 '.'. ' :2 'ZK' 1*%80'&*'.'.l3E *0 '.')[-4)%!P%'#%'Z &0'jK'.7 *%*% 'Z: M *0'.' 9.A0' kMw#vj T=#'.4)70Fra%(AS x (53'y!:-bW V 'ZK &$. 0V&*'.'.l3&%'11*0% U0'8Qh87 e6 '8P * )l"6&. '8AmP5 'Z '1*% ' :g%'M '.S*'.4)'M3 'ZK& 9.A0G&*'.'.l3&' G>8'.S '.'Z,S*%*]11*0%5F%&6.U$:;<M3'M'. "9/ ! *A"A$(A 'R A1#!C%'Z%(-&$', h *0'.'.%0( %&A. ]$:F;]1.% S*Q= m>2 '.%*D11*%%E 1A*0&'4)%P K3P *0 '.' : ^#4 "6!Q%' 09SP'Z0*%*-3'F'.&"6& *# 4) e' :z &!09Sj3]&8 *q:G} `7 e6&'? &P 'Z=&.'8 E 'V]'.&:dNP "6&Q)&'ZP &$. 0'9S '3 *%*KAAE(A '>5S'. @, P *0 '.'.l3%A:\;<4iAS*0B> '. *0P "A *%A1mF *0 '.'.l3!84)](!2 S &V3 S'. 'j[&4i&#(A ' : 3. DESCRIPTION OF NEAREST SHRUNKEN CENTROIDS F"6! Ag'. . 0(A'# h= 'ZK& 9.A05 *0 'Z/ '.l&Qa%'. H!g *q:+}%s H1 1>A'.EF$l30 Ei &'Z 9. *%(A _e$4)'2c< 'Z'.SeA 2 &/ . 0' : f80F>&%3U 'Z'.Se6 U 9. 'M%'M ,A*%*%4)0(tHbm- *% S*0K!7 *0'.'j 9.A07 'i4i#4iAS*0%F &'Z#& 9.A0P *0 '.'.l&: P4ig%"$K!5& 9.A0P9 `4)%8 *0 '.'Z/n'Z 8 "$0% =, h!8(A A:0'( %"6 ' (!4 %(A9-(A 'R4)A'.&C1 '.'.%A=0''Z*%A(#1/ SIGKDD Explorations. % 9'i%-' - *0 '.' :`&74iM11*%'. ,i '.A*0%( 8 '.S*%(+ 7*%0?&E *0'.'=& 9.A0' :;ni7 *%%? 9. @%''.7 *%*qQ7%''.&E? & x 3@ 5%'Z (6 [j- 7 %3&jR# *% S*0%A3y!:ivH7'. %(Q4#A$"$/ AS'.*PS KS2>& H(A ')3#=S'.P%Pgl3* 1 %&%"6$&*{:`MM4 S*05 1>M3)%'j4AS*0P%1"6 7 S &] )$ *i'=4i *%*qQ`'.% 44 S*0] "6 . *% " 9-(A ' : wS11> '.2&7T1% 9'g Pm(A& ' :Fbm24)%*%*i*%&=> & UU&C1 '.'.%AoVq\(A V g+ \1% 9: L#*%'.Q6'.S11>A'.&j) *0'.'. ' :H&i¡j¢- i%3% &' )+¢P' 1*% '2%@ *0 '.'2£Y:] m7 A1> 98 ) 9. @ . '.1>A%(GGPq@(A 5%BP£9¤&*'.'%' (A%"6&59 x sy 3¥ ¢K¦¨§ > ¢ ©9ª3« 8i"A& *%*& 9.A02 . '.1>A%(KMjq(A j%2 £9P *0 '.'-%' x Ay ¥ ¢K¦§ ¬ &®a¯ ! x 6y ° ¢ ¦ ¢# ¥ ³ ¢#xn´ ± 3µ >¥ ´¶ y ² 4)& ´ 0'K21>9 *0 G4)%/n *0'.'='Z3G "$0%AG[ (A&.t s x · x ~9y ´· ¦ F±m ¹§¢ ¸ ®¯ § ©6ª3« ± ¥ ¢ y s s x Ay ² ¢ ¦»º R¢ ± x 89S3$< ´ ¶ %U8&A% M%'K71>A'.%"68&A'Z9 % *%SEU1 "6 9g1> '.'.%%*%%<m385(A 74)@U*%4 &C1&'.'.0 P*% "6&*k&AS*0F1$S =*(A ° ¢ 973 A:h;nj3' `' `"A*0S`, H *%*$(A& ' :HMi1> '.'.0%*0<8%'a-*%& ´¶ 9S3* M0 + ´ .¼ ' : y H$S%A x 6y +>K4j. U ' x 6y ¥ ¢ ¦ >¥ 3µ ² ¢ xn´ 3µ ´¶ y ° ¢ JM4Q4g11*+'. ./n '. *0%(28 '. &$. 0' :`& x Ay °9½ ¢ ¦ '.%(A x ° ¢Ay x¾ ° ¢ ¾ ±m¿y.À Á Á à 4)& Á x 6y À ¦OÂ Ä &.4)%'. bmg9 '.8g 1%7 *a" *%S=i¿Å9+&A'.'Z/n" *%0% : x vH !q S*Q iYLKNu'. 4ji 'Z'RA)1>A'.'.%*%" *%S&'R 3¿o(/ %(-,Ad-)i" *%Si `*(A 'ZH &$. 0=%g-'.&: ) 1%7 *3¿0'`!A'. >)"A*0S[i4)%j& '.'Z/ " *%0% D0'.&*'.'.l30 D&. =%'=%%%? k: yT 4g&l3MPc'.Se6&P 9. '.f2> x 6y ¥ ½ ¢ ¦ >¥ 3µ ² ¢ xn´ 3µ ´¶ y °9½ ¢ JM23g ° ½ ¢ ¦ +, 8 *%*`£V, 8P( 0"A ].QR m*%*` '.Se6&+ &$. 0')?&&Q +(A&KH9&'- ) 9.0S 2Ml3 *k *0 '.'.l3%A: Volume 5,Issue 2 - Page 49 ÆBL NB RMS Gene 0 500 1000 1500 2000 EWS -0.5 0.0 0.5 -0.5 0.0 0.5 -0.5 0.0 0.5 -0.5 0.0 0.5 Average Expression ra%( S2$t- i 9. ' x (y) 5'.Se6 5& 9.A0' x 3y[M8w$Mvj '.&:-="6& *%*a& 9.A053'M> U'.S.& AM 9.A0P 5 *0'.' : %? 9 *RS')*% (%A'- !C1 '.'.%AkQ> FK &- M(A ')0'-..9: J#4z'.S11> '.K3j4i3"6gGc< 'Z#130&$f=4)5&C1 '.'.% *% "A *%'=YÇ ¦ x kÇ¯È k·Ç ÈÉ&É ÉÈ kÊ Ç y!:bm24)%'.]+ *0 '.'.,]kÇ8+ &*'.'-4)A'.Vc'.Se6 F 9.A0f%') 'Z) Ç :`k& Ê x s 6y Ë ¢ x Ç y ¦ § x Yxn´ Ç ± µ ¥´ ¶ ¢Ay y · ±m*%A(`Ì ¢ · ®¯ x ^M!AQ6ÌY¢#&1 '. 9'j1% `1A 0*%2 Y *0'.'£>Q63`%' Q `1A1>.0 g *0 '.'£K%h1>A1S*0% :a;nÌY¢0'RSe$4)kQ # V> 'Z%7P, Í8$Q> -4gU*0!KÌY¢ ¦ sÎ , M *%*£Y: y&PK&*'.'.l30 US*%K%'K¡ x Ç y ¦TÏ 4)& x s sy Ë&Ð x Ç y ¦ ¢ % Ë ¢ x Ç y ;nh4g4)0'.U7 'Z%7gg1A3%*%5# Ç > *% (A'#7 ( %"6 5 *0'.' Q47 +8'.%+K,A*%*%4)0(77 &t &C1 x ± Ë ¢ x Ç y.ÎA y x sAy YÑ ¢ x Ç y ¦ Ò Ó ®a¯ &C1 x ± Ë ¢ x Ç y.Î Ay x %'=%'= *%A( AS'=P2¸ 19 SS'.VP 'Z%7 *0 '.' 1A %*%% '`%|K S'.'.0*%0`0'.!0% 9h *'.%' 9'. } , M&%*0' : y %'.&%%39g'. 'g% x 6yK'.%%*0FA'.2S'.G0 *%% U%'.&%0 9P 3*'.%' x I#LKy!:jkIMLÔS'. 'FGN5 / *0A%'&.%j A1S-0'Z ->&<4 FK( 0"A 'Z '.&" % U +K&*'.'#& 9.A0' x %5"6 ! )%A3y!t x s 6y ËA¢ÕAÖ× x Ç y ¦ x Ç ± Y¥ ¢AyZØÙuÚ ¯ x Ç ± Y¥ ¢ ya±m*% (iÌY¢ ^#&8ÙÛ 1&'. 9')g1>9A*%P4)%/n *0'.'M" 0 Ü&" / g7.%CF&C1&'.'.0 U:I#Lz3')> 5'.S '.'Z/ ,S*%*¤ 11*%0 @EG4)0+"A%&@K1 %&%AB1 *% 'P} n: SIGKDD Explorations. ^M4i "6&QAI#LB H> &*8 11*%0 8M( !C1 '.'.%A Q9'.% -j$S2>!h k1%& ' x (A 'yh%'H2S!(! 3G2$S8>&K'1*% ' x 1% 9'y!:L-'gF&A'. $S &AQ i7.C8ÙÝ%'&C$. *g*0( A:h$S' Q6 9g' 1*% 'Z%7 `ÙÞ4)%*0*>'.%(AS*0Q>3F%')%$"6&'.K4)%*%*R>S3!l3 k: JM 'ZH'.Se6&=& 9.A0'%'$S'a'.%%*0hKkIMLgQ4)8'. "9/ &*e6&V%XY! &' :;<K '.'.S&'#3M8 " 0 27.C Ùß%'50(AA *q:àL#'F \*%%&QK+4AS*0o>V%1>A'.'.%*0 1>&., Å& '.'.P *% S*0%A'M4)AS#%'M'.'.S1/ %Ak:GL#*%'.QHkIMLS'. '=4_ *0 '.'2 9.A0' Q4)&'84 S'.2'.Se6 G 9.A0' :7L#G%1> . 9g '.9S 2 i%' q&=%'K3M&24)%*%*H>'.A8(A 'K[=4)%! ° ½ ¢ ¦ 7[ *%*R£Y:`w$SP(A& 'j4)%*%*R->KS'.+0F *0'.'.%l% : 4. RESULTS ON THE SRBCT DATA %'19 S84 '= 11*%%VFw#vj »FK} {'. }s[h 1*%& '.S*' :hj" *%Si k¿%H9S30 x 6y 4j'KA'.&E9U11*%%( /{[A*0U& '.'Z/<" *%0%A:=v` U & '.'Z/<" *%0%AB&. 3]&'Z&.24!P%%%? E4) ¿ ¦ ~ É ~:-g&.M&S"6 '#g'.4)U%Vra%(AS=:)g!/ '.S*%(g'.SeA 9. '#'.4)7%Fra%(ASM x 3'y!: %'=$ *`1$S E? &U& '.'Z/<" *%0%A@&. '2 ]? & 'Z2&. ' :E;<=9Sm~AG( ' :E$S' Qh, 2%'U'.&Q 'ZR'.Se6 K& 9.A0'k1$S ' & S`1%&%A'RS'Z/ %(2&*%"6 *+,&4z(A ' : ra%(AS8~F'.4)'#g~67(A '#M4i&=S'.P7 *0 '.'.mw$-/ vj 'A( &!4)%=H"A*%S&%a'.Se6 g 9. 'a[ !UHK[ SK&*'.'. ' :-JMg)g( ')4)VA/n? & 1>A&$'j%U8(A%"6 + *0 '.'#g *% 'Z#8SS *%*7&C *%S'.%"6A: ra%(ASF+'.4)'g 'Z%7E1A3%*%0&'8)> *%A( %(P Volume 5,Issue 2 - Page 50 ä 1 5 8 10 15 22 34 52 Size 81 133 206 339 598 1020 1668 2188 2308 ã 0.8 æ ä te te tr 0.6 ä æ Error 0.4 æ ä å te 0.2 te æ ä å cv æ 0.0 0 te te æ æ cv te cv te te te åcv ætr tr æ tr te te te te te te te cv cv æ æ æ å tr te te tr tr cv tr cv tr cv tr cv å â 2 á 4 6 Amount of Shrinkage Delta ra%( S$tg8&. g S"6 ' x .%%(tM.!Ü( kQa& '.'Z/<" *%0%AtK "ÜYQR3U 'ZtMÜ*%Sy# '.S*%(F, ç 11*0(+&'Z '.Se6 U 9. '#2w#vj z$:8" *%S2¿ ¦ ~ É ~+%0%? &'-8& '.'Z/n"A*00%AG&. MA:M;<#1$S 'MF'.&M ~A(A ' : SIGKDD Explorations. Volume 5,Issue 2 - Page 51 BL EWS NB RMS 813841 859359 207274 296448 898219 784224 796258 244618 789253 298062 461425 1409509 42558 769716 25725 44563 325182 812105 41591 810057 52076 866702 814260 43733 357031 1435862 770394 377461 1473131 295985 241412 80109 183337 233721 897788 563673 504791 212542 365826 204545 308163 21652 486110 ïtissue plasminogen activator øquinone oxidoreductase homolog í íinsulin-like growth factor 2 ÷insulin-like growth factor 2 (somatomedin A) ñhomolog of mouse mesoderm specific transcript growth factor receptor 4 öfibroblast sarcoglycan alpha (dystrophin-associated glycoprotein) èEST presenilin 2 (Alzheimer disease 4) ïtroponin T2, cardiac muscle isoforms îmyosin MYL4 ïtroponin T1, slow skeletal muscle isoforms õ amidinotransferase òL-arginine:glycine neurofibromin 2 (mutated in neurofibromatosis type 2) ñ farnesyltransferase 1 êfarnesyl-diphosphate growth associated protein 43 (GAP43) ô óN-cadherin (neuronal) gene from chromosome 1q îALL1-fused meningioma 1 (disrupted in balanced translocation) ëcold shock domain protein A òneuroblastoma protein (NOE1) ñFas-associated protein tyrosine phosphatase 1 lymphoma variant translocation protein 1 êfollicular glycogenin 2 ð ïtumor necrosis factor alpha-induced protein 6 MIC2 surface antigen (CD99) Fc fragment receptor transporter, alpha chain ëIgG caveolin 1 (caveolae protein) ïtransducin-like enhancer of split 2 EST factor 1 (ets domain transcription factor) îE74-like major histocompatibility complex, class II, DQ alpha 1 îmajor histocompatibility complex, class II, DM alpha í growth factor binding protein 2 ìinsulin-like receptor type protein tyrosine phosphatase F éantiquitin 1 êglutathione S-transferase A4 ëcDNA DKFZp586J2118 êgrowth arrest-specific protein 1 EST éEST alpha 1 catenin (cadherin-associated protein) èprofilin 2 ra%( S=~t)=" *%S 'M ° ½ ¢ [M~6(A '#, #4)%!VK*%'ZKA ° ½ ¢ %'MA? &[M=w$Mv :)J# 3#=( &' 4)5A? &'.SeA +& 9. A0')%5!5&*'.'Mg *%A'Z-8SS3*%*%7!C&*0S'.%"6A: SIGKDD Explorations. Volume 5,Issue 2 - Page 52 *0 '.'h[i!213% 9:hr hA'ZH1% 9' QA 'Z%7 1A %*%] )> *%A(A%(U5.S *0 '.'=4j'8'.%(Al3$* (!= U8 'Z%7V1 3%*%%<G >&*0 ( 0(++ 9 &K *0 '.' :;nK%'g *%'.+09&&'Z0(+F&C %g8 'Z%7 1A %*%% '5 gl"65 'Z+' 1*% '73+4i&G 5w#vj ' : x '.Ml"6M'1*% ')M7e64)5= *%A7#( 1: y J# j3`) 'Z%721A %*%% 'i[i '.jl3"6j'. ' '.%( l39*%]*%4i&=3 E 'Z%7 ]1 3 %*0% 'g[8 .S=w#vj ' : 5. DIAGNOSIS OF CANCERS WHERE NO SUBTYPES ARE KNOWN TO EXIST b]3"6'. &]38&'Z2'.SeA ] &$. 0'=3 '1> / &$0*`5>+1>4i&.[S*`9 *`[0 (AA'.%(U &&:7bW '.&"6& *R &K'.S<1>&'#ge4)5&C0'ZQ3%#US'.g(A !C1 '.'.%AU%, 70 P0'Z%(AS%'.5>!4& P'.S<1>&' S'.%(2'.7*%*'.&-h(A ' : ^#4 "6!QA 'Za'.Se6 &$. 0'%')'.S1>&"$%'.g*0 %( 19 SA:;nH *=>) 11*%%2%'. 'H4)&j'.S1> ' &&=*A$Ee$4)VP!C%'Z:8pM[ .S3 *9Q+'.S! '.S<1>&'#"A=>& V 9l3P, 7$F1> '- ` &:#; '.S!W '. ' QM 'Z5'.SeA T &$. 0'P +>]11*0% S*% '.')'.A1S0"AK'.S1> '5>0 9l3 k: N59U1> 'M !g8'.S'.1> & VF>2 *0& S*0*V&./ !A(A& S' :ùrV0'Z AQ=XYS'.m*0(A@vH/<& *%*8*1A7 x IKRvj Ry20'P 'Z A m[ú K*1A7UA( S*%' :ÔL-11C%7 *W~A6¨ IKRvj O13% 9'P&'.1>A3 V &19m3] "6!:G7 7 %3&=4)0*%*jS'.S3 *%* '.S S257%'. '.A:F} HsAs!H;<K%'> *%% "6 VM%'%'Z/ ! 13&D%'2F&'.S* #" 0%A¤ (VIvj '2 *% S*0#*% "A *q: vHV%'.&*%<QR&'Z'.Se6 U& 9.A0' K>8S'.5+/ (AA'.F+1% 94)]Ivj H:;<`4+ g'.S1> 'K IKRvj z4i&5e$4)B]&C%'ZF ¤A5'.S1>+4i&5 (A( '.'.%"6235= &' Q>4= AS*0Ul 'ZM'.Se6 & 9.A02 *0 '.'.l&`K&&%i4)%2'.S1>%'h1 '.&$h%2 ( %"6 P1% 9:h^M4i "6&Q='.SF'.S<1> '3"6#> F *%* 0 9%lk: J# "6&. *% '.' QYh4=S'.g 'ZK'.Se6 5 &$. 0'# (A&& 4)5S'.S1>&"$0'. P*%0(&$' Q%')1A*% > ' .& *%A:8r !C1*%AQh}s&`3 *? G70!6. G7'.& &A'.%'Z%(F h8&C1 '.'.%AV*% "6 *%'M`A6~+( &'#[ FIY/ v T130&$' :pM'.%(+%&%*i *%S'Z&%(m} {Q&V0&/ lG<451S%"62'.S(AS1'g jIKRvj hQ4)0]&c.| vH/<*%%e6)Ivj f3]c &%" vH/<*%%e6jIvj H: fg&2 F13% 9'74)o| v`/n*%%e6UIvj 3@m*%%"6U*%A( & 51% 9'4)U !0" 5v`/n*%0eAIRvj h: L-*AS(AP%')&'.S*-0')%9.%(AS%(Q3-3')*0%FS%*%%<5'- 0 (AA'Z%79A*q:5^#0!% *` *%S'Z&%( x 2 9G !8<1> M&*0S'Z&%(6y8 BA*D>P11*0%DGG*0(A5(AS1@M1/ % 9' :Frg%'K '. Q&*0S'Z&%(59G'. *j=>S'. &A'Z.S&i0 ( A'Z%)9A*q:H;<kK1% 9H0'i0 ( '.4)% IKRvj hQ`&*0%0&28S'Z> *%`-1>&.[d-0 ( A'.%'hA #%3%"$0S *k13% 9:i^M#4 KS90**0( 8(AS1 a&)1% 9'jK0 ( A'. F4)+IKRvj G'.83j# 11*%+ *%S'Z!0(22K 9( AS1: J#&'Z'.Se6& 9.A0'iF"6! Aj%'iû S*%<9:L),/ !21S%"6S ='.S1> '"AF>& m0 9l3 kQ 7 11*%o&'ZU'.Se6 W& 9.A0'P¤.&1P¤0(A '. 4)%!'.S<1>i0'h1 '. 9H0 %3%"0S3 *$1% 9:abm`4AS*0 A1>2K2'.S"%" *a% 'K8130&$'KXY&g>&<4 1%&+'.S<1>&' : SIGKDD Explorations. bm 'Z P%'#0A5gIvj @ -}s&{:#&g4& A713% 9' QY H4)%!E9s=4i&2&*'.'.l3G 'K"%(+&%"A v`/n*%%e67IRvj W3¤s+4&F *0 '.'.l@ '=3"$0(V| uv`/n*0%e6 Ivj H:b]H3 *g%"$0M '. -1% 9'R%9Mj. %/ %('.!#s813% 9'- P8&'Z)'.&- s 13% 9' :b]Ml- '.Se6& 9.A07 *0 '.'.l3&jggs. %0(81% 9' :H & '.'Z/<" *%0%A@&. 84j'2%%%? V4) B¿ ¦ s É A$: %'+A1%7*#$ *MS'.W6]( ' :u;nF1$S Wm& '.'Z/ " *%0% U&. '# U8 'Z#&. ' :-NPg01>. 9*9Q4j/ "6!Q9130&$'H%j1%&7| @v`/n*%%e6)IRvj 5&*'.'*%%"6 '.%(A%l 9*+*0 ( &)3 +130&$')%+K1 %&5&%"A v`/n*%%e6KIvj ] *0 '.' x '. ra%( Sg6y!:`$S' Q 'Z-'.SeA 9. '+ \>VS'. ¤m&*01\0 ( A'.G &+ "6&T= &-'.S1> ')e$4)+!C%'Z: 6. CONCLUSIONS IKJMLü0!6. '5"AVE1> 90 *KD "6 *0S%A%? G 4j U4i0(A '. 5. &&:;V&K7[S*%*%VS%*0%? &%g1> 90 *qQa4i "6&Q48S'Zg "6 *% 1G9 *0'g53*%? &5:hJM 'ZM'.Se6 F 9.A0'-%'-21>4&.,S*9 *, #&C$/ . !0(S'.&,S*k%[7%A5AÅ%&6. 5:iv`+0 / ,%(g#(A 'i3)# & '.'.78XY& 90#>&4i XY&&$<1> 'g ) &Qa%=m&*01ES'Pl3E 30&' [h %'.%($%>$0&'a[ H%2S 'Z %%(:NP "6&QAAS!/ '.S*'M%'.8=1>A'.'.%0*%Gi& %(+0 ( 'Z08 'Z'M '. AU-JKLz&C1 '.'.%AU*% "A *%' Q>1>&31'-S'.%(Hi/<` i=:3ra0 *%*9Q 47"A'.4)m3=7&$A*%A(]%'='Z0*%*S'.&[S*`, &'Z/ %7%(+'.S"$%" *h ) !21% 9'4) E5'.S1> ' 3"6j>& 0 9l3 k:hNPM S-0 ( '. 'i&*01&*0%/ % 0 '8V(A%"67!@13% 98+11A10F& 199Qa4)0 4)%*%*H%& '.823 &'M3K=130&$M4)0*%*h'.S"$%"6AQ3 &*01=K'.131% 9, »'.0j!XY &' YS '.'. . 9' : 7. ACKNOWLEDGMENTS 05v 74j'[SD9B BJwr»|#AS3P- '.\r */ *%4)'.%1:H-A>&.0'. >4j 'j1.0 *%*F'.S11> .9JM;<^ ( 9gM$s2 LKA6QR 3UJKwr@( 9MIgNUw$/<AA$s&~669:=w&"$/ &*a hKl(AS 'M=&1$S +AÔ U*%0!.% *%8$ -A>&.-%'. k3-4 ')1S*%%'.F%UHJMLKwV}%s {: 8. REFERENCES }s!=L#'.uL=:L#*%%?A QFNP%3 *7v#:gh%'. Qg:g%¤I"$%' Q iN5$Q-;? 0GwY:A'.'. ' QKL-3$ '7-A'. 94j*0kQ#ýA / [&] #:KviA*0%e>Q8^Mþ &mw>&Q=kSDQ=ÿM%iSkQ ýAA;:)a4 *%*qQk% ( (QK|M! *0zj:#N5.qQK NP9 AQý6 &'^MS'.AkQ$ý :%Q%'.&PkSQ$I"$0Fv-:&4)%' Q # >&.D%'. qQP|K"$0_w$&*%9eYQ2bW%( #:8 i3 kQ 09 #: |# %&QIK %'I2:!bm %'. $S(A!QAýA 'a8: L- ( AQ# (A&bDe6AQ-A *0g "99Q bT \bW%*/ '.AQNP%3 *>=:$|# "6&QýA P #:$v`$kQ$IK"$0vi'Z %Q .%!e+8:v`4)kQ33FAS%'-NE:>w9 S3$Q !#"%$'&(!)+*,- %./0 .1"2 3- )4&(56879* $ :$ 3 (& QJMS;<5 = x A 6y!Q> ? >9sAs : } 8NP03 *>v#:6H%'. kQ$ S*>M:$w$1> *%*%7 Q6.%e8:Av`4)Q 3I"$vi 'Z %kQA@B C ,$D"E"9 E F"E G HI#"E &( .1*IJB K87$ G"E,$ Qh9 &%( 'E JK% 3 *3L# =2w$ % &'LM x s A6y!QYs&~6A ?> s ~6 A$: } =M: g:|MA*%SkQPI2:8:2w*%A%5Q5a:8a 7 9QU #:8^MSYQ NE:`|KA '.&>& e>Qiý:H:hNP '.">Qi^8:h i *%*0!Q)NE:HH:AkQ Volume 5,Issue 2 - Page 53 Training Data Probability 1.0 BL ••••••••• •••••• • • 0.8 EWS ••• ••••• ••• • • • NB •• • 0.0 •• • 0.4 0.2 • • 0.6 •••• RMS •••• • •••••• ••• • • • • • • • • • ••• • • • • • • • • • • •• • • •• • • •• • • • • • • • • • • • • • • • • • •• • •• • • • • • • • • • • •• •• •• • • • • •• • • • • • • • • • • • • • • • • • •• •• • •• •• •• • • •• • • • • • • • • • •• •• 0 10 20 30 40 50 60 Sample Test Data BL Probability 1.0 • EWS • • • • • NB • 0.8 0.6 • O 0.4 0.2 0.0 • • O • • RMS • • • • • O • • • • • O • • O •• • • • •• • • • • • • • •• • • • • • •• • •• •• • • • • • • • • • • • • • • • • • • • • • • • • • 5 10 15 20 25 Sample ra%( S+$t8h'Z07E&*'.'81A %*%% '=, =7w$Mv O$:5w 1*0&'g713.%% ]9U.S *0 '.' x S11>!!yK 3G1%& &*'.' x *04i&!y!:HL#*%*> #Ag. %%(=' 1*% 'j# *0 '.'.l3 + . &*9Q ')-M g 'Zj' 1*% 'i3jMe$4)78>Mw#vj ' : ra%"6- Y 'Z'1*% '`# &S *%*jw$Mvj ' 9e684)7 *%A:hJ# j3` 'Z%7 *0 '.'i1A %*%% ' g'.0( l3 9*P*%4&) +1 3 %*0% '- a &j 'Z-130&$')%P 5 *0 '.' : SIGKDD Explorations. Volume 5,Issue 2 - Page 54 1.0 0.6 0.4 0.2 Survival Probability 0.8 low−risk patients high−risk patients 0.0 p=0.046 0 20 40 60 80 Survival in Months ra%( SFtwS"%" *h S"6 'g j10!]'.S>&*'.'. '2jIvj \:5ra'ZQ%&!0 * *%S'Z&%(54j'211*0%GV s /n13% 9c<.0%(='.&: f]i4i20'Z%&'.S> *0 '.'. 'jRIvj U4&-0 9l3Y:h% 9'%7A#'.S> *0 '.'A'.%(A%l 9*7>&.& '.S"$0" *h3E130&$'g%] &='.S> *0 '.' :5M&2 '.'.S> *0 '.'. 'g4! 9l3YQP 'Z='.SeA E& 9.A0]$&* 4 'gl=S'.%(P '.5s P. %%(5130&$' :+pM'.%(P%'g$ *qQ m130&$g%DTs/n' 1*%@c<&'Z8'.!f54j '= *0 '.'.lD 'g ! c<*%4j/{%'.e$f7 Fc0( /{%'.e>: f]Uc<%(A/n%'.e$f1% 9')3AF'.%(Al 9*P1>9&-'.S"$%"A*q: ý:=:IK4)%(QYNE:3L=:Y *%0( %SqQ #:3I2:vi*%9Al *0kQ3 3 j:w>:a 3&OQ N P8#"%$+#"E 3Q "E "E$SR4+#"E $!"E +#"E -$8 ' A)U&(5V87$8 A. * $E(T & QYw&0& XWYZ x sA 6y!Q> s>A: } ~gL=:RI2:a|M Q@B#"E 3"% Q` i 17 [^K*%*{QHv`9 AkQ3L=QYs A$: # } g "A -^K'Z0 Q3-A>&.#%'.%qQ3IK"$05vi'Z %Q3 3 .%e vH4)k] Q \?$ 0"%$ (& 87$8 $ Qk|M Av`0 *%A( A W_T ^`a x T sy!Qas >s$: } g "A F^K'Z0 Q)# >&.+0'. qQM3¤ýA&AVr0 / 7 kQcb_0XP. d I"E ' "9eP "%$C&(R "EI"V.VC&Ef $ A"E $ ' Qw1%(A&.I/ gH&*0 (Q#JM!h 4 ` e>Q iJ 8Q A$sA: } ;( 0 ^M [ *%e>QçI"$0 IKS( (6 kj Q K0 (Û i&Q NP%! *TM 7 !QNP%! *Wv`%.&Q#%3úw/ AkQ` S*-NP&*%?&&Qjv..B|MS'Z!'.AQjN5 *)H'Z&*0*%&Q N5eG#XY *0klQ k>A3 e$%qQRL#gvi /nIK QR`$4j IK S(A&.9Q`ýAS3VAA&QiSe '2viS>&3.Q`bW%*%0 r *%AQAw$&[ 0-H.*0S(6$Q$wl>-|#S"$>&( &QJM%e$*0 'k / 7 kQM'.e )ýA 3'.'. Q! ^ m eAFM*%'.'.AQvi& þ.%FbW%*/ [ 3YQ|MS02wS&QM*%*%/a:9g*%*0% % qQL#e6#vi (Q 3 ýA&X>!àk 9Qonpq*d87$ r$ 3PsHr0$8 I"E$ )$ 8"E V "E5$Qj+JM&4H(A*0 Bý AS *) KNP0&0 =;; x A$sy!QkA? >$~A: } gý6"6 kQ9ýAS7wY: bm qQ$N5e$S'H#%( Q &t Q9R ^=:$wA *qQ N5TR 9qQFr ezb] 'Z&7 Qr e»vi&.A*0kQ N5 ,Bw$!94jQH `%'Z%3Ug:aL-9A&'. SQH 'Z Ba&/ &'. Q# \ S*wY:NP *? &Q:@B#"E 3Q "E u"E o '"S&9* SIGKDD Explorations. '/$8 ' "E$UCHC&v&(5187$ 4$ * 3x (&v"E "%$ 3 '"9 5$"95qJ $8w%QJMSMNP 0&0 x A$sy!Q36 S$> A : } gJMA/n^M$( e$0k¼ '@k1A7z i*'.'.l30 ÍhþZ !QAy + ' "95 "9 "E H0z,$5"E 5"9q %./0 .1"{ &E$ ?K+#"ET 3Q "E *0 S &?w%q | 1 %./0 .1 " QRv`*09$ YL x s 6Ay!Q3A A?9> A$s: }sg# >&.¤0'. qQG "6T^M 'Z%AQVv*0 '.S 70 JK'.%3 kQ# 3o|M%*%>&.U iSkQz'"S&E .V 5P "%5$v1)60C$Cw}o$ &(q87$8 Q h9 &%( '+K+JK0 3*ML-A 8@=w % ':LL x AAAy!Q36 AS9> 6A $: }sAs!=ýAS*%%NE:5Hg A'. Q@c$$ i"}$ "}0V H0{.1"E"S&(. *0 S &?w%q| : E.i0 .v " QHw$ %3'%@M A*% ( oWM x sA 6y!Q~6AS9> ~AsA: Volume 5,Issue 2 - Page 55