Download Machine Learning Methods Applied to DNA Microarray Data Can

Survey
yes no Was this document useful for you?
   Thank you for your participation!

* Your assessment is very important for improving the work of artificial intelligence, which forms the content of this project

Document related concepts
no text concepts found
Transcript
Machine Learning Methods Applied to DNA Microarray
Data Can Improve the Diagnosis of Cancer
Eric Bair
Robert Tibshirani
Dept. of Statistics
Stanford University
Stanford, CA 94305-4065
Depts. of Health, Research, & Policy, and
Statistics
Stanford University
Stanford, CA 94305-4065
[email protected]
[email protected]
ABSTRACT
!#"$%&%'"
% ')(
*+,
-'.%/
%*02130&$'4)5 %"67'.%%*8. 9' :+;<=%'=91>
/
&'.0?&@8%'"
0%ABB>+&C1*0
%D$EF(A &%
&&
(A G
H2%'.
'.
:2IKJMLONP0!6. ' QR4)%!G
'.%8S*
AS'.*%T
'.SUV&C1 '.'.%
W*0&"6 *K=AS'
3'
=XY& 97(
' Q)"65> T'.S &'.'Z[S*%*\S'.B] 9,
'.S!8(A &%)XY& ' :^#4 "6!Q6%&6. 2#<1%
*%*
'8P*0(
+$S8>&[S '2
G *0%"6&*%E[&4_A'.&"
/
%
' Q9%(#3H $"6 9%A3*7!%*%%(K9
*0'`
[
%*a4)&E11*0%P7'.S!V:)b]2 '.&%>2"6 *a19 &/
S*%*0 dc 'Z='.Se6&] 9.
'.fP3=3
'g'.S '.'Z/
,S*%*& & = *%%%*%*%8 *% "
9(A&&%)XY&& '%8
&
1% 9' :h0'19 S
'a`1>
90
*9#> Ai#1>4j/
!.[S*k9A*Y[K(AA'.%(73F. %( !:
Keywords
NP%&A. ' Q>'.Se6&+ &$.
0' Q3&*'.'.l30
1.
OVERVIEW
b\ TE13% 970'70
(AA'.B4)%T
&Qj"
%AS' *%%%
*
1&&'U]S'. WB'.'. '.'5V%'.eW
8&
'Z'.0'5
3
+%F)13% 9:h^M4 "A&Q '.1%MS&AS'
"
& '
%ml3 *0kQaAS8%*%%<]U&&%8%'.e])
0%<
%'-&C$. *F*%0Y:jRS
'j3#
11> #0%'Z%(
S%'.3*%
S3&M8%&
'. A1>87P3"68'Z0 *%*]XY& 9M!XY &'
PM13% 9:
;n5
'P*%
(@> oe$4)\35 !5%'UD(A &%]%'.
'.A:
$S' QY%K0'M A
*P>&*0% "A53# '.8XY&& 'M0U
&*0%0 *jAS A7M !D>+&C1*%m9]XY& '
%WG(A&&%]1
l*0G
=VS:OpM,
.S3&*%9Q#S9%*
& 9*9Q3ASK%*%57%&&*5A'.&"6g(
&%=7
e6&S1
H8S4j'#!C.& *7*0%Y:
%'H%'h3
(A%(Q94i "6&Q4)2j
"6&$h>IKJMLB%&A./
' :5NP0!6. '2 m'.%2S*
&AS'.*E
'.S!C1 'Z/
'.%
F*% "A *%'i
>AS'
3'`k(A 'i%7
(6%'.5:H$S' Q6&
"6#K
%*0<F8! &)%XY! &'j>&4i 7S
'
*% S*0g*0&"6 *q:0'g%'%*%*0S'Z.E0Era%(ASPsA:2pM3&K
%&
'. A1>
Q94iM1> 'hY*%1A7#
11>h>j0 9/
*q:H^M4 "A&Q(A &C1 '.'.%A+1l3*%%(8&"6
*%'j-4i
S
j<1>&')g
!S3
*%*P%'Z%&#)KA*% S*0-*% "6 *q:
%*0<20 9,7'.S!F'.S(
S1'i3
'j%1>.
9j%1*%/
SIGKDD Explorations.
ra%(ASKsAtaIMJKL\NP%&A. '` 0&$2XY& & 'h>&4i S'3PV
P& !
*%GS&+m%&A'.&A1>A:up#'Z/
%(# A$"6 9%A
*9%&
'. A10
3*'.%' Q
H*%1A7& *%*0'h%
(
S1'hLT
8v]
11> hM>` 9%
*q:`NP%&6. 'H3
*'.%'
'.4)'g32XY& 9=(A '2F
&%"6F
]%3&%"6%] '.
4iU(
S1' Qa0%0(53=&V 1&'. 92%'Z0&8%'.'.
'.S1> ' :
0
'-[
#=0(A
'.%'
P. 9-
H !:#wS11>A'.
A7'.S<1>
) &8%'2*%%e6 *]P&
'Z'.0?&F4)&
'2
/
&K'.S<1>%'#
:M=13% 9'-4)F"6=7%(AU%'.e5
&'Z
'.%'24
S*0D ]G>7.@
(A( '.'.%"6 *9QH4)&'
8
&g130&$'M AS*G>2(
0"A ]F*% '.'g0$"
'.%"6=. 9
x 8+. 9=*0*y!:U;n!F%'=P4 ]U0'Z%(AS%'.
>&<4 7&'.M'.S1> ' Q$*%*k1% 9'i4iAS*07 =>M(
%"6 7(A( '.'.%"6+. 9:P^#4 "6!Qa%'=0'=%(
*]S3 '../
*%AQ$> S'.M&S. 9H. 9'h[j
&Q$'.S!F
'i'.S(A!.
R! &
199Q"6h&C$. *M'.&"6&H'.h&XY &' : x ;<Mq&Q
'.Ai
&H130&$'h"Aj%8
'H- '.S*`> &1$9: y
;<4j S*02'.S& '.'Z[S*%*%20 9,gj1% 9'4)M%(
8%'.e
Y!
'Z
'.%'H
2kQA4j&AS*02(
%"6 zj11
10
. 9a4)%*%'.13%(K&h1% 9'R,
d`C0
S''.0
&XY !'))'.S!P. 94iAS*0P&$%*{:
%'j%'j '.'. 90
*%*+8 *0'.'.%l%
U1A*% 5:H|M%"6 +8$S8>&
[S ' x (
!C1 '.'.%A*% "A *%'y!Q
4i4)0'.=M1%&4)0
1>#
!#%'j1 '. 9)%581% 9:`N5
9F7
%M*%/
%(#19 S 'R3"6H> 8 "6&*0
1>[
%'R1>H
31A*0&5:
x w&AQ,
-&C
1*%AQR} ~3€n: y
pM[.S3 *9Qh '.+&C0'Z%(]7
%+*%%(G19 S&'
j>K &*F
11*%%F=%&6.F$:HK$S8>&
#[S '7%'&C$. *D*0(AU 13 @V+S2>&2
A'.&"
%A' QRS'.%(+A'ZM7
%=*%0(P19 S 'M
q%*{:MNP "6&QYM%'#01>.
9)0 9,+F0 9%P4)0
Volume 5,Issue 2 - Page 48
(
&'=2>&'ZK1 %&
'M
HS
#1>A:MpM'.%(8&C$/
1 '.'.%AG*% "6 *`AS'3'K
iXY& 9M(A 'M77
e687/
(AA'.%'8%'g1&%
*H4)]&C%'Z%(+&!A*%
(
x 3V7 "6&#>['.%*%y!:#;<h4gG0 9%U'.7
*%*‚'.S'.!M`1&/
%&%"6(A ' Qa4i AS*ES'.&'.(A '=+
%'.F
9%>$% '
'.S
*%)[%2SA'Z%%(:L#*&%"6 *9Q9`7 2>)1>A'.'./
*%2P "6 *%
1Eƒ-JKL)/<
'.V(AA'Z% 'Z'KS'.0(PƒHi/<„i…iƒg:
v`
PR '.M †9S ' Q$4&"6&Q
+A*%F>M
11*%0 F8
&*%"6 *]'.7*%*`$S8>&Kj(A& ' :+$S' QR%g%'g%1>
.$
0 9%74)%!P(A '-& '.'.71>!.[
‡'.S!P8 *0'.'.%/
l%
‚:
2.
PAM: A TOOL FOR CLASSIFYING TUMORS BASED ON MICROARRAY DATA
ƒ- &$. 1'=G *0
'.'.BS
'=S'.%(G%&6. B
"62S'.E'Z0'Z%*i&$'8} ˆ$H‰$hЁ`
E.%l 0
*` S
*
&4i
e$'} ‹{:=bm8 '.!0>
G
*&%"6&$53M1>&./
,
'-4 *%*a
V4)08"
%&<5
H1A*% ' :);n-0'M
*%'.F
'Z5
S3&'Z
P
+%9&1!:
„YLMNŒ0'g
'.GAD7 !0†9Se$4)E
'Vc< 'Z='.Se6 & 9.A0' : fubm2%*%*%S'Z.S0*%Gi%'&$G9G1/
1*%(5UP%&6. mP'.&g)'.7
*%*hASE*0S *%*
S
' x w$ƒMvj…)y2%*09$z} ‹{:VE'.&F '.%'Z'
i '.S& 9'#H8(
=&C1&'.'.0
E*% "A *a
Ž
ˆ
A‹7(A '
A‹
‹+13% 9' :r
SgXY& 9K1> 'M
iS
'-4&2&1/
&'. 9kt)v`Se$.#*1A7 x v‘ay!QY’4)%(7'&A7 x ’hbowy!Q
S
*0
'Z
7 x JKvy!Qa3G
YA86A'&A7 x ƒKNUwy!:a
S
')%"$0 '.K13% 9'%$8€Aˆg.
%%(2'. ')
PŽ
‰
&'Z
'.&' :pM'.%(-&S
*9&4i
e$' Q&M *0
'.'.l3gh 'Z
/
'.!"A0
'j4)]s
6“” S&9:` -$&*kS'.P•
€2(A& ' :
%'.
9&a*q:j}sAl'ZR
3*?&K%'Rj'.&RS'.%()j ./
&'Zg 9.A0V *0
'.'.l3&: x rgP '.&%1%AG
`%'M 0†$SAQ
'.&} €{: yLo 'Z) &$.
0+ *0
'.'.l&-
*% S* 'jM%'Z
>!4& ]7(A%"6&V&'Z'
1*0 x 13% 9!y)7= *0
'.' 9.
!E
`8,ASg *0
'.'. ' :2 'ZK'
1*%80'&*'.'.l3E
*0
'.')[-4)%!P%'#%'Z
&0'jK'.7
*%*% 'Z:
M *0'.' 9.A0'
kMwƒ#vj…T=#'.4)70Fra%(AS
Ž x (53'y!:-bW V 'ZK &$.
0V&*'.'.l3&%'11*0%
U0'8Qh87
e6 '8P
*
)l"6&.
'8AmPŽ5 'Z
'1*% ' :g%'M '.S*'.4)'M3 'ZK& 9.A0G&*'.'.l3&'
G>8'.S '.'Z,S*%*]11*0%5F%&6.U$:;<M3'M'. "9/
!
*A"A$(A 'R A1#!C%'Z%(-&$'‚,
h *0'.'.%0(
%&A. ]$:F;]1.% S*Q=
m>2
'.%*D11*%%E
1A*0&'4)%P
K3P
*0
'.' :
^#4 "6!Q%' 0†9SP'Z0*%*-3'F'.&"6&
*# 4)
e' :z
&!0†9Sj–3]&8
*q:G} ‹`7
e6&'? &P 'Z=&.'8
E
'V]'.&:dNP
"6&Q)&'ZP &$.
0'†9S '3
*%*KŽˆAA‹E(A '>5S'. @,
P *0
'.'.l3%A‚:\;<4iAS*0B>
'.
*0P "A *%A1mF *0
'.'.l3!84)](!2 S
&V3
S'. 'j[&4i&#(A ' :
3.
DESCRIPTION OF NEAREST SHRUNKEN CENTROIDS
‚F"6! Ag'.
. 0(A'#
h= 'ZK& 9.A05 *0
'Z/
'.l&Qa%'.
H!g
*q:+}%s H1
1>A'.EF$l30
Ei
&'Z 9.
*%(A
_e$4)'2c< 'Z'.SeA 2 &/
.
0' : f—80F>&%3U 'Z'.Se6 U 9.
'M%'M
,A*%*%4)0(tHbm-
*% S*0K!7 *0'.'j 9.A07
'i4i#4iAS*0%F
&'Z#& 9.A0P *0
'.'.l&: P4ig%"$K!5& 9.A0P9
`4)%8 *0
'.'Z/n'Z
8 "$0%
=,
h!8(A A:0'(
%"6 '
(!4 %(A9-(A 'R4)A'.&C1 '.'.%A=0''Z*%A(#1/
SIGKDD Explorations.
% 9'i%-'
- *0
'.' :`&74iM11*%'.
,i '.A*0%(
8 '.S*%(+
7*%0?&E *0'.'=& 9.A0' :;ni7
*%%? 9.
@%''.7
*%*qQ7%''.&E? & x 3@ 5%'Z (6
[j- 7
%3&jR#
*% S*0%A3y!:ivH7'.
%(Q4#A$"$/
AS'.*PS KS2>&
H(A ')3#=S'.P%Pgl3*
1 %&%"6$&*{:`˜MM4
S*05
1>M3)%'j4AS*0P%1"6
7 S
&]
)$ *i'=4i *%*qQ`'.% 44
S*0] "6
. *% "
9-(A ' :
wS11>
'.2&7™T1% 9'g
Pšm(A& ' :Fbm24)%*%*i*%&=›>œ 
&
UU&C1 '.'.%AoVžq\(A V
g+Ÿ
\1% 9:
L#*%'.Q6'.S11>A'.&j) ‡ *0'.'. ' :H‘‚&i¡j¢- i%3% &'
)+™‚¢P'
1*% '2%@ *0
'.'2£Y:] m7 A1>
98
)
9.
@ . '.1>A%(GGPžq@(A 5%BP£9¤&*'.'%'
(A%"6&59
$$\bar{x}_{ik} = \sum_{j \in C_k} \frac{x_{ij}}{n_k} \tag{1}$$
8i"A&
*%*& 9.A02 . '.1>A%(KMjžq(A j%2
£9P *0
'.'-%'
$$\bar{x}_{i} = \sum_{j=1}^{n} \frac{x_{ij}}{n} \tag{2}$$
‘‚!
$$d_{ik} = \frac{\bar{x}_{ik} - \bar{x}_{i}}{m_k \,(s_i + s_0)} \tag{3}$$
4)& ´ œ 0'K21>9
*0 G4)%/n *0'.'='Z3G "$0%AG[
(A&ž.t
$$s_i^2 = \frac{1}{n - K} \sum_{k=1}^{K} \sum_{j \in C_k} \left( x_{ij} - \bar{x}_{ik} \right)^2 \tag{4}$$
$$m_k = \sqrt{\frac{1}{n_k} - \frac{1}{n}} \tag{5}$$
x 8†9S3$< ´ ¶ %U8&A%
M%'K71>A'.%"68&A'Z9
% *%SEU1 "6 9g1>
'.'.%%*%%<m385(A 74)@U*%4
&C1&'.'.0
P*% "6&*k&AS*0F1$S =*(A ° œ ¢ 973
A:h;nj3'
`'
`"A*0S`,
H
*%*$(A& ' :H˜Mi1>
'.'.0%*0<8%'a-*%& ´¶ †9S3*
M0
+
´ œ.¼ ' : y
’H†$S%A x ˆ6y
+>K4j. U
'
$$\bar{x}_{ik} = \bar{x}_{i} + m_k \,(s_i + s_0)\, d_{ik} \tag{6}$$
JM4Q4g11*+'.
./n '.
*0%(28 '. &$.
0' :`‘‚&
$$d'_{ik} = \operatorname{sign}(d_{ik}) \left( |d_{ik}| - \Delta \right)_{+} \tag{7}$$
where
$$t_{+} = \begin{cases} t & \text{if } t > 0 \\ 0 & \text{otherwise} \end{cases} \tag{8}$$
bmg9
'.8g
1%7
*a"
*%S=i¿Å9+&A'.'Z/n"
*%0%
‚: x vH
!q
S*Q
i„YLKNu'.
4ji 'Z'RˆA)1>A'.'.%*%"
*%S&'R
3¿o(/
%(-,Ad-)i"
*%Si
`*(A 'ZH &$.
0=%g-'.&:
)
1%7
*3¿—0'`!A'. >)"A*0S[i4)%j&
'.'Z/
"
*%0%
D0'.&*'.'.l30
D&.
=%'=%%%? k: yT 4g&l3MPc'.Se6&P 9.
'.f2>
$$\bar{x}'_{ik} = \bar{x}_{i} + m_k \,(s_i + s_0)\, d'_{ik} \tag{9}$$
JM23g ° ½œ ¢ ¦ +,
8
*%*`£V,
8P(
0"A ]ž.QR m*%*`
'.Se6&+ &$.
0')?&&Q
+(A&KžH9&'-
) 9.0S
2Ml3
*k *0
'.'.l3%A‚:
Volume 5,Issue 2 - Page 49
ÆBL
NB
RMS
Gene
0
500
1000
1500
2000
EWS
-0.5
0.0
0.5
-0.5
0.0
0.5
-0.5
0.0
0.5
-0.5
0.0
0.5
Average Expression
ra%(
S2Ž$t-…i 9.
' x (y)
5'.Se6 5& 9.A0' x 3y[M8w$ƒMvj…'.&:-="6&
*%*a& 9.A053'M> U'.S.&
A‡M 9.A0P
5 *0'.' :
%? 9
*RS')*%
(%A'-
!C1 '.'.%AkQ>
FK
&-
M(A ')0'-..9:
J#4z'.S11>
'.K3j4i3"6gGc< 'Z#130&$f=4)5&C1 '.'.%
*% "A *%'=›YÇ ¦ x ›kÇ¯È ›k·Ç ÈÉ&É É‚È ›kÊ Ç y!:bm24)%'.]+ *0
'.'.,]›kÇ8+
&*'.'-4)A'.Vc'.Se6 F 9.A0f%') 'Z)› Ç :`‘k&
$$\delta_k(x^*) = \sum_{i=1}^{p} \frac{\left( x_i^* - \bar{x}'_{ik} \right)^2}{(s_i + s_0)^2} - 2 \log \pi_k \tag{10}$$
x ^M!AQ6ÌY¢#&1 '. 9'j1%
`1A
0*%2
Y *0'.'£>Q63`%' Q
`1A1>.0
g
*0
'.'£K%h1>A1S*0%
‚:a;nÌY¢0'RSe$4)kQ
#
V> 'Z%7P,
Í8$Q>
-4gU*0!KÌY¢ ¦ sÎ
,
M
*%*‚£Y: y&PK&*'.'.l30
US*%K%'K¡ x › Ç y ¦TÏ 4)&
$$\delta_\ell(x^*) = \min_k \, \delta_k(x^*) \tag{11}$$
;nh4g4)0'.U7 'Z%7gg1A3%*%5#› Ç > *%
(A'#7
(
%"6 5 *0'.' Q47 +8'.%+K,A*%*%4)0(77
&t
$$\hat{p}_k(x^*) = \frac{\exp\left( -\delta_k(x^*)/2 \right)}{\sum_{\ell=1}^{K} \exp\left( -\delta_\ell(x^*)/2 \right)} \tag{12}$$
x %'=%'=
*%A(
AS'=P2¸ 19 SS'.VP 'Z%7 *0
'.'
1A
%*%% '`%|K
S'.'.0*%0`0'.!0%
9h
*'.%' 9'. } €
,
M&%*0' : y
%'.&%%39g'. 'g% x ˆ6yK'.%%*0FA'.2S'.G0
*%% U%'.&%0
9P
3*'.%' x ‘‚I#LKy!:j‘kIMLÔS'. 'FGN5
/
*0A%'&.%j A1S-0'Z ->&<4 FK(
0"A 'Z
'.&"
%
U
+K&*'.'#& 9.A0' x %5"6 !
)%A3y!t
$$\delta_k^{\mathrm{LDA}}(x^*) = (x^* - \bar{x}_k)^{T} W^{-1} (x^* - \bar{x}_k) - 2 \log \pi_k \tag{13}$$
^#&8ÙÛ 1&'. 9')g1>9A*%P4)%/n *0'.'M"
0
Ü&"
/
g7.%CF&C1&'.'.0
U:‘‚I#Lz3')> 5'.S '.'Z/
,S*%*¤
11*%0 @EG4)0+"A%&@K1 %&%AB1
*% 'P} €n:
SIGKDD Explorations.
^M4i "6&QA‘‚I#LB H> &*8
11*%0 8M(
!C1 '.'.%A
Q9'.% -j$S2>!h
k1%&
' x (A 'yh%'H2S!(!
3G2$S8>&K'1*% ' x 1% 9'y!:L-'gF&A'. †$S &AQ
i7.C8ÙÝ%'&C$. *g*0(
A:h$S' Q6
9g'
1*% 'Z%7
`ÙÞ4)%*0*‚>'.%(AS*0Q>3F%')%$"6&'.K4)%*%*R>S3!l3 k:
JM 'ZH'.Se6&=& 9.A0'%'$S'a'.%%*0hK‘kIMLgQ4)8'. "9/
&*e6&V%XY! &' :;<K
'.'.S&'#3M8 "
0
27.C
Ùß%'50(AA
*q:àL#'F
\*%%&QK+4AS*0o>V%1>A'.'.%*0
1>&.,
Å& '.'.P
*% S*0%A'M4)AS#%'M'.'.S1/
%Ak:GL#*%'.QH‘kIML”S'. '=4_ *0
'.'2 9.A0' Q4)&'84
S'.2'.Se6 G 9.A0' :7L#G%1>
.
9g '.†9S 2
i%'
q&=%'K3M&24)%*%*H>'.A8(A 'K[=4)%! ° ½œ ¢ ¦ 7[
*%*R£Y:`w$SP(A& 'j4)%*%*R->KS'.+0F *0'.'.%l%
‚:
4. RESULTS ON THE SRBCT DATA
%'19 S84
'=
11*%%VFwƒ#vj…»FK} ‹{'. }s[h 1*%& '.S*' :hj"
*%Si
k¿—%’H†9S30
x ˆ6y
4j'KA'.&E9U11*%%( /{[A*0U&
'.'Z/<"
*%0%A‚:=v`
U
&
'.'Z/<"
*%0%AB&.
3]&'Z&.24!P%%%? E4) ¿ ¦ ~ É ˆ~:-g&.M&S"6 '#g'.4)U%Vra%(AS=ˆ:)g!/
'.S*%(g'.SeA 9.
'#'.4)7%Fra%(ASMŽ x 3'y!:
%'=$ *`1$S E? &U&
'.'Z/<"
*%0%A@&.
'2
]? &
'Z2&.
' :E;<=†9Sm~AˆG(
' :E$S' Qh,
2%'U'.&Q
'ZR'.Se6 K& 9.A0'k1$S '‚
& S`1%&%A'RS'Z/
%(2&*%"6 *+,&4z(A ' :
ra%(AS8~F'.4)'#g~6ˆ7(A '#M4i&=S'.P7 *0
'.'.mw$ƒ-/
vj…'A(
&!4)%=H"A*%S&%a'.Se6 g 9.
'a[
!UHK[
SK&*'.'. ' :-JMg)g(
')4)VA/n? &
1>A&$'j%U8(A%"6 + *0
'.'#g
*%
'Z#8SS
*%*7&C *%S'.%"6A:
ra%(ASF‰+'.4)'g 'Z%7E1A3%*%0&'8)> *%A(
%(P
Volume 5,Issue 2 - Page 50
ä
1
5
8
10
15
22
34
52
Size
81
133
206
339
598
1020
1668
2188
2308
ã
0.8
æ
ä
te te
tr
0.6
ä
æ
Error
0.4
æ
ä
å
te
0.2
te
æ
ä
å
cv æ
0.0
0
te te
æ
æ
cv
te
cv
te
te
te åcv ætr
tr
æ
tr
te te te te te te te
cv
cv
æ
æ
æ
å
tr
te te
tr
tr cv tr cv tr cv tr cv
å
â
2
á
4
6
Amount of Shrinkage Delta
ra%(
Sˆ$tg8&.
g S"6 ' x .%%(tM.!Ü( kQa&
'.'Z/<"
*%0%A‚tK "ÜYQR3U 'ZtMÜ*%Sy# '.S*%(F,
ç
11*0(+&'Z
'.Se6 U 9.
'#2wƒ#vj…z$:8"
*%S2¿ ¦ ~ É ˆ~+%0%? &'-8&
'.'Z/n"A*00%AG&.
MA:M;<#1$S 'MF'.&M
~Aˆ(A ' :
SIGKDD Explorations.
Volume 5,Issue 2 - Page 51
BL EWS NB RMS
813841
859359
207274
296448
898219
784224
796258
244618
789253
298062
461425
1409509
42558
769716
25725
44563
325182
812105
41591
810057
52076
866702
814260
43733
357031
1435862
770394
377461
1473131
295985
241412
80109
183337
233721
897788
563673
504791
212542
365826
204545
308163
21652
486110
ïtissue plasminogen activator
øquinone oxidoreductase homolog
í
íinsulin-like growth factor 2
÷insulin-like growth factor 2 (somatomedin A)
ñhomolog of mouse mesoderm specific transcript
growth factor receptor 4
öfibroblast
sarcoglycan alpha (dystrophin-associated glycoprotein)
èEST
presenilin 2 (Alzheimer disease 4)
ïtroponin T2, cardiac muscle isoforms
îmyosin MYL4
ïtroponin T1, slow skeletal muscle isoforms
õ
amidinotransferase
òL-arginine:glycine
neurofibromin 2 (mutated in neurofibromatosis type 2)
ñ
farnesyltransferase 1
êfarnesyl-diphosphate
growth associated protein 43 (GAP43)
ô
óN-cadherin (neuronal)
gene from chromosome 1q
îALL1-fused
meningioma 1 (disrupted in balanced translocation)
ëcold shock domain protein A
òneuroblastoma protein (NOE1)
ñFas-associated protein tyrosine phosphatase 1
lymphoma variant translocation protein 1
êfollicular
glycogenin 2
ð
ïtumor necrosis factor alpha-induced protein 6
MIC2 surface antigen (CD99)
Fc fragment receptor transporter, alpha chain
ëIgG
caveolin 1 (caveolae protein)
ïtransducin-like enhancer of split 2
EST
factor 1 (ets domain transcription factor)
îE74-like
major histocompatibility complex, class II, DQ alpha 1
îmajor histocompatibility complex, class II, DM alpha
í
growth factor binding protein 2
ìinsulin-like
receptor type protein tyrosine phosphatase F
éantiquitin 1
êglutathione S-transferase A4
ëcDNA DKFZp586J2118
êgrowth arrest-specific protein 1
EST
éEST
alpha 1 catenin (cadherin-associated protein)
èprofilin 2
ra%(
S=~t)="
*%S 'M ° ½œ ¢ [M~6ˆ(A '#,
#4)%!VK*%'ZKA ° ½œ ¢ %'MA? &[M=w$ƒMv…—:)J#
3#=(
&'
4)5A? &'.SeA +& 9. A0')%5!5&*'.'Mg
*%A'Z-8SS3*%*%7!C&*0S'.%"6A:
SIGKDD Explorations.
Volume 5,Issue 2 - Page 52
*0
'.'h[i!213% 9:hr
hA'ZH1% 9' QA 'Z%7
1A
%*%]
)> *%A(A%(U5.S *0
'.'=4j'8'.%(Al3$*
(!=
U8 'Z%7V1
3%*%%<G
>&*0
(
0(++
9
&K *0
'.' :;nK%'g
*%'.+09&&'Z0(+F&C
%g8 'Z%7
1A
%*%% '5
gl"65 'Z+'
1*% '73+4i&G
5wƒ#vj…' :
x '.Ml"6M'1*% ')M7e64)5= *%A7#(
1‚: y
J#
j3`) 'Z%721A
%*%% 'i[i '.jl3"6j'. '
'.%(
l39*%]*%4i&=3
E 'Z%7 ]1
3
%*0% 'g[8
.S=wƒ#vj…' :
5.
DIAGNOSIS OF CANCERS WHERE NO
SUBTYPES ARE KNOWN TO EXIST
b]3"6'. &]38&'Z2'.SeA ] &$.
0'=3
'1>
/
&$0*`5>+1>4i&.[S*`9
*`[0
(AA'.%(U &&:7bW '.&"6&
*R
&K'.S<1>&'#ge4)5&C0'ZQ3%#US'.g(A !C1 '.'.%AU%,
70
P0'Z%(AS%'.5>!4& P'.S<1>&'
S'.%(2'.7*%*‚'.&-h(A ' :
^#4 "6!QA 'Za'.Se6 &$.
0'%')'.S1>&"$%'.g*0 %(
19 SA:;nH
*=>)
11*%%2%'. 'H4)&j'.S1> '
&&=*A$Ee$4)VP!C%'Z:8pM[
.S3 *9Q‚+'.S!
'.S<1>&'#"A=>& V 9l3P,
7$F1> '-
` &:#;
'.S!W '. ' QM 'Z5'.SeA T &$.
0'P +>]11*0%
S*% '.')'.A1S0"AK'.S1> '5>0 9l3 k:
N59U1> 'M !g8'.S'.1> & VF>2
*0& S*0*V&./
!A(A& S' :ùrV0'Z
AQ=XYS'.m*0(A@vH/<& *%*8*1A7
x IK‘Rvj…‘Ry20'P
'Z A
m[ú
K*1A7UA(
S*%' :ÔL-11C%7 *W~A6“¨
IK‘Rvj…‘O13% 9'P&'.1>A3
V &19m3] "6!:G7 7
%3&=4)0*%*jS'.S3
*%*
'.S S257%'.
'.A:F} •HsAs!H;<K%'> *%% "6 VM%'%'Z/
! 13&D%'2F&'.S*
#"
0%A¤
(VI‘‚vj…‘‚'2
*% S*0#*% "A *q:
vHV%'.&*%<QR&'Z'.Se6 U& 9.A0'
K>8S'.5+/
(AA'.F+1% 94)]I‘‚vj…‘H:‚;<`4+
g'.S1> 'K
IK‘Rvj…‘z4i&5e$4)B]&C%'ZF
¤A5'.S1>+4i&5
(A( '.'.%"6235=
&' Q>4= AS*0Ul 'ZM'.Se6 & 9.A02 *0
'.'.l&`K&&%i4)%2'.S1>%'h1 '.&$h%2
(
%"6 P1% 9:h^M4i "6&Q='.SF'.S<1> '3"6#> F *%*
0 9%lk:
J# "6&. *% '.' QYh4=S'.g 'ZK'.Se6 5 &$.
0'#
(A&&
4)5S'.S1>&"$0'. P*%0(&$' Q%')1A*% ‡> '
.&
*%A:8r
!C1*%AQh}s&`3
*? G70!6. G7'.&
&A'.%'Z%(F
h8&C1 '.'.%AV*% "6 *%'M`ˆA€6Ž~+(
&'#[ˆ
€FI‘Y/
v…‘T130&$' :pM'.%(+%&%*i *%S'Z&%(m} ށ{Q‚&V0&/
lG<451S%"62'.S(AS1'g
jIK‘Rvj…‘hQ‚4)0]&c.|…
vH/<*%%e6)I‘‚vj…‘‚f3]c
&%"
vH/<*%%e6jI‘‚vj…‘H: fg&2
F13% 9'74)o|…‡v`/n*%%e6UI‘‚vj…‘— 3@m*%%"6U*%A(
&
51% 9'4)U
!0"
5v`/n*%0eAI‘Rvj…‘h:
L-*AS(AP%')&'.S*-0')%9.%(AS%(Q3-3')*0%FS%*%%<5'-
0
(AA'Z%79A*q:5^#0!%
*` *%S'Z&%( x 2
9G
!8<1>
M&*0S'Z&%(6y8
BA*D>P11*0%DGG*0(A5(AS1@M1/
% 9' :Frg%'K
'.
‚Q&*0S'Z&%(59G'. *j=>S'.
&A'Z.S&i0
(
A'Z%)9A*q:H;<kK1% 9H0'i0
(
'.4)%
IK‘Rvj…‘hQ`&*0%0&28S'Z‚>
*%`-1>&.[d-0
(
A'.%'hA
#%3%"$0S
*k13% 9:i^M#4
KS90*‚*0(
8(AS1
a&)1% 9'jK0
(
A'. F4)+IK‘Rvj…‘G'.83j#
11*%+ *%S'Z!0(22K 9(
AS1‚:
J#&'Z'.Se6& 9.A0'iF"6! Aj%'iû S*%<9:L),/
!21S%"6S
='.S1> '"AF>& m0 9l3 kQ
7
11*%o&'ZU'.Se6 W& 9.A0'P¤.&1P¤0(A
'.
4)%!'.S<1>i0'h1 '. 9H0
%3%"0S3
*$1% 9:abm`4AS*0
A1>2K2'.S"%"
*a% 'K8130&$'KXY&g>&<4 1%&+'.S<1>&' :
SIGKDD Explorations.
bm 'Z P%'#0A5gI‘‚vj…‘@
-}s&{:#&g4&
ˆA€713% 9' QY
H4)%!EŽ9s=4i&2&*'.'.l3G
'K"%(+&%"A
v`/n*%%e67I‘Rvj…‘W3¤s‰+4&F *0
'.'.l@
'=3"$0(V|…uv`/n*0%e6
I‘‚vj…‘H:b]H3
*g%"$0M '.ˆ
€-1% 9'R%9Mj.
%/
%('.!#s‹813% 9'-
P8&'Z)'.&-
s ‹13% 9' :b]Ml-
'.Se6& 9.A07 *0
'.'.l3&jggs‹.
%0(81% 9' :H
&
'.'Z/<"
*%0%A@&.
84j'2%%%? V4) B¿ ¦ s É •AŽ$:
%'+A1%7*#$ *MS'.W€6Š](
' :u;nF1$S Wˆm&
'.'Z/
"
*%0%
U&.
'#
Uˆ8 'Z#&.
' :-NPg01>.
9*9Q4j/
"6!Q9130&$'H%j1%&7|…@v`/n*%%e6)I‘Rvj…‘5&*'.'*%%"6
'.%(A%l
9*+*0
(
&)3
+130&$')%+K1 %&5&%"A
v`/n*%%e6KI‘‚vj…‘] *0
'.' x '. ra%(
Sg€6y!:`$S' Q 'Z-'.SeA 9.
'+ \>VS'. ¤m&*01\0
(
A'.G &+ "6&T=
&-'.S1> ')e$4)+!C%'Z:
6. CONCLUSIONS
IKJMLü0!6. '5"AVE1> 90
*KD "6
*0S%A%? G
4j U4i0(A
'.
5. &&:;V&K7[S*%*%VS%*0%? &%g1> 90
*qQa4i "6&Q‚48S'Zg "6 *%
1G9
*0'g53*%? &5:hJM 'ZM'.Se6 F 9.A0'-%'-21>4&.,S*‚9
*‚,
#&C$/
.
!0(S'.&,S*k%[7%A5AÅ%&6. 5:iv`+0 /
,%(g#(A 'i3)# & '.'.78XY& 90#>&4i XY&&$<1> 'g
) &Qa%=m&*01ES'Pl3E 30&'
[h
%'.%($%>$0&'a[
H%2S
'Z
%%(:NP "6&QAAS!/
'.S*'M%'.8=1>A'.'.%0*%Gi& %(+0
(
'Z08 'Z'M
'.
AUƒ-JKLz&C1 '.'.%AU*% "A *%' Q>1>&31'-S'.%(ƒHi/<„`…iƒ=:3ra0
*%*9Q
47"A'.4)m3=7&$A*%A(]%'='Z0*%*S'.&[S*`,
&'Z/
%7%(+'.S"$%"
*h
) !21% 9'4) E5'.S1> '
3"6j>& 0 9l3 k:hNPM S-0
(
'. 'i&*01&*0%/
% 0
'8V(A%"67!@13% 98+11A10F&
199Qa4)0
4)%*%*H%&
'.823 &'M3K=130&$M4)0*%*h'.S"$%"6AQ‚3
&*01=K'.131% 9,
»'.0j!XY &'
YS '.'.
. 9' :
7. ACKNOWLEDGMENTS
’05v
74j'[SD9B
BJwr»|#AS3Pƒ- '.\r */
*%4)'.%1‚:Hƒ-A>&.0'.
>4j
'j1.0
*%*F'.S11>
.9JM;<^
(
9gŽƒM$s2…LKŠAŽ6Ž‹QR
3UJKwr@(
9MIgNUw$/<•A•AŠ$s&~66‰9:=w&"$/
&*a
hKl(AS 'M=&1$S +AÔ
U*%0!.% *%8$
ƒ-A>&.-%'.
k3-4
')1S*%%'.F%U„HJMLKwV}%s {:
8. REFERENCES
}s!=L#'.uL=:L#*%%?A ‚QFNP%3 *7v#:g’h%'. ‚Qƒg:g’%¤I"$%' Q
…iN5$Q-;? 0GwY:‘‚A'.'.
' QKL-3$
'7ƒ-A'. 94j*0kQ#ýA /
[&]…#:KviA*0%e>Q8^Mþ &mw>&Q=kSD‚‚Q=ÿM%iSkQ
ýAA;:)„a4 *%*qQ‘k%
( (QK|M!
*0z’j:#N5.qQK‚
NP9
AQý6
&'^MS'.AkQ$ý
:%Q‘‚%'.&P‘kS‚Q$I"$0Fv-:‘‚&4)%' Q
ƒ#
>&.D%'.
qQP|K"$0_w$&*%9eYQ2bW%(—…#:8…i3
kQ
09…#:
|# %&QIK %'‚I2:!bm %'. $S(A!QAýA
'a˜8:
L-
(
AQƒ#
(A&‚bDe6AQƒ-A
*0g‘‚ "99Q bT
\bW%*/
'.A‚QNP%3 *>ƒ=:$|# "6&QýA
P…#:$v`$kQ$IK"$0vi'Z %‚Q
„.%!e+˜8:v`4)kQ33F‘‚AS%'-NE:>w9
S3$Q
!#"%$'&(!)+*,- %./0 .1"2 3- )4&(56879*
$
:$ 3 (&
QJMS;<5 = x Ž

A 6y!Q>‰

ˆ? >‰9sAs
:
} ށ8NP03 *>v#:6’H%'. kQ$„
S*>M:$w$1> *%*%7
‚Q6„.%e˜8:Av`4)‚Q
3I"$—vi
'Z %kQA@B C
,$D"E"9 E
F"E G HI#"E &( .1*IJB K87$
G"E,$ Q„h9 &%(
'E
JK%
3
*3L#
=2w$ % &'LM x s•
•A‹6y!QYs&~6‹A€
ˆ?> s ~6‹
€A‹$:
} ˆ=M: ƒg:|MA*%SkQPI2:8–:2w*%A%5Q5„a:8a
7 9QU…#:8^MSYQ
NE:`|KA
'.&>& e>Qiý:H„:hNP '.">Qi^8:h…i
*%*0!Q)NE:H‘H:‘‚AkQ
Volume 5,Issue 2 - Page 53
Training Data
Probability
1.0
BL
•••••••••
••••••
•
•
0.8
EWS
••• ••••• •••
• •
•
NB
••
•
0.0
••
•
0.4
0.2
•
•
0.6
••••
RMS
•••• • •••••• •••
•
•
•
•
• •
•
•
• •••
•
•
•
• •
• • • • • •• • • •• •
•
•• •
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•• • •• • • • • • • • • • • •• •• •• • • • • •• • • • • • • • • • • • • • • • • • •• •• • •• •• •• • • •• • • • • • • • • • •• ••
0
10
20
30
40
50
60
Sample
Test Data
BL
Probability
1.0
•
EWS
• • •
• •
NB
•
0.8
0.6
•
O
0.4
0.2
0.0
•
•
O
•
•
RMS
• • •
•
•
O
•
•
• •
•
O
•
•
O
••
•
•
•
••
•
•
•
• • • • ••
•
•
•
•
•
••
•
•• •• •
•
• •
•
• •
•
•
• • • • •
• •
•
• • • • • • • •
5
10
15
20
25
Sample
ra%(
S+‰$t8’h'Z07E&*'.'81A
%*%% '=,
=7w$ƒMv…O$:5w
1*0&'g713.%%
]9U.S *0
'.' x S11>!!yK
3G1%&
&*'.' x *04i&!y!:HL#*%*>
‚#€Aˆg.
%%(='
1*% 'j# *0
'.'.l3 + . &*9Q
')-MŽ
g 'Zj'
1*% 'i3jMe$4)78>Mwƒ#vj…' :
ra%"6-
Y 'Z'1*% '`#
&S
*%*jw$ƒMvj…' 9&#7e684)7 *%A:hJ#
j3` 'Z%7 *0
'.'i1A
%*%% '
g'.0(
l3 9*P*%4&)
+1
3
%*0% '-
a
&j 'Z-130&$')%P 5 *0
'.' :
SIGKDD Explorations.
Volume 5,Issue 2 - Page 54
1.0
0.6
0.4
0.2
Survival Probability
0.8
low−risk patients
high−risk patients
0.0
p=0.046
0
20
40
60
80
Survival in Months
ra%(
SF€twS"%"
*h S"6 'g
j10!]'.S>&*'.'. '2jI‘‚vj…‘\:5ra'ZQ%&!0 * *%S'Z&%(54j'211*0%GV
s ‹/n13% 9c<.0%(='.&: f]i4i20'Z%&'.S> *0
'.'. 'jRI‘‚vj…‘U4&-0 9l3Y:h„% 9'%7A#'.S> *0
'.'A'.%(A%l
9*7>&.&
'.S"$0"
*h3E130&$'g%]
&='.S> *0
'.' :5˜M&2 '.'.S> *0
'.'. 'g4! 9l3YQP 'Z='.SeA E& 9.A0]$&*
4
'gl=S'.%(P '.5s ‹P.
%%(5130&$' :+pM'.%(P%'g$ *qQ m130&$g%DTs‹/n'
1*%@c<&'Z8'.!f54j
'= *0
'.'.lD
'g !
c<*%4j/{%'.e$f7
Fc0(
/{%'.e>: f]Uc<%(A/n%'.e$f1% 9')3AF'.%(Al
9*P1>9&-'.S"$%"A*q:
ý:ƒ=:IK4)%(QYNE:3L=:Y…*%0(
%SqQ‚…#:3I2:vi*%9Al *0kQ3
3
’j:w>:a‘‚
3&OQ N P8#"%$+#"E
3Q
"E "E$SR4+#"E
$!"E +#"E
-$8 ' A)U&(5V87$8
A. *
$E(T &
QYw&0& XWYZ x s•A •
•6y!Q> ‰
ˆs>‰ˆA€:
} ~gL=:RI2:a|M
‚Q@B#"E
3"% Q`…i
17
[‡^K*%*{QHv`9
ƒ AkQ3‘‚L=QYs•
•A•$:
#
} ‰g‚ "A
-^K'Z0
Q3ƒ-A>&.#%'.%qQ3IK"$05vi'Z %‚Q3
3
„.%e vH4)k]
Q \?$ 0"%$ (&
87$8
$
Qk|M Av`0
*%A(
A W_T ^`a x Ž

T sy!Qas >sŽ$: } €g‚ "A
F^K'Z0
Q)ƒ#
>&.+0'.
qQM3¤ýA&AVr0 /
7
kQcb_0XP. d I"E
'
"9eP
"%$C&(R "EI"V.VC&Ef
$
A"E $
' Qw1%(A&.I/ gH&*0
(Q#JM!h
4 `
e>Q
iJ 8Q Ž
A$sA: } Ё;(
0 ^M [
*%e>QçI"$0 IKS(
(6
kj
Q K0
(ۅi&‚Q
NP%!
*TƒM
7
!QNP%!
*Wv`%.&Qƒ#%3úw/
AkQ`„
S*-NP&*%?&&Qjv..B|MS'Z!'.A‚QjN5
*)’H'Z&*0*%&Q
N5eGƒ#XY *0klQ k>A3
e$%qQRL#gvi /nIK
QR’`$4j
IK
S(A&.9Q`ýAS3V–AA&‚Qi‘‚Se
'2viS>&3.Q`bW%*%0
r *%AQAw$&[
0-„H.*0S(6$Q$wl>-|#S"$>&(
&QJM%e$*0
'‘k
/
7
kQ˜M'.e
)ýA
3'.'.
‚Q!
^ m eAF˜M*%'.'.A‚Qvi&
þ.%FbW%*/
[
3YQ|MS02wS&Q˜M*%*%/„a:9–g*%*0%
% qQL#e6#vi
(Q
3
ýA&X>!àk 9Qonpq*d87$
r$ 3PsHr0$8 I"E$
)$
8"E
V
"E5$Qj+JM&4‡’H(A*0
Bý
AS
*)
KNP0&0
=;; x ŽA$sy!Qk‰ˆA?• >$‰~A‹:
} ‹gý6"6–
kQ9ýAS7wY:
bm qQ$N5e$S'Hƒ#%(
Q &t Q9‘R
^=:$wA
*qQ
N5T‘R
9qQFr
ezb] 'Z&7
‚Qr
e»vi&.A*0kQ
N5
,Bw$!94j‚QH…`%'Z%3Uƒg:aL-9A&'. S‚QH…'Z B„a&/
&'.
‚Q#
\„
S*wY:NP *? &Q:@B#"E
3Q
"E u"E o '"S&9*
SIGKDD Explorations.
'/$8 ' "E$UCHC&v&(5187$
4$ *
3x (&v"E "%$ 3 '"9 5$"95qJ $8w%QJMSMNP 0&0 x
Ž
A$sy!Q3€6Š
ˆS$> €AŠ
•:
} •gJMA/n^M$(
e$0k¼ '@‘k1A7z…i*'.'.l30
̈́hþZ !QAy
+ '
"95 "9 "E H0z,$5"E 5"9q %./0 .1"{
&E$ ?K+#"ET 3Q
"E *0 S &?w%q | 1 %./0 .1 " QRv`*09$ YL x s•
•6ŠAy!Q3ˆA•
A•?9> ˆA•$s‹:
}sgƒ#
>&.¤0'.
qQG‚ "6T^M
'Z%AQVv*0
'.S
70
JK'.%3
kQ#
3o|M%*%>&.U…iSkQz'"S&E .V 5P
"%5$v1)60C$Cw}o$ &(q87$8
Q
„h9 &%(
'+K+JK0
3*ML-A 8@=w % ':LL
x Ž
AAŽAy!Q3€6‰
€AŠS9> €6‰AŠ
Ž$:
}sAs!=ýAS*%%NE:5Hg A'.
Q@c$$
i"}$ "}0V H0{.1"E"S&(.
*0 S &?w%q| : E.i0 .v
" QHw$ %3'%@˜M A*%
(
oWM
x s•A•
‹6y!Q~6‹AˆS9> ~A•sA:
Volume 5,Issue 2 - Page 55
Related documents