c c program calculating Friedman-Rafsky generalized correlation and c prediction measures. c c Friedman J. H. and Rafsky L. C. (1983). Graph-theoretic measures of c multivariate association and prediction. Annals of Statistics, 11, c 377-391 (June). c c version 10/10/84 c c coded and copyright (c) 1984 by: c c Jerome H. Friedman c c Department of Statistics c and c Stanford Linear Accelerator Center c Stanford University c c all rights reserved. c c c signif = frcorr (graph,n,p1,x1,p2,x2,k1,k2,deg,grph1,grph2,iscr) c c integer graph,n,p1,k1,p2,k2 c c real signif,x1(p1,n),x2(p2,n),deg(n) c c integer grph1(k1,n),grph2(k2,n),iscr(n,2) c c signif = normal significance of test. c graph = flag for type of graph in each space. c = 'kmst' : k-mst graph. c = 'knnr' : k-nn graph. c n = sample cardinality. c p1 = dimension of first space. c x1 = data matrix of first space. c p2 = dimension of second space. c x2 = data matrix of second space. c k1 = order of first space mst(nn) graph. c k2 = order of second space mst(nn) graph. c k2 = n flags second space as the response c for prediction measure. c deg = scratch storage for frcorr. c grph1, grph2 = scratch storage for frcorr. c upon return they store the k-mst(k-nn) c graphs for the two spaces. if k2 = n then c grph2 stores the distance matrix scores c for response domain. c iscr = even more scratch storage for frcorr. c c labeled common: c c common /print/ iprint c c iprint = print flag for program. c = 0 : no printed output. c .ne. 0: printed output. c default value = 1. c block data common /print/ iprint data iprint /1/ end subroutine knear (m,n,points,knr,graph,scr) integer m,n,knr real points(m,n),scr(n) integer graph(knr,n) do 110 k=1,n km1=k-1 kp1=k+1 do 10 i=1,knr scr(i)=1.0e20 10 continue index=1 j=1 go to 30 20 j=j+1 30 if ((j).gt.(km1)) go to 60 dist=0.0 do 40 i=1,m dist=dist+(points(i,j)-points(i,k))**2 40 continue if (dist.ge.scr(index)) go to 20 scr(index)=dist graph(index,k)=j do 50 i=1,knr if (scr(i).gt.scr(index)) index=i 50 continue go to 20 60 j=kp1 go to 80 70 j=j+1 80 if ((j).gt.(n)) go to 110 dist=0.0 do 90 i=1,m dist=dist+(points(i,j)-points(i,k))**2 90 continue if (dist.ge.scr(index)) go to 70 scr(index)=dist graph(index,k)=j do 100 i=1,knr if (scr(i).gt.scr(index)) index=i 100 continue go to 70 110 continue do 150 j=1,n do 140 i=1,knr nij=graph(i,j) do 120 k=1,knr if (graph(k,nij).eq.j) go to 130 120 continue go to 140 130 graph(i,j)=0 140 continue 150 continue return end subroutine primk (m,n,points,ftree,ltree,mst,ui,ji,nit) integer m,n,ftree,ltree real points(m,n),ui(n) integer mst(ltree,n),ji(n),nit(n) do 100 ktree=ftree,ltree ktrem1=ktree-1 nitp=n-1 kp=n do 10 i=1,nitp nit(i)=i ui(i)=1.0e20 10 continue 20 do 80 i=1,nitp ni=nit(i) if (ktrem1.le.0) go to 40 do 30 itree=1,ktrem1 if (mst(itree,ni).eq.kp) go to 60 if (mst(itree,kp).eq.ni) go to 60 30 continue 40 d=0.0 do 50 i001=1,m d=d+(points(i001,ni)-points(i001,kp))**2 50 continue go to 70 60 d=1.0e20 70 if (ui(i).le.d) go to 80 ui(i)=d ji(i)=kp 80 continue uk=ui(1) do 90 i=1,nitp if (ui(i).gt.uk) go to 90 uk=ui(i) k=i 90 continue kp=nit(k) mst(ktree,kp)=ji(k) ui(k)=ui(nitp) nit(k)=nit(nitp) ji(k)=ji(nitp) nitp=nitp-1 if (nitp.ne.0) go to 20 100 continue return end function frcorr (graph,n,p1,x1,p2,x2,k1,k2,deg,grph1,grph2,iscr) integer graph,n,p1,p2,k1,k2 real x1(p1,n),x2(p2,n),deg(n) double precision e1,e2,tc1,tc2,expect,var,a,b integer grph1(k1,n),grph2(k2,n),iscr(n,2) common /print/ iprint integer kmst data kmst / 'kmst'/ nm1=n-1 if (graph.ne.kmst) go to 10 lim=nm1 call primk (p1,n,x1,1,k1,grph1,deg,iscr,iscr(1,2)) if (k2.lt.n) call primk (p2,n,x2,1,k2,grph2,deg,iscr,iscr(1,2)) go to 20 10 lim=n call knear (p1,n,x1,k1,grph1,deg) if (k2.lt.n) call knear (p2,n,x2,k2,grph2,deg) 20 if (k2.eq.n) call rnkmtx (p2,n,x2,grph2,deg,iscr) do 30 j=1,n deg(j)=0.0 30 continue do 50 k=1,k1 do 40 j=1,lim if (grph1(k,j).le.0) go to 40 deg(j)=deg(j)+1.0 deg(grph1(k,j))=deg(grph1(k,j))+1.0 40 continue 50 continue tc1=0.0 e1=tc1 do 60 j=1,n e1=e1+deg(j) tc1=tc1+deg(j)*(deg(j)-1.0) 60 continue if (k2.ge.n) go to 110 do 70 j=1,n deg(j)=0.0 70 continue do 90 k=1,k2 do 80 j=1,lim if (grph2(k,j).le.0) go to 80 deg(j)=deg(j)+1.0 deg(grph2(k,j))=deg(grph2(k,j))+1.0 80 continue 90 continue tc2=0.0 e2=tc2 do 100 j=1,n e2=e2+deg(j) tc2=tc2+deg(j)*(deg(j)-1.0) 100 continue go to 140 110 a=0.0 b=a do 130 j=1,n colsum=0.0 do 120 i=1,n colsum=colsum+grph2(j,i) a=a+grph2(i,j)*grph2(j,i) 120 continue b=b+colsum**2 130 continue 140 d=0.0 do 190 j=1,lim do 180 k=1,k1 jpar=grph1(k,j) if (jpar.le.0) go to 180 if (k2.ge.n) go to 170 do 150 i=1,k2 if (jpar.eq.grph2(i,j)) d=d+1.0 150 continue if ((jpar.eq.n).and.(graph.eq.kmst)) go to 180 do 160 i=1,k2 if (j.eq.grph2(i,jpar)) d=d+1.0 160 continue go to 180 170 d=d+grph2(jpar,j)+grph2(j,jpar) 180 continue 190 continue e1=e1*0.5 if (k2.ge.n) go to 240 e2=e2*0.5 expect=2.0*e1*e2/(n*nm1) var=expect*(1.0-expect)+(tc1*tc2/(n-2)+4.0*(e1*(e1-1.0)-tc1)*(e2*( 1e2-1.0)-tc2)/((n-2)*(n-3)))/(n*(nm1)) std=dsqrt(var) xstd=(d-expect)/std frcorr=0.5*(1.0-erf(xstd/1.41421)) if (iprint.eq.0) go to 290 tc1=tc1*0.5 tc2=tc2*0.5 write (6,200) 200 format ( 37h0friedman - rafsky independence test.) write (6,210) e1,e2 210 format ( 22h edges in each graph = 2g13.5) write (6,220) tc1,tc2 220 format ( 30h edge pairs with common node = 2g13.5) write (6,230) d,expect,std,xstd,frcorr 230 format ( 15h shared edges =g13.5, 9h expect = g13.5, 5h 1+/- g13.5/ 1h g13.5, 46h standard deviations with normal si 2gnificance g13.5) go to 290 240 expect=e1*n nm3=n-3 nnm12=n*nm1*(n-2) var=(e1**2/nm3)*(n*(3*n-1)/3.0+4.0*(a-b)/nnm12)+(e1/nm3)*((2.0*nm1 1*(n-4)*a+4.0*b)/nnm12-n*nm1*(n+2)/3.0)+(tc1/((n-2)*nm3))*(((n+1)*b 2-2.0*nm1*a)/(n*nm1)-n*(3.0*n-4)*(n**2-1)/12.0) std=dsqrt(var) xstd=(d-expect)/std frcorr=0.5*(1.0+erf(xstd/1.41421)) if (iprint.eq.0) go to 290 tc1=tc1*0.5 write (6,250) 250 format ( 35h0friedman - rafsky prediction test.) write (6,260) e1,tc1 260 format ( 17h edges in graph = g13.5, 23h share common node 1= g13.5) write (6,270) a,b 270 format ( 40h distance rank matrix parameters (a,b) = 2g13.5) write (6,280) d,expect,std,xstd,frcorr 280 format ( 17h graph rank sum =g13.5, 9h expect = g13.5, 5h 1 +/- g13.5/ 1h g13.5, 46h standard deviations with normal 2significance g13.5) 290 return end subroutine rnkmtx (p,n,x,matrix,scr,iscr) integer p,n real x(p,n),scr(1) integer iscr(1),matrix(n,n) do 70 k=1,n do 20 j=1,n iscr(j)=j scr(j)=0.0 do 10 i=1,p scr(j)=scr(j)+(x(i,j)-x(i,k))**2 10 continue 20 continue call sort (scr,iscr,1,n) j=1 30 if (iscr(j).eq.k) go to 40 j=j+1 go to 30 40 if (j.eq.1) go to 50 it=iscr(1) iscr(1)=iscr(j) iscr(j)=it 50 do 60 j=1,n matrix(iscr(j),k)=j-1 60 continue 70 continue return end subroutine sort (v,a,ii,jj) c c puts into a the permutation vector which sorts v into c increasing order. only elements from ii to jj are considered. c arrays iu(k) and il(k) permit sorting up to 2**(k+1)-1 elements c c this is a modification of cacm algorithm #347 by r. c. singleton, c which is a modified hoare quicksort. c dimension a(jj),v(1),iu(20),il(20) integer t,tt integer a real v m=1 i=ii j=jj 10 if (i.ge.j) go to 80 20 k=i ij=(j+i)/2 t=a(ij) vt=v(ij) if (v(i).le.vt) go to 30 a(ij)=a(i) a(i)=t t=a(ij) v(ij)=v(i) v(i)=vt vt=v(ij) 30 l=j if (v(j).ge.vt) go to 50 a(ij)=a(j) a(j)=t t=a(ij) v(ij)=v(j) v(j)=vt vt=v(ij) if (v(i).le.vt) go to 50 a(ij)=a(i) a(i)=t t=a(ij) v(ij)=v(i) v(i)=vt vt=v(ij) go to 50 40 a(l)=a(k) a(k)=tt v(l)=v(k) v(k)=vtt 50 l=l-1 if (v(l).gt.vt) go to 50 tt=a(l) vtt=v(l) 60 k=k+1 if (v(k).lt.vt) go to 60 if (k.le.l) go to 40 if (l-i.le.j-k) go to 70 il(m)=i iu(m)=l i=k m=m+1 go to 90 70 il(m)=k iu(m)=j j=l m=m+1 go to 90 80 m=m-1 if (m.eq.0) return i=il(m) j=iu(m) 90 if (j-i.ge.11) go to 20 if (i.eq.ii) go to 10 i=i-1 100 i=i+1 if (i.eq.j) go to 80 t=a(i+1) vt=v(i+1) if (v(i).le.vt) go to 100 k=i 110 a(k+1)=a(k) v(k+1)=v(k) k=k-1 if (vt.lt.v(k)) go to 110 a(k+1)=t v(k+1)=vt go to 100 end