function [motiflist]=fuzzyMotifFunction(geneID,promoterlist,motiflen,fuzzypos,outfn) motiflist= enumerateMotifs(motiflen,'fuzzy',fuzzypos); fprintf('--- In fuzzyMotifFunction: motif list generated, total %d motifs.\n',length(motiflist)); ngenes = length(geneID); nmotifs = length(motiflist); okayoverlap=motiflen-1; siz = 4*ones(1,motiflen); grandtable = uint8(zeros(nmotifs,ngenes)); t0=clock; for i=1:ngenes if rem(i,100)==0 timepassed=etime(clock, t0); fprintf('--- --- %d out of %d genes done, elapsed time=%d\n',i, ngenes,timepassed); t0 = clock; save(sprintf('fuzzyMotifScript%d.mat',motiflen), '*'); end seq=promoterlist{i}; % **** seqc=seqcomplement(seq); promoterlen = length(seq); seqn = zeros(1,promoterlen); seqcn = zeros(1,promoterlen); % motiftable contains counts of each 4^motiflen word in both the template % and coding strand. A word in the coding strand is counted if it % it is not the same with any word in the last 'okayoverlay' positions % in either the coding or reverse template strand, or if that word has % not been counted in 'okayoverlay' times. Same rule applies to the % word in the reverse template strand, with the additional check that % it is not palindromic. motiftable = zeros(siz); prevmotif = cell(1,okayoverlap); for j=1:okayoverlap prevmotif{j} = ''; end prevmotifc = cell(1,okayoverlap); for j=1:okayoverlap prevmotifc{j} = ''; end for k=1:promoterlen seqn(k) = mynt2int(seq(k)); seqcn(k) = mynt2int(seqc(k)); end havenotcounted = zeros(siz); % when was the last time this motif was counted? % a motif is not counted if it overlaps with the % previous motiflen-1 motifs, or if this motif % has not been counted in the last motiflen-1 % positions. for k=1:(promoterlen-(motiflen-1)) currmotif = seq(k:(k+motiflen-1)); currmotifc = seqc((k+motiflen-1):(-1):k); %reverse complement word s= seqn(k:(k+motiflen-1)); sc= seqcn((k+motiflen-1):(-1):k); if (max(s)<=4 && min(s)>0) linearind = mysub2ind(siz,s); linearindc = mysub2ind(siz,sc); else % there are fuzzy characters in s (discovered for 1423rd gene % in top 5000 paired T2 list). fprintf('fuzzy %d.',k); linearind=-1; linearindc=-1; end % with every advance of k the matrix havenotcounted gets % incremented by 1. Later, if the current word is counted, then % havenotcounted(linearind) is set to 0. If the current reverse % complement word is counted, then havenotcounted(linearindc) is % set to 0. havenotcounted= havenotcounted+1; % did currmotif (coding strand word) appear in the last okayoverlap positions? overlapself= false; for j=1:okayoverlap if strcmp(prevmotif{j},currmotif)==1 overlapself = true; end end % did currmotif occur in the last okayoverlap positions in % complementary strand? overlapselfc=false; for j=1:okayoverlap if strcmp(prevmotifc{j},currmotif)==1 overlapselfc = true; end end % if it _does_not_ overlap with self, OR we haven't counted the % word in okayoverlap positions, count the word. if linearind>0 if ~(overlapself || overlapselfc) || (havenotcounted(linearind) >=okayoverlap) motiftable(linearind) = motiftable(linearind)+1; havenotcounted(linearind)=0; end end % if currmotif (coding strand word) overlapped with itself, then % the template strand also overlapped its a same word in its own % strand. Therefore, no need to compare with itself. Also, if % coding strand word overlapped with a previous word in reverse template % strand, then the template strand overlapped with a previous word % in coding strand. Thus, only need to check whether the word is % palindromic. if ~overlapself % if here, then currmotifc is sure not to have overlapped with % any previous words, in its own strand or in the reverse % complement. if strcmp(currmotif, currmotifc)==1 %is this word palindromic? overlapself = true; end end if linearindc>0 if ~overlapself || (havenotcounted(linearindc) >=okayoverlap) motiftable(linearindc) = motiftable(linearindc)+1; havenotcounted(linearindc)=0; end end % update prevmotif, prevmotifc. for j=1:(okayoverlap-1) prevmotif{j} = prevmotif{j+1}; prevmotifc{j} = prevmotifc{j+1}; end prevmotif{okayoverlap} = currmotif; prevmotifc{okayoverlap} = currmotifc; end count = zeros(1,nmotifs); % this is not yet fully optimized. it's better to % loop over all entries in motiftable (4^p) rather % than all entries in motiflist. s=zeros(1,motiflen); for j=1:nmotifs % ---- Tabulate all instances of this motif --- word = motiflist{j}; if isfuzzy(word,fuzzypos) motifset = reduceFuzzyMotif(word,true); for k=1:length(motifset) linearind = mysub2ind(siz,motifset{k}); count(j) = count(j)+ motiftable(linearind); end else for m=1:motiflen s(m) = mynt2int(word(m)); end linearind = mysub2ind(siz,s); count(j) = motiftable(linearind); % simply the count in the single motiftable entry. end end grandtable(:,i) = uint8(count); end % print results to file fn=sprintf(outfn, motiflen); fid = fopen(fn,'w'); fprintf(fid, 'Locus ID:\t'); for i=1:ngenes fprintf(fid, '%s\t',geneID{i}); end fprintf(fid,'\n'); for j=1:nmotifs fprintf(fid,'%s\t',motiflist{j}); fprintf(fid,'%d\t',grandtable(j,:)); fprintf(fid,'\n'); end fclose(fid);