#!/usr/bin/python2
# translate.py
#
# Usage: translate.py sequences.fa codons.txt
#
# Translates every nucleotide sequence in a FASTA file into a protein
# sequence using a codon table file, and prints the result to stdout.
#
# Note: Usually I would not save sequences and then translate, since it can
# cause memory problems if the input file is huge (which happens quite often).

import sys, os, string


def WrapPrint(str, N):
    """Print `str` broken into lines of at most N columns.

    Nothing is printed for an empty string.  N must be non-zero (it is used
    as the step of range()).
    """
    for s in range(0, len(str), N):
        print(str[s:s + N])


def Translate(seq_dict, codons_dict):
    """Print the translation of every sequence in `seq_dict`.

    seq_dict    -- maps a sequence name (header line) to its nucleotide string
    codons_dict -- maps a three-letter codon to its amino acid code

    For each entry the name is printed first, followed by the protein
    sequence wrapped at 70 columns.  Returns None.

    A trailing incomplete codon (one or two leftover bases) is ignored
    instead of being looked up.  A complete codon that is missing from
    `codons_dict` still raises KeyError, so a bad table or unexpected
    characters are reported rather than silently mistranslated.
    """
    for n, s in seq_dict.items():
        print(n)
        # Stop at the last complete codon; leftover bases cannot be translated.
        usable = len(s) - len(s) % 3
        prot_seq = [codons_dict[s[i:i + 3]] for i in range(0, usable, 3)]
        WrapPrint(''.join(prot_seq), 70)


def ReadCodons(file):
    """Build a codon -> amino acid dictionary from the codon table `file`.

    Each line is read as: the first character is the amino acid code, and
    the rest of the line is a whitespace-separated list of codons that map
    to it.  Lines with no codons (e.g. blank lines) contribute nothing.
    """
    codons_dict = {}
    # `with` guarantees the file is closed even if parsing raises.
    with open(file) as fp:
        for x in fp:
            aa = x[:1]
            for k in x[1:].split():
                codons_dict[k] = aa
    return codons_dict


def ReadFasta(file):
    """Build a name -> sequence dictionary from the FASTA `file`.

    A line starting with '>' begins a new record; the whole header line
    (including the '>', without the line terminator) is the key.  All
    following lines up to the next header are concatenated to form the
    value.  Lines that appear before the first header are discarded.
    Returns an empty dictionary when the file contains no header line.

    Note: this can be written as a dense one-liner, but the explicit loop
    is kept because it is much easier to follow.
    """
    seq_dict = {}
    key = None          # no record has been opened yet
    seq = []
    with open(file) as fp:
        for x in fp:
            # Strip only the line terminator; this is safe when the final
            # line has no trailing newline and for CRLF files.
            line = x.rstrip('\r\n')
            if line.startswith('>'):
                if key is not None:
                    seq_dict[key] = ''.join(seq)
                key = line
                seq = []
            elif key is not None:
                seq.append(line)
    # Flush the last record: no later header exists to trigger the store.
    if key is not None:
        seq_dict[key] = ''.join(seq)
    return seq_dict


if __name__ == '__main__':
    # Only the argument lookup is guarded, so an IndexError raised while
    # processing is not misreported as a usage error.
    try:
        seq_file = sys.argv[1]
        codon_file = sys.argv[2]
    except IndexError:
        print("USAGE: translate.py sequences.fa codons.txt")
    else:
        codons_dict = ReadCodons(codon_file)
        seq_dict = ReadFasta(seq_file)
        Translate(seq_dict, codons_dict)