# `tms3' program in Perl selects transmembrane proteins and generates hydropathy plots. # 2001 J. M. Ward use GD; use Text::ParseWords; $name1 = "ATH.FAS"; #Fasta input data filename $name2 = "hydro.dat"; #Fasta output filename $name3 = "ATH.OUT"; #Fasta file from HMMTOP $window = 9; #global window for hydropathy open(INFILE, $name1) or die 'Could not open file'; open(HMMFILE, $name3) or die 'Could not open HMM file'; $desc = ; chomp $desc; $desc =~ s/^[>]//; $desc2 = ; %aakd = ( A => 1.8, C => 2.5, D => -3.5, E => -3.5, F => 2.8, G => -0.4, H => -3.2, I => 4.5, "*" => 0, K => -3.9, L => 3.8, M => 1.9, N => -3.5, P => -1.6, Q => -3.5, R => -4.5, S => -0.8, T => -0.7, V => 4.2, W => -0.9, Y => -1.3, ); open (HTFILE1, ">".$name2) or die 'Could not open output file'; close HTFILE1; for ($i = 1; $i < 40000; $i++) { ($nextdes, $seq) = next1(*INFILE); #get the next sequence $desc =~ /(^\S+)/; $temp = $1; if ($desc2 =~ /$temp/) { ($desc2, $nextor, $numtm, @list) = next2(*HMMFILE); @kd = hydro1($seq); # calculate the hydropathy plot if ($numtm > 1) { %rec = ( name => $i, description => $desc, tms => $numtm, orient => $nextor, pos => join ("\,", @list), sequence => $seq, gif => $i.".gif" ); out_to_gif(\@list, \@kd, $i.".gif"); %{$rec1{$i}} = %rec; } } $desc = $nextdes; } open (HTFILE, ">".$name2); $temp = print_rec (*HTFILE, \%rec1); close HTFILE; print $temp, " sequences selected \n\n"; $longest = 0; # longest sequence foreach $i (keys %rec1) { $tms[$rec1 {$i} {tms}]++; # sick: make an array indexed by number of TMS with the count for each $temp = length($rec1 {$i} {sequence}); if ($temp > $longest) {$longest = $temp}; } print "The longest sequence is ", $longest, " amino acids. \n\n"; print "TMS Number found \n\n"; for ($i = 1; $i < @tms; $i++) { printf " %3d %5d \n", $i, $tms[$i]; } sub out_to_gif { #takes the kd data and outputs a gif ($dlist,$dk1,$fn2) = @_; #reference to array with kd data my @dk = @$dk1; my @ist = @$dlist; if (scalar (@dk) >= 20) { open (GIF,">$fn2"); $im = new GD::Image(500,150) or die; # create a new image $white = $im->colorAllocate(255,255,255); # allocate some colors $black = $im->colorAllocate(0,0,0); $im->transparent($white); # make the background transparent and interlaced $im->interlaced('true'); $im->rectangle(0,0,499,149,$black); # Put a black frame around the picture $im->line(10,10,10,140,$black); #draw some lines $im->line(10,75,490,75,$black); my $len = scalar(@dk); #scale the drawing based on amino acid numbers my $scale1 = 480 / $len; my $st = 0; foreach $stpos (@ist) { #calculate and draw the plot if ($st == 0) { $st = $stpos; } else { $tx = int (10 + $st * $scale1); $nx = int (10 + $stpos * $scale1); $im->line($tx, 5, $nx, 5, $black); $st = 0; } } $tx = 10; $ty = int($dk[0] * -2 + 75); $nx = 0; $ny = 0; for ($j=0; $j<$len; $j++) { $nx = int (10 + $j * $scale1); $ny = int ($dk[$j] * -2 + 75); $im->line($tx,$ty,$nx,$ny,$black); $tx = $nx; $ty = $ny; } print GIF $im->gif; close GIF; } } sub next1 { # this returns the current sequence and the NEXT description my $fh = shift; my $seq1 = ""; my $descr = ""; while (<$fh>) { if (/>/) { $descr = $_; chomp $descr; $descr =~ s/^[>]//; last; } else { chomp; $seq1 .= $_; } } return ($descr, $seq1); } sub next2 { # this returns the current HMMTOP results my $fh = shift; my $nexto = ""; my $numt = ""; my @lis = (); while (<$fh>) { chomp; $t = $_; if (/>/) { $descr2 = $t; $descr2 =~ s/^[>]//; last; } else { $t =~ s/^\s*(.*?)\s*/$1/; $t =~ tr/ / /s; @lis = quotewords(" ",1,($t)); $nexto = shift(@lis); $numt = shift(@lis); } } return ($descr2, $nexto, $numt, @lis); } sub hydro1 { my $line = shift; my @kd = (); my $len = length($line); my $gap = int ($window / 2); my $ss = 0; for ($h = $gap; $h < ($len - $gap); $h++) { $ss = $h - $gap; $kd[$h] = calc(substr($line, $ss, $window)); } return @kd; } sub calc { #calculates the KD value of substring my $str = shift; my $val = 0.0; my @temp = unpack("A1" x length($str), $str); foreach $aa (@temp) { $val += $aakd{$aa}; } return $val; } sub print_rec { # Prints the Fasta output file and returns the number of records my $fh = shift; my $href = shift; my %rec = %$href; my $num = 0; foreach $i (keys %rec) { print $fh "> "; print $fh $i," ",$rec {$i} {orient}," "; print $fh $rec {$i} {tms}," ", $rec {$i} {gif}," "; print $fh "\$ ", $rec {$i} {pos}, " \$"; print $fh $rec {$i} {description},"\n" ; print $fh $rec {$i} {sequence},"\n" ; $num++; } return $num; }