#!/usr/bin/perl -w # # Script ExtractSamples # # # Input: (locus+refSeq), refGene.txt # Output: Samples or fasta files # # by Enrique Blanco, February 2004 ##################################################################################### use strict; use Getopt::Std; #Boolean vars my $TRUE = 1; my $FALSE = 0; my $HUMAN = "/seq/genomes/H.sapiens/golden_path_200307/chromFa_msk/"; my $MOUSE = "/seq/genomes/M.musculus/golden_path_200310mm4/chromFa_msk/"; my $RAT ="/seq/genomes/R.norvegicus/golden_path_200306rn/chromFa_msk/"; my $SS = "../bin/SS"; my $RC = "../bin/RC"; my $th_U_TSS = "200_upstream_TSS/sequences/"; my $th_D_TSS = "200_downstream_TSS/sequences/"; my $th_U_ATG = "200_upstream_ATG/sequences/"; my $th_D_ATG = "200_downstream_ATG/sequences/"; my $tt_U_TSS = "2000_upstream_TSS/sequences/"; my $tmp_file = "tmp.fa"; ### Step 0. Options my %opt; (getopts('v',\%opt)) or die("parser: Problems reading options\n"); ### Step 0. Set up my ($locus_file,$refgene_file); my $n_files; # Two filenames are mandatory $n_files = $#ARGV+1; ($n_files == 2) or die ("2 files are required but $n_files are provided!\n"); ($locus_file,$refgene_file) = @ARGV; ### Step 1. print_mess("Stage 1. Opening refGene file..."); my $line; my $n_genes; my %GENES; my @record; my $found; # Open refgene file (open(REFGENE,$refgene_file)) or die("$refgene_file impossible to open"); $n_genes = 0; while ($line=) { @record = split(/\s+/,$line); #registering the refgene $GENES{$record[0]} = $line; $n_genes++; } close(REFGENE); print_mess("$n_genes genes succesfully read"); ### Step 2. my ($TSS,$ATG); my ($pos1,$pos2); my ($in_file,$out_file); my ($command,$locus,$chr); print_mess("Stage 2. Opening locus file..."); # Open locus file (open(LOCUS,$locus_file)) or die("$locus_file impossible to open"); $n_genes = $found = 0; while ($line=) { @record = split(/\s+/,$line); $locus = $record[0]; if (defined($GENES{$record[1]})) { print_mess($GENES{$record[1]}); @record = split(/\s+/,$GENES{$record[1]}); $chr = $record[1]; $in_file = $MOUSE.$chr.".fa.msk"; #Strand? if ($record[2] eq "+") { $TSS = $record[3]; $ATG = $record[5]; #Extract the five segments $pos1 = $TSS - 500; $pos2 = $TSS; $out_file = $th_U_TSS.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $TSS; $pos2 = $TSS + 500; $out_file = $th_D_TSS.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $ATG - 500; $pos2 = $ATG; $out_file = $th_U_ATG.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $ATG; $pos2 = $ATG + 500; $out_file = $th_D_ATG.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $TSS - 2000; $pos2 = $TSS - 2000 + 500; $out_file = $tt_U_TSS.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $out_file"; print_mess ("Running...$command"); system($command); } else { $TSS = $record[4]; $ATG = $record[6]; #Extract the five segments $pos1 = $TSS; $pos2 = $TSS + 500; $out_file = $th_U_TSS.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $tmp_file"; print_mess ("Running...$command"); system($command); $command = "$RC $tmp_file > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $TSS - 500; $pos2 = $TSS; $out_file = $th_D_TSS.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $tmp_file"; print_mess ("Running...$command"); system($command); $command = "$RC $tmp_file > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $ATG; $pos2 = $ATG + 500; $out_file = $th_U_ATG.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $tmp_file"; print_mess ("Running...$command"); system($command); $command = "$RC $tmp_file > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $ATG - 500; $pos2 = $ATG; $out_file = $th_D_ATG.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $tmp_file"; print_mess ("Running...$command"); system($command); $command = "$RC $tmp_file > $out_file"; print_mess ("Running...$command"); system($command); $pos1 = $TSS + 2000 - 500; $pos2 = $TSS + 2000; $out_file = $tt_U_TSS.$locus.".fa"; $command = "$SS $in_file $pos1 $pos2 > $tmp_file"; print_mess ("Running...$command"); system($command); $command = "$RC $tmp_file > $out_file"; print_mess ("Running...$command"); system($command); } $found ++; } $n_genes++; } print_mess("$found/$n_genes genes succesfully found"); # Step 5. Finish successful program execution print_mess("Everything well done. Exit!\n"); exit(0); ############ Subroutines sub print_mess { my @mess = @_; print STDERR ("\t: @mess \n") if (exists($opt{v})); } sub print_error { my @mess = @_; print STDERR ("\t: @mess \n") and exit(); }