#!/usr/local/bin/perl -w # # File: fragUpload.pl # Author: Prabhu # # Copyright @ 2007, The Institute for Genomic Research (TIGR). All # rights reserved. # # Populates database with information collected from a frag file. # use strict; use File::Basename; my $SCRIPT = basename($0); # ######################################################################### # # Usage Declaration # # ######################################################################### my $USAGE = qq~ USAGE: $SCRIPT [-S SERVER] -D DATABASE [-U USER -P PASSWORD] ~; # ######################################################################### # # Helptext Declaration # # ######################################################################### my $HELPTEXT = qq~ $SCRIPT - PURPOSE $USAGE OPTIONS: Standard Database Options -S : Database Server Name -D : Database Name -U : User name -P : Password -i: Input a .frg file. This is a necessary 'option'. ~; # ######################################################################### # # Version Strings # # ######################################################################### my $VERSION = "1.0"; my $BUILD = (qw/$Revision: 25407 $/ )[1]; # ######################################################################### # # Dependency Declaration # # ######################################################################### my @DEPENDS = ( "TIGR::Foundation", "DBI", "TIGR::GLKLib", "TIGR::AsmLib", ); # ######################################################################### # # Load Modules # # ######################################################################### # # Use Local Modules First # use FindBin qw($Bin); use lib "$Bin"; #use lib "/usr/local/devel/SE/bin/"; # # Standard USE statments # use DBI; use TIGR::Foundation; use TIGR::AsmLib; use TIGR::QualitiesAndPositions; # # Load TIGR::GLKLib # use TIGR::GLKLib; # ######################################################################### # # Subroutine Declarations # # ######################################################################### # ######################################################################### # # Set up the TIGR::Foundation # # ######################################################################### my $VERSION_STRING = "$VERSION (Build $BUILD) with GLKLib $GLKLIB_VERSION"; my $tf = new TIGR::Foundation; $tf->addDependInfo(@DEPENDS); $tf->setHelpInfo($HELPTEXT); $tf->setVersionInfo($VERSION_STRING); $tf->setUsageInfo($USAGE); # ######################################################################### # # Global Variables # # ######################################################################### # # Option Variables # my $output_filename; my $input_filename; # Database Defaults my $DEFAULT_DB_SERVER = "SYBTIGR"; my $DEFAULT_DB_NAME = undef; my $DEFAULT_DB_USER = "access"; my $DEFAULT_DB_PASS = "access"; # Standard Database Options my $db_passfile_path = undef; my $db_server = undef; my $db_name = undef; my $db_user = undef; my $db_pass = undef; # # Ouput Variables # my $quiet_mode = undef; my $output = undef; # ######################################################################### # # Command Line Option Parsing # # ######################################################################### $tf->TIGR_GetOptions( # Standard Database Options 'S=s' => \$db_server, 'D=s' => \$db_name, 'U=s' => \$db_user, 'P=s' => \$db_pass, 'p=s' => \$db_passfile_path, 'q|quiet' => \$quiet_mode, 'i|input=s' => \$input_filename, 'o|output=s' => \$output_filename, ) or $tf->printUsageInfoAndExit(); # ######################################################################### # # Load the password file as defaults # # ######################################################################### if (defined $db_passfile_path) { $tf->logLocal("Loading password data from $db_passfile_path.", 1); my @passdata = (); my $passfile = new IO::File(); if ($passfile->open($db_passfile_path, 'r')) { for (my $i = 0; $i < 4; $i++) { my ($line) = $passfile->getline() =~ /^\s*(.*)\s*$/; $line = undef if ($line =~ /^\s*$/); push @passdata, $line; } $passfile->close(); # Set the variables if they haven't been set yet $DEFAULT_DB_USER = $passdata[0]; $DEFAULT_DB_NAME = $passdata[1]; $DEFAULT_DB_PASS = $passdata[2]; $DEFAULT_DB_SERVER = $passdata[3]; } } # ######################################################################### # # Resolve the database credentials # # ######################################################################### if (defined $db_user) { $db_pass = $tf->getPassword() unless ($db_pass); } else { ($db_user, $db_pass) = ($DEFAULT_DB_USER, $DEFAULT_DB_PASS); } $db_server = $DEFAULT_DB_SERVER unless defined $db_server; $db_name = $DEFAULT_DB_NAME unless defined $db_name; # ######################################################################### # # Get a GLKLib Object # # ######################################################################### # Check that database variables are set $tf->bail("You must specify a database server with -S or a passfile.\n") unless defined $db_server; $tf->bail("You must supply a project database with -D or a passfile.\n") unless defined $db_name; $tf->bail("You must supply a username with -D or a passfile.\n") unless defined $db_user; $tf->bail("No password specified.\n") unless defined $db_pass; $tf->logLocal("Connecting to Database: $db_server.$db_name as $db_user.", 1); my $glk = TIGR::GLKLib::newConnect($db_server, $db_name, $db_user, $db_pass); $tf->bail("Database connection failed.") unless defined $glk; $glk->setTF($tf); # ######################################################################### # # Get a handle to the output # # ######################################################################### # Interpret the following as aliases for console output if (defined $output_filename) { $output_filename = undef if ($output_filename eq "-"); $output_filename = undef if ($output_filename eq "STDOUT"); } # # Route output to /dev/null if we requested quiet mode # if ($quiet_mode) { $output_filename = "/dev/null"; } if (defined $output_filename) { $output = new IO::File; $output->open($output_filename, "w"); } else { $output = new IO::Handle; $output->fdopen(1, "w"); } # ######################################################################### # # Do Work # # ######################################################################### ####### begin query assignments ####### my $seq_query = "INSERT INTO sequence (seq_name,ed_date,ed_pn) VALUES (?, ?, ?)"; $glk->addSQL('INSERT_SEQ', $seq_query); my $seq_id_query="SELECT id FROM sequence WHERE seq_name=?"; $glk->addSQL('QUERY_SEQ_ID', $seq_id_query); my $bases_query="INSERT INTO bases (sequence_id,date,version,sequence) VALUES (?,?,?,?)"; $glk->addSQL('INSERT_BASE', $bases_query); my $qualities_query="INSERT INTO qualities (bases_id,quality) VALUES (?,?)"; $glk->addSQL('INSERT_QUAL', $qualities_query); my $bases_id_query="SELECT id FROM bases WHERE sequence_id=?"; $glk->addSQL('QUERY_BASE_ID', $bases_id_query); my $peaks_query="INSERT INTO peaks (bases_id,positions) VALUES (?,?)"; $glk->addSQL('INSERT_PEAK', $peaks_query); my $feature_query="INSERT INTO feature (seq_name,feat_type,end5,end3) VALUES (?,?,?,?)"; $glk->addSQL('INSERT_FEAT', $feature_query); my $names_query="INSERT INTO names (seq_name,clone_name) VALUES (?,?)"; $glk->addSQL('INSERT_NAME', $names_query); my $read_proc_row_exists_query="SELECT ReadProcSystem_id FROM ReadProcSystem WHERE ReadProcSystem_id = ? "; $glk->addSQL('READ_PROC_ROW_EXISTS', $read_proc_row_exists_query); my $cs_row_exists_query="SELECT name FROM CloningSystem WHERE CloningSystem_id = ? "; $glk->addSQL('READ_CS_ROW_EXISTS', $cs_row_exists_query); my $insrt_rps_row = "INSERT INTO ReadProcSystem (ReadProcSystem_id, createDate) VALUES (?,?)"; $glk->addSQL('INSERT_RPS_ROW', $insrt_rps_row); my $insrt_cs_row = "INSERT INTO CloningSystem (CloningSystem_id, createDate,name) VALUES (?,?,?)"; $glk->addSQL('INSERT_CS_ROW', $insrt_cs_row); ####### end query assignments ####### my $lib_id; my $root_eid = $glk->getExtentRoot(); if(!defined($root_eid)){ $root_eid=$glk->addExtent(undef,"GENOME","WHOLE",undef); } my $date=$tf->getSybaseDate(); $date =~ s/^'(.*)'$/$1/; my @positionArray; ######## MAIN PROCEDURE BEGINS ############# if(!defined(readProcSystemRowExists())){ createReadProcSystemRow(); } if(!defined(cloningSystemRowExists())){ createCloningSystemRow(); } ### add library ### unless ($glk->translateLibraryName('Rick')){ my $expt_id = $glk->addExperiment("Experiment for Rickettsia WGS reads",$db_user); $lib_id = $glk->addLibrary($root_eid, 'Rick',1000, "Rick_WGS_LIBRARY", 1047224292936); $glk->addLibraryPolicy($lib_id,500,4000,$expt_id); } unless ($glk->translateSeqReadType('Rick')){ $glk->createSeqReadType('Rick'); } ### create main extent ### unless ($glk->translateExtentType("Rick")) { $glk->createExtentType("Rick","Root extent for Rickettsia WGS reads"); } $lib_id = $glk->getLibraryByLimsRef('Rick'); my $expt_id=$glk->getExperimentByComment("Experiment for Rickettsia WGS reads"); ##########create hashMap for storing mate-pair info my %base = ( 0 => 'A', 1 => 'B', 2 => 'C', 3 => 'D', 4 => 'E', 5 => 'F', 6 => 'G', 7 => 'H', 8 => 'I', 9 => 'J', 10 => 'K', 11 => 'L', 12 => 'M', 13 => 'N', 14 => 'O', 15 => 'P', 16 => 'Q', 17 => 'R', 18 => 'S', 19 => 'T', 20 => 'U', 21 => 'V', 22 => 'W', 23 => 'X', 24 => 'Y', 25 => 'Z' ); my %matesHash = (); my $count=-1; my @libIds; for(my $i=0;$i<=15;$i++){ for(my $j=0;$j<=15;$j++){ for(my $k=0;$k<=15;$k++){ for(my $l=0;$l<=15;$l++){ $count++; $libIds[$count]=$base{$i}.$base{$j}.$base{$k}.$base{$l}; } } } } ####### ADDING LIBRARIES open (DST,"< $input_filename") || $tf->bail("Can't open $input_filename: $!"); until (eof (DST)) { my $msg = getCARecord(\*DST); my ($id,$fields,$recs) = parseCARecord($msg); if ($id eq 'DST') { my $lims_ref = $$fields{'acc'}; my $mea = $$fields{'mea'}; my $std = $$fields{'std'}; my $minSize = $mea - (3*$std); my $maxSize = $mea + (3*$std); my $tmp_lib_id=$glk->translateLibraryName($lims_ref); print "Outside \"unless\"--tmp_lib_id is: $tmp_lib_id and lims_ref is: $lims_ref\n"; unless ($tmp_lib_id==$lims_ref){ print "tmp_lib_id is: $tmp_lib_id and lims_ref is: $lims_ref\n"; $lib_id = $glk->addLibrary($root_eid, "".$lims_ref,1000, "Rick_WGS_LIBRARY", 1047224292936); $glk->addLibraryPolicy($lib_id,$minSize,$maxSize,$$expt_id{"Experiment_id"}); } } } close(DST); die; #### creating hashMap from LKG records $count=-1; open (LKG,"< $input_filename") || $tf->bail("Can't open $input_filename: $!"); until (eof (LKG)) { my $msg = getCARecord(\*LKG); my ($id,$fields,$recs) = parseCARecord($msg); if ($id eq 'LKG') { $count++; my $accn_num = $$fields{'fg1'}; my $mate_accn_num = $$fields{'fg2'}; my $dst = $$fields{'dst'}; my $count_modulo=$count%10; my $clone_name = $libIds[$count].'LP'.$count_modulo; if(defined $accn_num && (defined $mate_accn_num)){ $matesHash{$accn_num} = $clone_name; $matesHash{$mate_accn_num} = $clone_name; $matesHash{$accn_num.'direction'} = 'F'; $matesHash{$mate_accn_num.'direction'} = 'R'; $matesHash{$accn_num.'dst'}=$dst; $matesHash{$mate_accn_num.'dst'}=$dst; # print "dst is $dst\n"; $matesHash{$accn_num.'ext'}=$count; $matesHash{$mate_accn_num.'ext'}=$count; my $libId=$glk->getLibraryByLimsRef($dst); $matesHash{$accn_num.'lib'}=$libId; $matesHash{$mate_accn_num.'lib'}=$libId; } } } close(LKG); #die "count is:$count\n"; if(defined $input_filename){ open (FRG,"< $input_filename") || $tf->bail("Can't open $input_filename: $!"); until (eof (FRG)) { my $message = getCARecord(\*FRG); my ($id,$fields,$recs) = parseCARecord($message); if ($id eq 'FRG') { my $accn_num = $$fields{'acc'}; if(defined $accn_num ){ my $direction=undef; my $clone_name=undef; my $extent_=undef; my $lib_id=undef; my $eid = undef; if(defined $matesHash{$accn_num}){ #mate-pair info is present $clone_name=$matesHash{$accn_num}; $direction = $matesHash{$accn_num.'direction'}; $extent_="".$matesHash{$accn_num.'ext'}; $lib_id=$matesHash{$accn_num.'lib'}; $eid = $glk->getExtentByTypeRef("INSERT","WGS".$extent_."PairedRick"); if(!defined($eid)){ $eid = $glk->addExtent($root_eid, "INSERT", "WGS".$extent_."PairedRick", "PairedRickExtent"); $glk->linkExtentLibrary($eid,$lib_id); } } else{ # mate-pair info is not present $count++; my $count_modulo=$count%10; $clone_name = $libIds[$count].'LP'.$count_modulo; $direction = 'X'; $lib_id=$glk->getLibraryByLimsRef('Rick'); $eid = $glk->getExtentByTypeRef("INSERT",$accn_num); if(!defined($eid)){ $eid = $glk->addExtent($root_eid, "INSERT", $accn_num, "UnpairedRickExtent"); $glk->linkExtentLibrary($eid,$lib_id); } } my $seq_name=$clone_name.'T'.$direction; my $clrStr=$$fields{'clr'}; my @clrVals=split /,/,$clrStr; my $seq_content = $$fields{'seq'}; my $quality = $$fields{'qlt'}; if(defined($seq_content) && defined($quality) && defined($clrVals[0]) && defined($clrVals[1])){ $seq_content =~ s/\n//g; $seq_content = uc $seq_content unless ($seq_content =~ /[A-Z]/); $quality =~ s/\n//g; insertRows ($seq_name,$direction,$accn_num, $clone_name,$seq_content,$eid,$quality,$clrVals[0],$clrVals[1]); } } } } close(FRG); # print "count after all extra frg recs: $count\n"; } ####### end code for inserting records into db ####### # ######################################################################### # # End the Script # # ######################################################################### END { $glk->closeDB() if defined $glk; $output->close() if defined $output; } sub insertRows { my $seq_name = shift; my $direction = shift; my $accn_num = shift; my $clone_num = shift; my $seq_content = shift; my $eid = shift; my $quality= shift; my $start=shift; my $stop=shift; my $row; my $sequence_id; my $srid; my $base_id; my $seq_len= length $seq_content; # for reason(s) still unknown, running the 'names' insert before the 'sequence' insert # was necessary - otherwise, a duplicate key entry violation was getting reported. # query for adding to names table if(!defined $clone_num){ $clone_num="CAFRBO4"; } $glk->runQuery('INSERT_NAME',$seq_name,$clone_num); $glk->endQuery('INSERT_NAME'); # query for adding to sequence table $glk->runQuery('INSERT_SEQ', $seq_name, $date, $db_user); $glk->endQuery('INSERT_SEQ'); $glk->runQuery('QUERY_SEQ_ID',$seq_name); $row = $glk->fetchRow('QUERY_SEQ_ID'); $sequence_id= $row->{'id'}; $glk->endQuery('QUERY_SEQ_ID'); #GLK api for adding a record to GLK's SequenceRead table if(!defined $direction){ $direction = 'R'; } $srid = $glk->addSequenceRead($seq_name, $eid, $direction, "RANDOM",$accn_num); # query for adding to bases table $glk->runQuery('INSERT_BASE',$sequence_id,$date,1,$seq_content); $glk->endQuery('INSERT_BASE'); $glk->runQuery('QUERY_BASE_ID',$sequence_id); $row = $glk->fetchRow('QUERY_BASE_ID'); $base_id= $row->{'id'}; $glk->endQuery('QUERY_BASE_ID'); # query for adding to qualities table $glk->runQuery('INSERT_QUAL',$base_id,$quality); $glk->endQuery('INSERT_QUAL'); # query for adding to peaks table my $pos_str = populatePositionArray($seq_len); $glk->runQuery('INSERT_PEAK',$base_id,$pos_str); $glk->endQuery('INSERT_PEAK'); # queries for adding to feature table $glk->runQuery('INSERT_FEAT',$seq_name,'CLB',$start,$stop); $glk->endQuery('INSERT_FEAT'); $glk->runQuery('INSERT_FEAT',$seq_name,'CLR',$start,$stop); $glk->endQuery('INSERT_FEAT'); $glk->runQuery('INSERT_FEAT',$seq_name,'CLV',1,$seq_len); $glk->endQuery('INSERT_FEAT'); $glk->runQuery('INSERT_FEAT',$seq_name,'CLN',$start,$stop); $glk->endQuery('INSERT_FEAT'); $glk->runQuery('INSERT_FEAT',$seq_name,'CLZ',0,0); $glk->endQuery('INSERT_FEAT'); } sub readProcSystemRowExists{ my $value; if ($glk->runQuery('READ_PROC_ROW_EXISTS', 1047224292937)){ $value = $glk->fetchSingle('READ_PROC_ROW_EXISTS'); } $glk->endQuery('READ_PROC_ROW_EXISTS'); return $value; } sub cloningSystemRowExists{ my $value; if ($glk->runQuery('READ_CS_ROW_EXISTS', 1047224292936)){ $value = $glk->fetchSingle('READ_CS_ROW_EXISTS'); } $glk->endQuery('READ_CS_ROW_EXISTS'); return $value; } sub createReadProcSystemRow{ $glk->runQuery('INSERT_RPS_ROW',1047224292937,$date); $glk->endQuery('INSERT_RPS_ROW'); } sub createCloningSystemRow{ $glk->runQuery('INSERT_CS_ROW',1047224292936,$date,"Default"); $glk->endQuery('INSERT_CS_ROW'); } sub populatePositionArray{ my $seq_len = shift; my $i; my $pos=20; $#positionArray = $seq_len-1; for($i=0;$i<$seq_len;$i++){ $positionArray[$i] = $pos; $pos += 20; } return positionsToAsciiFromArray(@positionArray); }