
use GD::Simple;
use Math::Trig;
use List::Util qw[min max];
use List::Util 'shuffle';
use POSIX qw(ceil floor);


#                            #
#    INITIALIZE VARIABLES    #
#                            #

$maxRecGenes   = 200;   #Maximum Recommended Number of Genes
$maxRecRegions = 100;   #Maximum Recommended Number of Regions
$maxThick      = 3;     #Maximum thickness for a line
$maxIter       = 5000;    #Maximum number of iterations
$outputFile    = "graphCluster.png";

#                            #
#    ASSIGN PARAMETERS       #
#                            #

#Default parameter assignement
%paramHash = % {initializeParameters(0.05, 200,1,1,"graph_data","graph_connect")} ;


#update default parameters with user inputs
%paramHash = % {updateParams(\@ARGV, \%paramHash )};


errorCheck(\%paramHash);

#assign parameter variables
$regionFile = $paramHash{"--data"}; 
$connectionFile = $paramHash{"--connect"} ;
$clusterParameter = $paramHash{"--clusterRegions"}; 
$orientParameter = $paramHash{"--orient"} ;
$geneClusterParam = $paramHash{"--clusterGenes"} ;
$scoreThresh =  $paramHash{"--scoreThresh"} ;



#                              #
#    READ IN REGIONS & GENES   #
#      FROM DATA FILE          #
#                              #


#Open Data File - to read in regions first
if(!open(IN, "$regionFile")){die("Could not open $regionFile.\n\n")}

$cnt = 0;   #cnt used to count number of regions
while($line=<IN>)
  {
    chomp($line);
    @temp = split(/\s+/, $line);
    $regName = $temp[0];

    if(exists($regHashIndex{$regName}))  #if already noted, then skip
      {
	
      }	
    else     #if we encounter the region for the first time, store info
      {
	$regHashIndex{$regName} = $cnt; #store region name in a hash ...
	$regArray[$cnt] = $regName;     # ... and also an array
	$cnt = $cnt +1;
      }	
  }
$numRegions = $cnt;  #Note total number of regions
close(IN);


#open Data File, again to read in genes, and scores
if(!open(IN, "$regionFile")){die("Could not open $regionFile.\n\n")}

$numGenes = 0;   #numGenes used to keep track of total number of genes
while($line= <IN>)
  {
    $numGenes = $numGenes+1;
    chomp($line);
    @temp = split(/\s+/, $line);

    $gene = $temp[1];       #note gene name
    $regName = $temp[0];    #region name
    
    
    $geneScore{$gene} = $temp[2];  #note score in hash
    $geneRegion{$gene} = $temp[0]; #note region in hash

    if(exists($regHash{$regName})) #get total count of regions
      {
	$last{$regName} = $last{$regName} + 1;
	$regHash{$regName}[$last{$regName}] = $gene;
      }	
    else     #initialize structures with first occurence of the region
      {
	$regHash{$regName}[0] = $gene;#Keeps track of the gene in each region, in order

	$last{$regName} = 0;          #last - notes the index of the last gene
	                              #   in a region
      }	

  }	
close(IN);


print(stderr "Read in regions and genes...\n");

#Warning if there are too many genes
if($numGenes>$maxRecGenes)
  {
    print(stderr "WARNING: Maximum recomended gene number ( $maxRecGenes ), exceeded in $regionFile\n\n");
  }	

#Warning if there are too many regions
if($numRegions>$maxRecRegions)
  {
    print(stderr "WARNING: Maximum recomended region number ( $maxRecRegions ), exceeded in $regionFile\n\n");
  }	



#                                          #
#    READ IN PAIRWISE GENE CONNECTIONS     #
#      FROM CONNECTIONS FILE TO            #
#       TABULATE "TRAFFIC" BETWEEN REGIONS #
#                                          #

if(!open(IN2, "$connectionFile"))
  {die("Could not open $connectionFile.\n\n")}

while($line=<IN2>)
  {
    chomp($line);
    @temp = split(/\s+/,$line);
    
    if((exists($geneScore{$temp[0]}) & exists ($geneScore{$temp[1]}))) 
      #If both genes are scored
      {
	
	#check and see if scores are both below
	#   the threshold, if so, look for connections

	if(($geneScore{$temp[0]}<$scoreThresh) 
	   & ($geneScore{$temp[1]}<$scoreThresh)){ 

	  #tally up the total numbers of connections between the
	  #regions the regions that the genes sit in...
	  #dis contains the cumulative score of connections
	  #between two regions
	  #    NOTE - need to initialize both ordered pairs
	  if(exists($dis{$geneRegion{$temp[0]}}{ $geneRegion{$temp[1]} })){ 
	    
	    $dis{$geneRegion{$temp[0]}}{ $geneRegion{$temp[1]}} = $temp[2]+ 
	      $dis{$geneRegion{$temp[0]}}{ $geneRegion{$temp[1]}}; 

	    $dis{$geneRegion{$temp[1]}}{ $geneRegion{$temp[0]}} = $temp[2]+ 
	      $dis{$geneRegion{$temp[1]}}{ $geneRegion{$temp[0]}}; 

	  }
	  else   #initialize dis if it is the first connection between 2 regions
	    {	
	      $dis{$geneRegion{$temp[0]}}{ $geneRegion{$temp[1]}} = $temp[2];
	      $dis{$geneRegion{$temp[1]}}{ $geneRegion{$temp[0]}} = $temp[2]; 
	    }

	  $dis2{$temp[0]}{$temp[1]} = $temp[2];
	  $dis2{$temp[1]}{$temp[0]} = $temp[2];

	}
      }
  }

close(IN2);


#                                       #
#  now cluster -                        #
#   if user has seleced the option      #
#                                       #

if(($clusterParameter=~/\d+/) && ($clusterParameter>0 ) )
  {

    $numIterations = $clusterParameter; 

    #then cluster
    print(stderr "Clustering regions ... \n\n");


    #Initialize Best Ever ordering of regions, 
    #Current ordering of regions (@pos), and score
    #as the initial entered condition
    
    @pos = @regArray;
    $x1 = pathtotalglobal(\@pos , \%dis);  #calculate the total path
    @bestEver = @pos;
    $bestEverMin = $x1;

    #keep track of the number of iterations where their has been no change
    $noChange = 0;

   
 
    for($k=1; ($k<$numIterations)&& ($bestEverMin>0); $k++)       #max number of iterations      
      {
	
	$crossMin = $x1;

	
	if($noChange==0)
	  {
	    #Only examine positions, where there
	    #are connections crossing other connections
	    #Only do this, if there has been a change in the order
	    
	    @conflicts = checkpathconflicts(\@pos, \%dis);
	  }

	#define the indices that we might pick...
	#remove all of the indices that are not connected to other regions
	#by a path that crosses others

	@indices = @{ nonZeroIndices(\@conflicts) };


	#Pick a random index to work with, of those without conflicts
	@shuffled = shuffle(@indices);      
	$indexa = $shuffled[0];

	#How many potential choices are there?
	$at = $#indices+1;
	    
	#Since that position will be scrutinize
	#we consider it "unconflicted"
	#to avoid looking at it in the next iteration
	$conflicts[$indexa] = 0;
	

	#Print Iteration #, selected locus
	print(stderr "\nIteration \#$k. Evaluating optimal fit of locus : ");
	print(stderr "(selected from $at) ");
	print(stderr "$pos[$indexa]");

	
	
	for($p=0; $p<3; $p++)
	  {	
	    for($shuf2 =0; $shuf2<$numRegions; $shuf2++)
	      {

	    if($p==0) { $changeType = "repositioning $pos[$indexa] from $indexa to $shuf2";}
	    if($p==1) { $changeType = "switching $pos[$indexa] with $pos[$shuf2]";}
	    if($p==2) { $ll = ($indexa-$shuf2) % $numRegions;
			$changeType = "inverting a $ll region long segment from $pos[$indexa] to $pos[$shuf2]";}
			  
		if(($indexa == $shuf2) || (!(exists($dis{$pos[$indexa] }))))   {}
		  #If we are shifting to the position that we are in
		  #or there are no connections out of $indexa
		  #no need to evaluate
		else
		  {	
		    @pos2 = @ {changeRegionOrder(\@pos,$indexa, $shuf2, $p)};
		    $x2 = pathtotalglobal(\@pos2 , \%dis);  #calculate the total path      
		    
		    #check to see if this particular region arrangement is the best 
		    #for this iteration
		    if($x2<$crossMin)
		      {
			printf(stderr "\n  Can improve score from %0.3f to %0.3f by\n" ,$x1 ,$x2);
			print (stderr "      $changeType");

			$crossMin = $x2;
			@bestMin = @pos2;


		      }	
		    
		  }
	      }	
	  }
	
	
	#check if this particular region arrangement is the best
	#seen yet
	if($crossMin < $bestEverMin)
	      {
		print(stderr "\n\n\t**Obtained best optimization so far in iteration \#$k");
		@bestEver = @bestMin;
		$bestEverMin = $crossMin;
	      }	



	#check if this particular region arrangement is an improvement
	#on the arrangement resulting from the previous iteration
	if($crossMin < $x1)
	  {
	    $noChange = 0;   #since we have a change, we zero out no change parameter
	    printf(stderr "\n\tImproved score from %.3f to %.3f\n", $x1, $crossMin);
	    @pos = @bestMin;
	    $x1 = $crossMin;
	  }	

	#if we have tried every single paraeter, we need to 
	#shuffle to get out of a local minimum
	elsif( ($#indices+1)  ==0)
	  {	    
	    
	    $noChange = 0;
	    @pos = @{softShuffle(\@pos)};
	    $x1 = pathtotalglobal(\@pos , \%dis);  #calculate the total path      
	    printf(stderr "\n\t>>No improvement in Iteration \#$k...<<\n\tRandomly shuffling loci positions. \n\t(Note current best observed score is %0.3f)\n", $bestEverMin); 
	  }

	#otherwise, we have not altered any position, 
	#and we increment the nochange parameter
	else
	  {
	    $noChange +=1;
	    print(stderr "\nTotal number of no change steps: $noChange\n");
	  }	
	

	
      }
    
    $x1 = pathtotalglobal(\@bestEver , \%dis);
    $x2 = pathtotalglobal(\@regArray, \%dis);

    printf(stderr "\n\nClustering complete!! Best score achieved is %0.3f\n", $bestEverMin);
    
    if($x1<$x2)
      {
	@regArray = @bestEver;
      }	
    

    print(stderr "\nWriting order of clustered SNPs to file : clusteredOrder\n");
    open(OUT, ">clusteredOrder");
    for($ll=0; $ll<@regArray;$ll++)
      {
	print(OUT "$regArray[$ll]\n");
      }	
    
  }	
elsif ($clusterParameter eq "0")
  {
    print(stderr "No clustering of regions ... \n\n");
  }
else
  {
    $cnt = 0;
    if(!open(IN, "$clusterParameter")){die("Could not open the file $clusterParameter.\n\n");}
    print(stderr "Reading in $clusterParameter ... \n\n");
    while($line =<IN>)
      {
	
	chomp($line);
	$regArray[$cnt] = $line;
	$cnt = $cnt + 1;
	if(exists($regHashIndex{$line}))
	  {
	  }	
	else
	  {
	    die("could not find $line in $clusterParameter");
	  }	
      }	

  }






#Cluster genes within regions

if($geneClusterParam == 1 )
  {
    print(stderr "\n\nOrdering genes....\n\n");
    
    %bestRegHash = %regHash;
    $bestHashScore = 1000000000;
    
    
    
    @bigGeneArray = [];
    
    for($r=0; $r<$numRegions; $r++)
      {
	$thisRegion = $regArray[$r];
	@notShuffled = @{$regHash{$thisRegion}};
	
	#identify the number of active genes in this region

	($activeGeneCnt , $indexPointer) = findActiveGenes(\@notShuffled, \%dis2 );
	@activeGeneIndices = @{$indexPointer};
	$totalGenes = $last{$thisRegion}+1;

	$iteratedPointer = defineIterates($activeGeneCnt, \@notShuffled, \@activeGeneIndices);
	@iterated = @{$iteratedPointer};

	
	for($gi=0; $gi<@iterated; $gi++)
	  {	
    	    #Update the not shuffled parameter to represent
	    #the latest arrangement of genes in this region
	    @notShuffled = @{$regHash{$thisRegion} };

	    #rearrange the genes in the region
	    @{$regHash{$thisRegion} } = @{$iterated[$gi]};
	
	    #score the genes, with genes in "this region" rearranged
	    $geneArrangeScore[$gi] = scoreGeneArrangement(\@regArray, \%regHash, \%dis2 );


	    if($geneArrangeScore[$gi]<$bestHashScore )
	      {
		$bestHashScore = $geneArrangeScore[$gi];
		@{$regHash{$thisRegion}} =  @{$iterated[$gi]};
	      }	
	    else
	      {
		@{$regHash{$thisRegion}} =  @notShuffled;
	      }	
	    
	  }	

	print(stderr "Finished ordering genes in $thisRegion".
                   "\n  $activeGeneCnt genes out of $totalGenes are connected" .
                   "\n  $gi iterations achieved a score of $bestHashScore\n");   
    
	
      }

  }	
else
  {
    print(stderr "\n\nNo ordering of genes....\n\n");
  }	

#orient circle

if($orientParameter ==1)
  {

    $mostConRegion = findMostConnectedRegion( \@regArray, \%dis) ;
    print(stderr "$mostConRegion will be oriented to the top.\n\n" );
    @regArray = @ { orientCircle( \@regArray, $mostConRegion ) } ;


  }
elsif($orientParameter eq "0")
  {
  }
else
  {

    
    print(stderr "Orienting plot to place $orientParameter region at the top\n");

    @regArray = @ { orientCircle( \@regArray, $orientParameter ) } ;
    
  }	


print(stderr "Creating an image file\n\n");

#                              #
#    Make a new image here     #
#                              #

# Initialize a new image
$img = GD::Simple->new(4000,4000);

# Define the center of mass
$cX = 2000;
$cY = 2000;

#Define the diameter and radius of the central circus
$diam = 2300;
$radius = $diam/2;

# Define fraction of diameter that represents inner bound
$innerBound = 0.95;

#Define colors here
$black = $img->colorAllocate(255,255,255);
$blue = $img->colorAllocate(20,20,255);
$grey  = $img->colorAllocate(20,20,20);
$greyII  = $img->colorAllocate(130,130,170);
$red   = $img->colorAllocate(100,40,40);




print(stderr "Drawing regions and genes...\n\n");

#Draw and label regions
$cnt = 0;
for($i=0; $i<@regArray; $i++){

  print(stderr "\nDrawing $regArray[$i]...\n");

  if(($i%2)==0){
    $img->fgcolor($blue);
    $col = $blue;
  }
  else{
    $img->fgcolor($grey);
    $col = $grey;
  }	

  $region = $regArray[$i];
  
  #label the region
  $deg = ($cnt+($last{$region}+0.5)/2)*(360/$numGenes);
  $rad = deg2rad($deg);
  $cc = sin($rad);
  $dd = cos($rad);

  $img->moveTo($cX+1.32*$radius*$cc,$cY-1.32*$radius*$dd);
  $img->font('Arial'); 
  $img->fontsize(60);
  $img->angle($deg+270);
  $img->fgcolor($grey);
  $img->string("$region");
  $img->fgcolor($col);


  $img->fgcolor('white');
  $img->penSize(20,20);
  $deg = ($cnt-0.5)*(360/$numGenes);
  $rad = deg2rad($deg);
  $cc = sin($rad);
  $dd = cos($rad);
  $img->moveTo($cX+1.28*$radius*$cc,$cY-1.28*$radius*$dd);
  $img->angle($deg+270);
  $img->line($radius*0.05);

  $deg = ($cnt+($last{$region}+0.5))*(360/$numGenes);
  $rad = deg2rad($deg);
  $cc = sin($rad);
  $dd = cos($rad);
  $img->moveTo($cX+1.2*$radius*$cc,$cY-1.2*$radius*$dd);
  $img->angle($deg+270);
  $img->line($radius*0.2);
  $img->penSize(1,1); $img->fgcolor($col);


  print(stderr "Labeling:");

  for($k=0; $k<=$last{$region}; $k++){
    
    $gene = $regHash{$region}[$k];

    print(stderr "$gene ");

    $deg = $cnt*(360/$numGenes);
    

    $rad = deg2rad($deg);
    $cc = sin($rad);
    $dd = cos($rad);

    $geneDeg{$gene} = $deg;
    $genePosX{$gene} = $cX+0.99*$innerBound*$radius*$cc;
    $genePosY{$gene} = $cY-0.99*$innerBound*$radius*$dd;
    
    $img->moveTo($cX,$cY);
    


    for($j=0; $j<50; $j++){	
      #$img->arc($diam*1.25-$j,$diam*1.25-$j,$deg-90-0.5*(360/$numGenes),$deg-90+0.5**(360/$numGenes),gdNoFill|gdEdged);    
    }
    
    for($j=0; $j<1000; $j++){	
      #$img->arc($diam-$j,$diam-$j,$deg-90-0.5*(360/$numGenes),$deg-90+0.5*(360/$numGenes),gdNoFill|gdEdged);
      #$img->arc($diam-$j,$diam-$j,-90,-90+(360/$numGenes),gdNoFill|gdEdged);

      $deg = $cnt*(360/($numGenes)) + (($j-500)/1000)*(360/($numGenes));
      $rad = deg2rad($deg);
      $cc = sin($rad);
      $dd = cos($rad);


      $img->moveTo($cX+$innerBound*$radius*$cc,$cY-$innerBound*$radius*$dd);
      $img->angle($deg+270);
      $img->line( (1-$innerBound)*$radius);


      $img->moveTo($cX+1.29*$radius*$cc,$cY-1.29*$radius*$dd);
      $img->angle($deg+270);
      $img->line(0.02*$radius);
    }

    $img->penSize(6,6);
    $img->fgcolor('white');
    $img->moveTo($cX+$innerBound*$radius*$cc,$cY-$innerBound*$radius*$dd);
    $img->angle($deg+270);
    $img->line( (1-$innerBound)*$radius);
    $img->fgcolor($col); $img->penSize(1,1);
    

    if($geneScore{$gene}<=2)
      {
	$img->moveTo($cX+1.01*$radius*$cc,$cY-1.01*$radius*$dd);
	$img->font('Arial');
	$img->fontsize(40);
	$img->angle($deg+270);

	if($geneScore{$gene}<=$scoreThresh){
	  $img->font('Arial:bold');
	  $img->fgcolor($grey);
	}
	else{
	  $img->fgcolor($greyII);
	}	

	$img->string("-$gene");	
	$img->fgcolor($col);
      }	


    $cnt = $cnt + 1;
  }	

  
  
}


#Now Draw the connections


if(!open(IN, "$connectionFile")){die("Could not open $connectionFile.\n\n")}

while($line = <IN>)
  {
    chomp($line);
    @temp = split(/\s+/, $line);

    $fir = $temp[0];
    $sec = $temp[1];
    
    if($temp[2]>3){$temp[2] = 3}

    $s = 0;
    $temp[2] = (1+$s)*$temp[2] - $s*3;

    if($temp[2]<0.01){$temp[2] = 0.01}

    if(exists($geneScore{$fir}) &exists ($geneScore{$sec}) &  ($geneScore{$fir}<=$scoreThresh) & ($geneScore{$sec}<=$scoreThresh) & ($geneRegion{$fir} ne $geneRegion{$sec}))
      {
	$red   = $img->colorAllocate(min(30+$temp[2]*200,255),50,50);
	
	$img->fgcolor($red);
	connec($genePosX{$fir}, $genePosY{$fir}, $genePosX{$sec}, $genePosY{$sec}, $temp[2]*15);
#	print(stderr "$fir-->$sec\n");
      }

  }




# convert into png data and write out to file
print(stderr "\n\nPrinting to $outputFile\n\n");

open(OUT, ">$outputFile");
binmode OUT;
print OUT $img->png;




################################################################
#
#   Sub functions
#
################################################################

sub connec {

  my $fx = shift;
  my $fy = shift;
  my $sx = shift;
  my $sy = shift;
  my $thick = shift;
  
  my $res = 500;

  $img->penSize($thick,$thick);
 
  
  $img->moveTo($fx, $fy);

  my $midX = ($fx + $sx)/2;
  my $midY = ($fy + $sy)/2;

  $dist = sqrt(($fx-$sx)*($fx-$sx) + ($fy-$sy)*($fy-$sy));

  for($i=0; $i<=$res; $i++)
    {
      my $dx = $fx + ($sx-$fx)*$i/$res;
      my $dy = $fy + ($sy-$fy)*$i/$res;
      
      my $ccx = $dx + (($dist/$diam)**0.25)*2*(1 - $i/$res)*($i/$res - 0)*($cX - $midX);
      my $ccy = $dy + (($dist/$diam)**0.25)*2*(1 - $i/$res)*($i/$res - 0)*($cY - $midY);
      
      $img->lineTo($ccx,$ccy);
      
      #$img->string("$i");
      #print(stderr "$ccx $dx $ccy $dy $i\n");
    }	
  $img->lineTo($sx,$sy);


  
  
}


sub paths{
  my $firIndex = shift;
  my $secIndex = shift;

  my $numRegions = shift;
  my $i = $firIndex+1;

  $i = ($i % $numRegions);

  my @arrayIndices;

  $cnt = 0;
  while($i != $secIndex)
    {
      $arrayIndices[$cnt] = $i;
      $cnt = $cnt + 1;
      $i = $i + 1;
      $i = $i % $numRegions;
      
    } 
  
  return @arrayIndices;
}



sub pathtotal{
  my $inda = shift;
  my $indb = shift;
  my $numSet = shift;
  my $posPoint = shift;
  my $disHash = shift;
  my @position = @{$posPoint};
  my %distance = %{$disHash};

  
  my @aa = paths($inda,$indb,$numSet);
  my @bb = paths($indb,$inda,$numSet);
  
  my $ii;
  my $jj;
  my $crosses  = 0;

  if(exists($distance{$position[$inda]}{$position[$indb]}))
    {
      for($ii=0; $ii<@aa; $ii++)
	{for($jj=0; $jj<@bb; $jj++){
	  $crosses = $crosses + $distance{$position[$aa[$ii]]}{$position[$bb[$jj]]};
	}	
       }	

      $crosses = $crosses * $distance{$position[$inda]}{$position[$indb]};
    }
  else
    {
      $crosses = 0;
    }	

  return $crosses;

}	



sub pathtotalglobal{
  my $posPoint = shift;
  my $disHash = shift;
  my @position = @{$posPoint};
  my %distance = %{$disHash};

  my $numSet = $#position + 1;

  my $inda;
  my $indb;

  my $ii;
  my $jj;

  $totalBig = 0;

  for($inda=0; $inda<($numSet -1); $inda++)
    {
      for($indb=$inda+1; $indb<$numSet; $indb++)
	{
	  
	  $total = 0;
	  my @aa = paths($inda,$indb,$numSet);
	  my @bb = paths($indb,$inda,$numSet);
	  if(exists($distance{$position[$inda]}{$position[$indb]}))
	    {

	      my $xx = $distance{$position[$inda]}{$position[$indb]};

	      for($ii=0; $ii<@aa; $ii++)
		{for($jj=0; $jj<@bb; $jj++){
		  $total = $total + 
		    $xx*$distance{$position[$aa[$ii]]}{$position[$bb[$jj]]};
		  
		}	
	       }	




	      $totalBig = $total + $totalBig; 
	    }

	}
    }	
  return($totalBig);


}	



sub checkpathconflicts{
  my $posPoint = shift;
  my $disHash = shift;
  my @position = @{$posPoint};
  my %distance = %{$disHash};
  my @total;
  my $numSet = $#position + 1;

  my $inda;
  my $indb;

  my $ii;
  my $jj;

  $totalBig = 0;



  for($inda=0; $inda<($numSet); $inda++)
    {
      $total[$inda] = 0;
      for($indb=0; $indb<($numSet); $indb++)
	{
	  
	  if($inda==$indb){}
	  else
	    {
	      $total = 0;
	      my @aa = paths($inda,$indb,$numSet);
	      my @bb = paths($indb,$inda,$numSet);
	      if(exists($distance{$position[$inda]}{$position[$indb]}))
		{
		  
		  my $xx = $distance{$position[$inda]}{$position[$indb]};
		  
		  for($ii=0; $ii<@aa; $ii++)
		    {for($jj=0; $jj<@bb; $jj++){
		      $total[$inda] = $total[$inda] + 
		    $xx*$distance{$position[$aa[$ii]]}{$position[$bb[$jj]]};
		      
		    }	
		   }	
		}	



	    }

	}

    }
  return @total;


}	







sub genPermuTranspOrder
{
    # @e is the original (unpermuted) list of elements.
    my @e = @_;

    # @o is the order we want to output the elements.
    # So @e[@o] is the permuted list.
    my @o = (0..$#e);

    # @p is the position of each element in @o,
    # that is, @o = (0..$#e)[@p]
    my @p = (0..$#e);
    # Note that it is also true that @p = (0..$#e)[@o].

    # $d[$n] is the direction we are moving $e[$n].
    my @d = (-1) x @e;

    # We return a code reference (closure) that each time
    # it is called will return the next permutation of @e.
    # Note that the first time this is called, it permutes
    # @e one step, so the calling code must output the
    # original @e before calling the iterator.
    return sub {

        # Start by assuming we'll move the last element of @e.
        my $n = $#e;

        my $j;
        for(;;) {

            # If we move this one, we'd set $p[$n] += $d[$n].
            # So $j is the possible new value for $p[$n].
            $j= $p[$n] + $d[$n];

            # That is okay if $j is in range and
            # we aren't swapping with an element later in @e.
            last   if  0 <= $j  &&  $j <= $#e  &&  $o[$j] < $n;

            # It wasn't okay.  So move this element
            # in the other direction next time.
            $d[$n]= -$d[$n];
            # And move to the left in @e.
            --$n;

            # If we are to the first element in @e
            # then we are done.
            return   if  $n < 1;
        }

        # $p[$n] is the (permuted) position of the $n'th
        # element ($e[$n]) we wish to swap in the direction
        # of $d[$n].  So we want to swap $o[ $p[$n] ] with
        # $o[ $p[$n]+$d[$n] ].  $j is already that second
        # offset so we'll set $i to be the first offset
        # and later we'll swap @o[$i,$j].
        my $i = $p[$n];

        # We need to keep @o = (0..$#e)[@p], so we need
        # swap corresponding elements of @p to match @o.
        # We should swap @p[@o[$i,$j]], but $o[$i] is $n.
        @p[ $n, $o[$j] ]= @p[ $o[$j], $n ];

        @o[$j,$i] = @o[$i,$j];

        # To confuse your instructor, the commented
        # line produces the permutations in a different
        # and strange "transposition order".
        # return @e[@p];

        return @e[@o];
    };
}



sub findActiveGenes()
  {
    my $nsPointer = shift;
    my $dis2Pointer = shift;

    my @notShuffled = @{$nsPointer}; 
    my %dis2        = %{$dis2Pointer};

    my $activeGenes =0;
    my @activeGeneIndices = ();

    my $ag;
    
    for($ag=0; $ag<@notShuffled; $ag++)
      {
	if(exists($dis2{$notShuffled[$ag]})){
	  $activeGenes++;
	  $activeGeneIndices[$#activeGeneIndices+1] = $ag;
	}
	
      }
    
    return($activeGenes, \@activeGeneIndices)
  }	
  




sub defineIterates()
    {
      my $activeGeneCnt = shift;
      my $nsPointer = shift;
      my @notShuffled = @{$nsPointer};
      my $agiPointer = shift;
      my @activeGeneIndices = @{$agiPointer};
     
      my @iterated = ();

      if($activeGeneCnt>4)
	{
	  my $numGeneIters=24 ; 

	  for(my $gi=0; $gi<$numGeneIters; $gi++)
	      {
		@shuffledGenes = shuffle(@notShuffled);
		@{$iterated[$gi]} = @shuffledGenes;
	      }

	  }	
      elsif($activeGeneCnt<=1)
	{
	  @{$iterated[0]} =  @notShuffled;
	}	
      else
	{

	  #note the indices - initially unshuffled
	  my @shuffledIndices = @activeGeneIndices;
	   
	  #define iterator
	  my $iter = genPermuTranspOrder( @shuffledIndices );


	  my $iterInd = 0;
	  
	  #Permute through iterations
	  do {
	    #Note the genes prior to shuffling
	    @shuffledGenes = @notShuffled;
	    
	    #for each gene, used shuffled active indices to reorder
	    for(my $ix=0; $ix<@activeGeneIndices;$ix++){
	      $shuffledGenes[$activeGeneIndices[$ix]] = $notShuffled[$shuffledIndices[$ix]]; }

	    @{$iterated[$iterInd]} = @shuffledGenes;
	    
	    $iterInd +=1;
	    
	  } while(   @shuffledIndices = $iter->()  );
	  
	  
	  
	}
      

      return(\@iterated);
    }	



sub scoreGeneArrangement()
   {
     my $raPointer = shift;
     my @regArray = @{$raPointer};
     my $rhPointer = shift;
     my %regHash = %{$rhPointer};
     my $gdPointer = shift;
     my %geneDis = %{$gdPointer};

     my @bigGeneArray = [];


     for(my $rr=0; $rr<@regArray; $rr++)
       {
	 $thisRegion2 = $regArray[$rr];
	 for(my $g=0; $g< @{$regHash{$thisRegion2}}; $g++)
	   {
	     $bigGeneArray[$#bigGeneArray+1] = $regHash{$thisRegion2}[$g];
	   }
       }

     return( pathtotalglobal(\@bigGeneArray , \%geneDis) );
 
   }

	
	

sub changeRegionOrder
     {
       my $posPointer = shift;
       my @pos = @{$posPointer};
       my $indexa = shift;
       my $shuf2 = shift;
       my $p = shift;
       

        my @pos2 = @pos;
       
       #Move $indexa to another position
       if($p==0){
	 
	 splice(@pos2, $indexa , 1);
	 @temp =("$pos[$indexa]");
	 splice(@pos2, $shuf2, 0, @temp);
	
       }

       #Swap $indexa with another position
       if($p==1){
	 $pos2[$indexa] = $pos[$shuf2];
	 $pos2[$shuf2] = $pos[$indexa];
	
       }

       #Invert the stretch from $indexa to another position
       if($p==2){
	 $steps = ($shuf2-$indexa+1);
	 if($steps<0){$steps= $steps+$numRegions}	
	 for($ap=indexa; $ap<$steps; $ap++)
	   {
	     $ind1 = ($indexa+$ap)% $numRegions;
	     $ind2 = ($shuf2-$ap) % $numRegions;
	     
	
	     $pos2[$ind1] = $pos[$ind2];
	   }  		  			  
	 
       }	
       return(\@pos2);
     }	


sub nonZeroIndices
       {
	 my $conflictPointer = shift ;
	 my @conf = @{$conflictPointer};

	 my @nonZeroIndex = ();
	 for(my $i=0; $i<@conf; $i++)
	   {	
	     if($conf[$i] ==0){}
	     else{
	       $nonZeroIndex[$#nonZeroIndex+1] = $i
	     }	
	   }	

	 return(\@nonZeroIndex);

       }


sub softShuffle
     {

       #shuffles about half of the positions

       my $posPointer = shift ;
       my @pos = @{$posPointer};

       my $numRegions = $#pos + 1;
       my @indices = (1 .. ($numRegions-1));

       my @shuffled = shuffle(@indices);      

       #shuffle only half of the regions
       my @shuffled1 = @shuffled[0..floor($numRegions/2)];        
       my @shuffled2 = shuffle(@shuffled[0..floor($numRegions/2)]); 
	    
       my @pos2 = @pos;
       @pos2[@shuffled1] = @pos[@shuffled2];
       
       return(\@pos2);
     }	

sub findMostConnectedRegion
     {
       my $regPointer = shift;
       my $disPointer = shift;

       my @regArray   = @{$regPointer};
       my %dis        = %{$disPointer};
       
       my $maxScore = 0;
       my $maxIndex = 0;
       
       my @scoreComplexity;

       for(my $ll=0; $ll<@regArray;$ll++)
	 {
	   $scoreComplexity[$ll] = 0;
	
	   foreach my $key (keys(%{$dis{$regArray[$ll]}}))
	     {
	       
	       $scoreComplexity[$ll] = $scoreComplexity[$ll] + $dis{$regArray[$ll]}{$key};
	     }	


	   if($scoreComplexity[$ll]>$maxScore)
	     {
	       $maxScore = $scoreComplexity[$ll];
	       $maxIndex = $ll;
	     }	
	 }	
    
       return(  $regArray[$maxIndex]  )
     }

sub orientCircle
     {
       my $regPointer = shift;
       my $orientLabel = shift;

       my @regArray   = @{$regPointer};
       
       my $orientInd = -1;
       my $newInd;
       my @regArray2;
       my $i;
       my $numReg = $#regArray + 1;

       for($i=0; $i<@regArray; $i++)
	 {
	   if($regArray[$i] eq $orientLabel){$orientInd = $i;}
	   
	 }	

       for($i=0; $i<@regArray; $i++)
	 {
	   $newInd = ($i + $orientInd) % $numReg;
	   #print(stderr "$i  $newInd\n");
	   $regArray2[$i] = $regArray[$newInd];
	 }
       @regArray = @regArray2;

###
       return (\@regArray2);       

       
     }	


sub initializeParameters
   {
     my %hash ;
     $hash{"--scoreThresh"}   = shift;    #Default score threshold
     $hash{"--clusterRegions"}= shift;    #Default, cluster regions with 200 iterations
     $hash{"--clusterGenes"}  = shift;    #Default, cluster genes within regions
     $hash{"--orient"}        = shift;    #Default, orient circle
     $hash{"--data"}          = shift;
     $hash{"--connect"}       = shift;
     

     return (\%hash);
   }


sub updateParams
     {
       my $inputPointer = shift;
       my $hashPointer = shift;

       my @args = @{$inputPointer};
       my %paramHash = %{$hashPointer};

       my %newParams = %paramHash;

       for(my $i=0; $i<@args; $i+=2)
	 {
	   
	   die("\n\n\"$args[$i]\" is not an appropriate parameter flag\nAppropriate parameter flags are \"--data\", \"--connect\", \"--clusterRegions\", \"--scoreThresh\", \"--clusterGenes\", and \"--orient\"\n\n") 
	     unless(exists($paramHash{$args[$i]}));
	   
	   die("\n\nNo argument for \"$args[$i]\" entered\n\n") 
	     unless(exists($args[$i+1]));
	   
	   $newParams{$args[$i]} = $args[$i+1];
	   

	 }	

       return(\%newParams);

       
     }	



sub errorCheck
    {

      my $paramPointer = shift;
      my %params = %{$paramPointer};
      my $line;
      my @reg;
      my %geneCheck; my %regCheck;
      my $flag;
      my %clusterFileRegions;

      my $regionFile = $params{"--data"}; 
      my $connectionFile = $params{"--connect"} ;
      my $clusterParameter = $params{"--clusterRegions"}; 
      my $orientParameter = $params{"--orient"} ;
      my $geneClusterParam = $params{"--clusterGenes"} ;
      my $scoreThresh =  $params{"--scoreThresh"} ;
      

      

      #check data (regions) file
      
      #check that it opens
      if(!open(IN, $regionFile)){die("Could not open $regionFile file.\n\n")}
     
      #check the format, line by line
      $flag = "";
      while($line=<IN>)
	{
	  chomp($line); @reg = split(/\s+/, $line);

	  #check format for each line
	  $flag = $flag . "Formatting problem in $regionFile:\nline==>$line.\n"
	    unless( ($reg[0]=~/.+/) && ($reg[1]=~/.+/) 
		    && ($reg[2]=~/^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/));

	  #check and see if any gene is lised twice
	  $flag = $flag . "The $reg[1] gene is in $regionFile multiple times.\n"
	    unless(!exists($geneCheck{$reg[1]}));
	  
	  $geneCheck{$reg[1]} = 1;  $regCheck{$reg[0]} = "1";


	  #check to make sure score/pvals are between 0 and 1
	  $flag = $flag . "P-value out of range in $regionFile:\n==>$line.\n"
	    unless ( ($reg[2]<=1) && ($reg[2]>=0))

	  }
      if(length($flag)>0){die("Error in $regionFile (--data) file:\n" . $flag)}
      
      close(IN);

      #ERROR - connection File
      if(!open(IN, "$connectionFile")){die("Could not open $connectionFile.\n\n")}

      #check connection file format
      $flag = "";
      while($line=<IN>)
	{
	  chomp($line); @reg = split(/\s+/, $line);

	  #check formatting
	  $flag = $flag . "Formatting problem in $connectionFile:\n$==>$line.\n"
	    unless( ($reg[0]=~/.+/) && ($reg[1]=~/.+/) 
		    && ($reg[2]=~/^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/));

	  
	  #make sure pairs are not being duplicated
	  $flag = $flag . "==>$line\n\nThe $reg[1] and $reg[0] gene pair is in $connectionFile multiple times.\n"
	    unless(!exists($pairCheck{$reg[1]}{$reg[0]}));
	
	  $pairCheck{$reg[1]}{$reg[0]} = 1;    
	  $pairCheck{$reg[0]}{$reg[1]} = 1;

	  #check and make sure both genes are in the date file
	  $flag = $flag . "$reg[0] gene in $connectionFile is not listed as a gene in $regionFile.\n"
	    unless($geneCheck{$reg[0]});

	  $flag = $flag . "$reg[1] gene in $connectionFile is not listed as a gene in $regionFile.\n"
	    unless($geneCheck{$reg[1]});


	  print(stderr "WARNING : In $connectionFile, in the line\n==>$line\nThere is a connectivity score > $maxThick ... it will be corrected to $maxThick.\n\n")
	    unless($reg[2]<=$maxThick);
	}
      
      close(IN);
      if(length($flag)>0){die("Error in $connectionFile (--connect) file:\n" . $flag)}

      
      # Cluster Parameter
      #check to see if it is a file name
      if(open(IN, $clusterParameter))
	{
	  $flag = "";
	  #check that each region is in the Region Data file
	  while($line=<IN>)
	    {
	      chomp($line);
	      $clusterFileRegions{$line} = 1;
	      $flag = $flag . "The $line is not a region listed in $regionFile\n"
		unless(exists($regCheck{$line}));		
	    }
	  
	  #check that each region in the Region Data file is in this file
	  foreach $region (keys(%regCheck))
	    {
	      $flag = $flag . "The $region region listed in the $regionFile region file is not listed in the $clusterParameter clustering file\n"
		unless(exists($clusterFileRegions{$region}));
	    }	

	  
	  if(length($flag)>0){die("Error in $clusterParameter (--clusterRegions) file:\n" . $flag)}
	}	
      else
	{
	  die("Cluster Parameter (3rd argument) must be either 1 (no clustering), a value between 2 and $maxIter (number of clustering iterations), or an appropriate file name")
	unless (  ( ($clusterParameter =~ /^\d+$/)&&($clusterParameter<=$maxIter)) ||open(IN, $clusterParameter));
	}


      
      #check scoreThresh
      die("The $scoreThresh parameter must be a value between 0 and 1\n")
	unless( ($scoreThresh=~/^[+]?[0-9]*\.?[0-9]+$/) && ($scoreThresh<=1) && ($scoreThresh>=0));


      #check orient parameter
      die("The $orientParameter must be 0,1, or a region named in the region (--data) file")
	  unless( ($regCheck{$orientParameter})
		  || ($orientParameter eq "0") || ($orientParameter eq "1"));
      

      #check gene cluster parameter
      print(stderr "$geneClusterParam\n\n");
      die("The $geneClusterParam (--clusterGenes parameter) must be 0 or 1")
	  unless( ($geneClusterParam eq "0") || ($geneClusterParam eq "1"));




    }
