#phmamspipe.pl 
# Written  by Dan Gusfield,  copyright 2009
#  Permission to use this program comes with no guarantees. You may use it if you don't laugh at it and
#  don't expect the author to understand or explain the code in the future.
#  Permission is not granted for redistribution or distribution of modified versions. Please contact
#  the author concerning distributing any modifications.
#
#
#This is the master program that runs the pipeline to test the PI-graph
#approach to the Perfect Phylogeny problem with missing data. This version works with data
#generated by the Hudson program ms. For a version that works with data generated in some other
#way, use the program pamamspipe.pl.
#
#It all looks complicated, but you really don't need to understand it. To run it, you need
#all the programs that came in the tar file in the same directory, you need to be able to
#call Cplex from that directory, and you need Perl. See the file `mydata'
#for an example of ms produced input data. See the files `msummary' and `mstats' for an example
#of the output generated from running phmamspipe.pl with mydata as input, starting with
#files msummary and mstats both empty. You should save those files, and then compare them 
#to the result of running phmamspipe.pl
#with mydata, starting with empty files msummary and mstats. There are other output
#files as well that get generated from each individual dataset as a result of running phmamspipe.pl. 
#See readme for details.
#
# This pipeline takes an input file generated by ms (the name of the file must be specified by the
# user), and the number of allowed states 
#  and runs the data through the pipeline to test the PI-graph approach to the PP problem
#  with missing data. The data file can contain many individual problem instances.  For example,
#  the file `mydata' has 50 instances of a 20 by 20 input matrix with up to 5 states per character.
#   The program is executed with
#  the line command: perl phmamspipe.pl mydata 5 
#
# In order to use this pipeline, you need to have all the programs found in the tar file in 
# one directory; you need to have Perl callable from that directory; and you need to have Cplex
# callable from that directory. If you don't have Cplex, you can use the version phmanspipenoilp.pl
# which will work on all problem instances that don't need to run an ILP (a large percentage), and
# will note which instances require an ILP. Also, the program ChordAlg is a C program compiled
# for Mac OSX. A version compiled for Linux will be released shortly (we don't do windows).
#
use Benchmark;

# Command-line arguments: the ms-generated data file and the number of allowed
# states per character, e.g.  perl phmamspipe.pl mydata 5
die "Usage: perl phmamspipe.pl <ms data file> <number of states>\n"
    unless @ARGV >= 2;

$data = $ARGV[0];

# Only the first line of the data file is read here; it is echoed into the
# summary to label this run.  The three-argument open keeps characters in the
# file name from being interpreted as an open mode, and a lexical handle avoids
# clobbering Perl's special DATA handle.
open (my $data_fh, '<', $data) or die "Cannot open data file '$data': $!\n";
$dataline = <$data_fh>;
close ($data_fh);
$dataline = '' unless defined $dataline;   # empty data file: nothing to echo

# This appends information on this execution to the file `msummary'.
open (SUMMARY, '>>', 'msummary') or die "Cannot open msummary for appending: $!\n";
print SUMMARY "\n*****************\nNew ms file $dataline";

$states = $ARGV[1];

#The next line specifies what fraction of the entries should be randomly deleted in the different tests.
#The user specifies this. Uncomment (by removing the #) the line that you want to use, or
#create a new line.  In the following, the active line tests only the case of zero deletions and
#the case of 10% deletions.
foreach my $fractiondeletion (0.0, 0.1)   # specify the fraction of deletions here.
# foreach my $fractiondeletion (0.0, 0.05, 0.2, 0.35)   # an alternative example
{
    print SUMMARY "\n*****************\nNew processing of data generated by $dataline";
    print "$fractiondeletion\n";

    # This processes the file $data, creating individual data files and a list of
    # those files in file `datalist'.  The list form of system() passes the file
    # name to the program untouched by the shell.
    system ('perl', 'multextract.pl', $data, $fractiondeletion, $states);

    # Remove work files left over from an earlier pass or run (initialchordality
    # and finalchordality are appended to below).  Files that do not exist are
    # simply skipped.
    unlink qw(ilplist nopplist applist graphcounts initialchordality finalchordality);

    open (my $list_fh, '<', 'datalist')
        or die "Cannot open datalist (did multextract.pl run correctly?): $!\n";
    # `dense' is rewritten on every pass; it lists the instances rejected as too dense.
    open (my $dense_fh, '>', 'dense')
        or die "Cannot open dense for writing: $!\n";

    while (my $source = <$list_fh>) {
        chomp $source;
        print "$source\n";

        # The first and the last number in the data file name are reused to name
        # this instance's work files.  Capture them immediately, and skip any line
        # that does not match rather than silently reusing the previous instance's
        # numbers (which would overwrite that instance's files).
        unless ($source =~ /(\d+)state(\d+).(\d+).(\d+)/) {
            warn "Skipping unrecognized entry in datalist: '$source'\n";
            next;
        }
        my ($maxstates, $instance) = ($1, $4);

        print SUMMARY "\n$source, $fractiondeletion\n";

        my $graph = "${maxstates}graph.$instance";
        my $trans = "${maxstates}trans.$instance";
        my $sep   = "${maxstates}sep.$instance";

        my $t0 = Benchmark->new;

        # This makes the partition intersection graph, and node color information,
        # from each data instance.  It returns a nonzero status if the density of
        # the graph is too high for a perfect phylogeny solution.
        # NOTE(review): any other failure of makegraph2.pl is also nonzero and is
        # therefore reported as "too dense" as well - confirm that is acceptable.
        my $makegraph_status =
            system ('perl', 'makegraph2.pl', $source, $graph, $trans, $maxstates);

        if ($makegraph_status == 0) {
            # This tests whether the graph is already chordal.
            system ("./ChordAlg -chordality $graph NULL >> initialchordality");

            # This finds all the minimal separators and determines which are legal
            # and which are illegal, determines which pairs of legal minimal
            # separators cross, and if there are any illegal minimal separators
            # that are not crossed by any legal minimal separators.
            system ('./ChordAlg', '-amscolors', $graph, $trans, 'ill', $sep, 'NULL', 0, 0, 0, 1);

            # This further processes the minimal separators.
            system ('perl', 'septranslate.pl', $sep, $trans, $source);

            # This program determines if an ILP needs to be created, and creates one
            # if so.  It creates a list `ilplist' of the ILP instances that need to
            # be solved.
            system ('perl', 'mppilp.pl', "t$sep", 'out');

            # Keep a per-instance copy of the file `out' written by mppilp.pl.
            system ("cat out > ${maxstates}mppilp.$instance.lp");
        }
        else {
            print {$dense_fh} "$source\n";
            print SUMMARY "$source is too dense - no PP possible\n";
        }

        my $t1 = Benchmark->new;
        my $td = timediff($t1, $t0);
        print ("Time used is to find the min. seps. and process the file is", timestr($td), "\n");
        print SUMMARY ("Time used is ", timestr($td), "\n");
    }
    close ($list_fh);
    close ($dense_fh) or warn "Problem closing dense: $!\n";

    print SUMMARY "\nInformation about ILP solutions\n";

    # This calls a perl program which calls Cplex to execute each of the ILPs in
    # ilplist.  The trace information from the Cplex executions is written to file
    # `blat'.  (The original passed an extra variable that was never set anywhere in
    # this file, so the program has always been run without arguments.)
    system ('perl', './listsolveilp.pl');

    # This uses the cplex solution, if feasible, to augment the data with new rows,
    # as specified by the ILP solution.  It creates a list `adatalist' of the
    # augmented data sets.  Adding new rows to the data is a kludgey and lazy, but
    # correct, way to augment the data.
    my $augment_status = system ('perl', './listaugmentM.pl', 'ilplist', 'blat');

    # Next the augmented data sets are processed to check that each generates a
    # chordal partition intersection graph.
    if ($augment_status == 0) {
        unlink 'b';

        if (open (my $alist_fh, '<', 'adatalist')) {
            while (my $source = <$alist_fh>) {
                chomp $source;
                print "$source\n";

                # Same naming scheme as above, with an `a' prefix for the
                # augmented instance's work files.
                unless ($source =~ /(\d+)state(\d+).(\d+).(\d+)/) {
                    warn "Skipping unrecognized entry in adatalist: '$source'\n";
                    next;
                }
                my ($maxstates, $instance) = ($1, $4);
                my $graph = "a${maxstates}graph.$instance";
                my $trans = "a${maxstates}trans.$instance";

                system ('perl', 'makegraph.pl', $source, $graph, $trans);
                system ("./ChordAlg -chordality $graph NULL >> finalchordality");
            }
            close ($alist_fh);
        }
        else {
            warn "Cannot open adatalist: $!\n";
        }

        # Any line containing `no' in finalchordality marks an augmented instance
        # whose graph is not chordal.  The file does not exist when no augmented
        # instance was processed, in which case there is nothing to scan.
        my $all_chordal = 1;
        if (open (my $chord_fh, '<', 'finalchordality')) {
            while (my $line = <$chord_fh>) {
                if ($line =~ /no/) {
                    print "Found a bad example - this should have been chordal, but is not.\n";
                    $all_chordal = 0;
                }
            }
            close ($chord_fh);
        }
        if ($all_chordal) {
            print "All augmented matrices produce chordal graphs\n";
            print SUMMARY "\nAll augmented matrices produce chordal graphs\n";
        }
    }
    else {
        print "There were no ILPs to solve\n";
        print SUMMARY "There were no ILPs to solve\n";
    }
}
      # Final bookkeeping.  The snapshot msummarysav is taken before the END marker
      # is written, so it does not contain that marker.
      system ("cp msummary msummarysav");
      print SUMMARY "END\n";
      # Close the summary explicitly so that everything written to it is on disk
      # (and any write error is reported) before summstats.pl runs.
      close (SUMMARY) or warn "Problem closing msummary: $!\n";

      # Save the previous statistics file, if there is one, and remove it.
      if (-e 'mstats') {
            system ("cp mstats mstatssav");
            unlink ('mstats') or warn "Cannot remove mstats: $!\n";
      }

      system ("perl summstats.pl");
                # statistical information from this execution is written to file `mstats'
                # NOTE(review): the original comment said "appended", but mstats is
                # removed just above, so summstats.pl presumably recreates it - confirm.
                # statistical information from this execution gets appended to the end of
                # file `mstats'
