#!/pkg/bin/perl -w
# June 2015, renamed PERILP.pl
#
# modified February 2015 by Dan Gusfield,
# persistent.pl modified from  bbmisstest.pl
# to create ILPs to solve the Persistent Phylogeny Problem
# If there is a persistent phylogeny, then there will be an optimal for the ILP of value 0,
# and if there is no persistent phylogeny, then the ILP will be infeasible.
#
# Written  by Dan Gusfield  copyright 2006
# Permission to use this program comes with no guarantees. You may use it if you don't laugh at it and
# don't expect the author to understand the code in the future.
#
# May 10, 2006 Try to get rid of the X variables 
# May 9, 2006, Modified misstest.pl to implement Dan Brown's suggestion on how to reduce the number of variables in the ILP.
#
# June 4, 2005 changed use of stringlen, right and left. This should have no affect on
# the output when all columns are selected, but fixed a bug when only an interval is
#selected, and also the logic is clearer now.
#
#December 5, 2004
# use diagnostics
# Generates the ILP to solve the problem of
# setting the missing values in order to minimize the number of
# resulting incompatible pairs of sites.
#
open IN, "$ARGV[0]"; # the input matrix consisting of 0s, 1s and 2s. The 2 is used here in place of a `?' in the persistent phylogeny
                     # problem.
open OUT, ">constraints";
open OUT1, ">objective";
$line1 = <IN>;
# print " line 1 is: $line1";
$line1 =~ /(\d+.(\d+))/; # this is the set identifier, which should be on the first line of the input.
$set = $1;
# print "The set is $set\n";


@lines = <IN>;
$line = $lines[1];
#print "$line";
chomp $line;
$line =~ tr/ //d;
$stringlen = length ($line);
# print "$stringlen ***\n";
$left = 0;
$right = $stringlen-1; # right and left now define the range of the indices into the data.

   %count = ();

   foreach $i ($left...$right -1) {
     foreach $j ($i+1...$right) {
     $ii = $i+1;
     $jj = $j+1;
     $key = $ii . ',' . $jj;
 #   print OUT "$key\n";

     %{$hash{$key}} = ();     
     $count{$key} = 0;

#     $secondkey = $key;
#     %{$secondhash{$secondkey}} = ();
#     $count{$secondkey} = 0;     
 
     }
   }


print OUT1 "minimize \n st \n";
# print OUTM "maximize \n";



     
$linecount = 1;
foreach $line (@lines) {
 $line =~ tr/ //d;
 chomp $line;

   @chars = split(//, $line);
   foreach $i ($left...$right -1) {    
     foreach $j ($i+1...$right) {
     $ii = $i+1;
     $jj = $j+1;
     $key = $ii . ',' . $jj;
     $value  = $chars[$i] . ',' . $chars[$j];
     if (($chars[$i] eq '2') or ($chars[$j] eq '2')) {
    #    print OUT "$i, $j \n";
       $variable{$key} = 1;  # this is a col. combination that needs further examination
       $vhash{$linecount}{$key} = $value;   # this holds the tuple-type at those two cols.
                                            # in row linecount.
   #   print OUT "Another $key; $value, $count{$key}\n";
     }
     elsif (! defined $hash{$key}{$value}) { # in this case, the tuple does not involve a 2, and
                                             # we want to record the binary type seen that col. pair
       $hash{$key}{$value} = 1;
       $count{$key}++;    # this accumulates the number of binary types seen so far in that  col. pair.
   #   print OUT "Another $key; $value, $count{$key}\n";
     }
  } 
 }
  $linecount++;  # this ends up overcounting the number of lines by one.
}


 $count = 0;
 foreach $ekey (sort keys %count) {
  if ($count{$ekey} == 4) {
#  print OUT "$ekey  $count{$ekey} \n";
   print OUT "LB$ekey = 1\n";           # this puts in an LB variable set to 1 for
                                        # every pair that is already in conflict. Remove this to
                                        # just compute the extra conflicts rather than the total conflicts.
       print OUT1 "+ LB$ekey ";
       if (0 == $count % 8) {
       print OUT1 "\n";
       }
  $count++; # this counts the number of pairs of cols. that are in conflict even ignoring the 2's
  }
 }
    print OUT1 "\n";


$linecount = 0;
foreach $line (@lines) {
$linecount++;

 $line =~ tr/ //d;
 chomp $line;
 @chars = split(//, $line);
 foreach $i ($left...$right) {
   $ii = $i+1;
     if ($chars[$i] eq '2') {
         print OUT "Y$linecount,$ii <= 1\n";  # May 9, 2006 is this really needed? It somehow helps the Cplex
 #       libraries output the values of all the variables, otherwise it does not output the values of variables that
 #       are eliminated during preprocessing. But for postprocessing, we need all the variable values.
         if ($ii % 2 == 0) {
            $iim1 = $ii - 1;
            print OUT "Y$linecount,$ii - Y$linecount,$iim1 = 0\n";
         }
         $bin{"Y$linecount,$ii"} = 1;
     }
     elsif ($chars[$i] eq '1') {
         print OUT "Y$linecount,$ii = 1\n";
     }
     else {
         print OUT "Y$linecount,$ii = 0\n";
     }
 }
}


 $k = 0;
 $useful = 0;
 $paircount = 0;
   foreach $i ($left...$right -1) {
     foreach $j ($i+1...$right) {
     $ii = $i+1;
     $jj = $j+1;
       $key = $ii . ',' . $jj;
      if ((defined $variable{$key}) and ($count{$key} < 4)) {           # we only further examine a pair of columns if they have some 
                                                                         # variable element(s) and are not already incompatible
        $c11 = $c10 = $c01 = $c00 = 0;
        $I11 = $I10 = $I01 = $I00 = 0;
        $stringforpair = "";

        foreach $lin (1..$linecount){
         if (defined $vhash{$lin}{$key}) {
         $paircount++; 
 #        print "$paircount\n";

             $value = $vhash{$lin}{$key};
             if ($vhash{$lin}{$key} eq '2,2') {   

             if (!defined $hash{$key}{'1,1'}) {
              $I11 = 1;
              $c11++;                         # this records that 1,1 is missing in the pair of columns but can be 
                                              # created in this row.

             $stringforpair .= "D$key,1,1 - Y$lin,$ii - Y$lin,$jj >= -1\n";  #if the Y variables are set to 1,1 then the D11
	                                                                     # variable must be set to 1 for that column pair.
             }

             if (!defined $hash{$key}{'0,0'}) {
              $I00 = 1;
              $c00++;                         # this records that 0,0 is missing in the pair of columns but can be 
                                              # created in this row.
             
             $stringforpair .= "D$key,0,0 + Y$lin,$ii + Y$lin,$jj >= 1\n";  #if the Y variables are set to 0,0 then the D00
	                                                                     # variable must be set to 1 for that column pair.
             }

             if (!defined $hash{$key}{'1,0'}) {
              $I10 = 1;
              $c10++;                         # this records that 1,0 is missing in the pair of columns but can be 
                                              # created in this row.

             $stringforpair .= "D$key,1,0 - Y$lin,$ii + Y$lin,$jj >= 0\n";  #if the Y variables are set to 1,0 then the D10
	                                                                     # variable must be set to 1 for that column pair.
             }

             if (!defined $hash{$key}{'0,1'}) {
              $I01 = 1;
              $c01++;                         # this records that 0,1 is missing in the pair of columns but can be 
                                              # created in this row.
             $stringforpair .= "D$key,0,1 + Y$lin,$ii - Y$lin,$jj >= 0\n";  #if the Y variables are set to 0,1 then the D01
	                                                                     # variable must be set to 1 for that column pair.
	     }


#            print "2,2\n";

             } # end of the 2,2 case

             elsif ($vhash{$lin}{$key} eq '2,1') {
#             print "2,1\n";
                if ((defined $hash{$key}{'1,1'}) and (defined $hash{$key}{'0,1'})) {
#                print "No help\n";
                } 
                else {

             if (!defined $hash{$key}{'1,1'}) {
              $I11 = 1;
              $c11++;                         # this records that 1,1 is missing in the pair of columns but can be 
                                              # created in this row.

             $stringforpair .= "D$key,1,1 - Y$lin,$ii - Y$lin,$jj >= -1\n";  #if the Y variables are set to 1,1 then the D11
	                                                                     # variable must be set to 1 for that column pair.
          #   $bin{"D$key,1,1"} = 1;
             }

             if (!defined $hash{$key}{'0,1'}) {
              $I01 = 1;
              $c01++;                         # this records that 0,1 is missing in the pair of columns but can be 
                                              # created in this row.
             $stringforpair .= "D$key,0,1 + Y$lin,$ii - Y$lin,$jj >= 0\n";  #if the Y variables are set to 0,1 then the D01
	                                                                     # variable must be set to 1 for that column pair.

           #  $bin{"D$key,0,1"} = 1;
	     }

              } # end of else
            } # end of 2,1 case

            elsif ($vhash{$lin}{$key} eq '2,0') {
     #      print "2,0\n";
                if ((defined $hash{$key}{'1,0'}) and (defined $hash{$key}{'0,0'})) {
#                print "No help\n";
                } 
                else {

             if (!defined $hash{$key}{'0,0'}) {
              $I00 = 1;
              $c00++;                         # this records that 0,0 is missing in the pair of columns but can be 
                                              # created in this row.
             
             $stringforpair .= "D$key,0,0 + Y$lin,$ii + Y$lin,$jj >= 1\n";  #if the Y variables are set to 0,0 then the D00
	                                                                     # variable must be set to 1 for that column pair.
           #  $bin{"D$key,0,0"} = 1;
             }

             if (!defined $hash{$key}{'1,0'}) {
              $I10 = 1;
              $c10++;                         # this records that 1,0 is missing in the pair of columns but can be 
                                              # created in this row.

             $stringforpair .= "D$key,1,0 - Y$lin,$ii + Y$lin,$jj >= 0\n";  #if the Y variables are set to 1,0 then the D10
	                                                                     # variable must be set to 1 for that column pair.
           #  $bin{"D$key,1,0"} = 1;
             }

               } # end of else
             } # end of 2,0 case

             elsif ($vhash{$lin}{$key} eq '1,2') {
         #   print "1,2\n";
                if ((defined $hash{$key}{'1,1'}) and (defined $hash{$key}{'1,0'})) {
#                print "No help\n";
                } 
                else {

             if (!defined $hash{$key}{'1,1'}) {
              $I11 = 1;
              $c11++;                         # this records that 1,1 is missing in the pair of columns but can be 
                                              # created in this row.

             $stringforpair .= "D$key,1,1 - Y$lin,$ii - Y$lin,$jj >= -1\n";  #if the Y variables are set to 1,1 then the D11
	                                                                     # variable must be set to 1 for that column pair.
          #   $bin{"D$key,1,1"} = 1;
             }

             if (!defined $hash{$key}{'1,0'}) {
              $I10 = 1;
              $c10++;                         # this records that 1,0 is missing in the pair of columns but can be 
                                              # created in this row.

             $stringforpair .= "D$key,1,0 - Y$lin,$ii + Y$lin,$jj >= 0\n";  #if the Y variables are set to 1,0 then the D10
	                                                                     # variable must be set to 1 for that column pair.
           #  $bin{"D$key,1,0"} = 1;
             }

               } #end of else
             } #end of 1,2 case

             elsif ($vhash{$lin}{$key} eq '0,2') {
       #     print "0,2\n";
                if ((defined $hash{$key}{'0,0'}) and (defined $hash{$key}{'0,1'})) {
#                print "No help\n";
                } 
                else {


             if (!defined $hash{$key}{'0,0'}) {
              $I00 = 1;
              $c00++;                         # this records that 0,0 is missing in the pair of columns but can be 
                                              # created in this row.
             
             $stringforpair .= "D$key,0,0 + Y$lin,$ii + Y$lin,$jj >= 1\n";  #if the Y variables are set to 0,0 then the D00
	                                                                     # variable must be set to 1 for that column pair.
          #   $bin{"D$key,0,0"} = 1;
             }

             if (!defined $hash{$key}{'0,1'}) {
              $I01 = 1;
              $c01++;                         # this records that 0,1 is missing in the pair of columns but can be 
                                              # created in this row.
             $stringforpair .= "D$key,0,1 + Y$lin,$ii - Y$lin,$jj >= 0\n";  #if the Y variables are set to 0,1 then the D01
	                                                                     # variable must be set to 1 for that column pair.

          #   $bin{"D$key,0,1"} = 1;
	     }

                   } #end of else
                } # end of 0,2 case

   #     print OUT "$lin, $key $vhash{$lin}{$key}\n";  
   #     print OUT "Another $key; $value, $count{$key}\n";
           }
         } # end of next line case

   #     print OUT "$key, $count{$key}\n";
         if ($I11 + $I10 + $I01 + $I00 + $count{$key}  == 4) {  # here we check whether an incompatibility is possible for that column pair.
         $useful++;

     #   print OUT "Column pair $ii, $jj is useful\n";
         print OUT "$stringforpair\n";
         $rstring = "";

         if ($c11 > 0) {                          # need to check whether there was a way to make D11 = 1 
                                                  # That is, if there is a 11 deficiency, and if it can be rectified.

                        $rstring .= "+ D$key,1,1 ";
                        $bin{"D$key,1,1"} = 1;
 
         }               
         if ($c10 > 0) {
                        $rstring .= "+ D$key,1,0 ";
                        $bin{"D$key,1,0"} = 1;
         }               
         if ($c01 > 0) {
                        $rstring .= "+ D$key,0,1 ";
                        $bin{"D$key,0,1"} = 1;
         }                
         if ($c00 > 0) {
                        $rstring .= "+ D$key,0,0 ";
                        $bin{"D$key,0,0"} = 1;
         }               

                        $rhs = 3 - $count{$key};
# (May 9, 2006)         print OUT "$rstring - 4 LB$key >= -$count{$key}\n";  # these inequalities set LB$key to 1 if and only if
                        print OUT "$rstring - LB$key <= $rhs\n";    # the Y values are set so that the column pair is
                                                                                # incompatible. The first line is the only if part.
                                                                       # and the second line is the if part.
                        $bin{"LB$key"} = 1;
                        print OUT1 "+ LB$key ";
                        $k++;
                        if (0 == $k % 8) {
                        print OUT1 "\n";
                        } 
                        
         }
#        else {
#            print OUT "Column pair $ii, $jj is not useful\n";
#        } 
      }
     }
  }

print OUT1 (" = 0\n"); # March 1, 2015 changed this for persistent.pl in order to force faster
                       # termination (as an infeasible) when there is no persistent phylogeny
# print OUTM "\n st \n";

print OUT "binaries\n";
 foreach $index (sort keys %bin) {
  print OUT "$index\n";
}

 print OUT "end\n";

# print "The number of non-perfect pairs is $count\n";
# print "The potential number of subpairs is $paircount\n";
#  print "The number of useful pairs is $useful\n";

close (IN);
close (OUT);
close (OUT1);

$outputfile = "lp$set.lp";
system ("cat objective constraints > $outputfile");
print "The output is in file $outputfile\n";
