#!/pkg/bin/perl -w
# hmisstesthk.pl
# July 20, 2005
# Modified to solve produce the ILP whose solution solves 
# the haplotyping problem so that the HK bound for the resulting haplotypes is
# minimized over all possible haplotyping solutions.
# 
# June 4, 2005 changed use of stringlen, right and left. This should have no affect on
# the output when all columns are selected, but fixed a bug when only an interval is
#selected, and also the logic is clearer now.
#
#December 5, 2004
# use diagnostics
# Generates the ILP to solve the problem of
# setting the missing values in order to minimize the number of
# resulting incompatible pairs of sites.
#
open IN, "$ARGV[0]";
open OUT, ">constraints";
open OUT1, ">objective";
open OUT2, ">trace"; # for debugging use
$line1 = <IN>;
$line2 = <IN>;
chomp $line1;
chomp $line2;
$lpoutfile = "hhklp$line1.$line2.lp";

@lines = <IN>;
$line = $lines[1];
#print "$line";
chomp $line;
$line =~ tr/ //d;
$stringlen = length ($line);
print "$stringlen ***\n";

   %count = ();
$left = 0;
$right = $stringlen-1; # right and left now define the range of the indices into the data. 


$li = 0;
foreach $line (@lines) {
      $hli = 2*$li;
      $hlines[$hli] = $line;
print "$hli, $hlines[$hli]\n";
      $hli++;
      $hlines[$hli] = $line;
print "$hli, $hlines[$hli]\n";
$li++;
}

#print "do the whole range or an interval - input d for whole range or two numbers for
#an interval\n";
#$line = <STDIN>;
#if ($line =~ /d/) {
#$left = 0;
#$right = $stringlen-1; # right and left now define the range of the indices into the data. 
#}
#elsif ($line =~ /(\d+) (\d+)/) {
#  $left = $1-1;
#  $right = $2-1;
# print "got those numbers\n";
#}

   foreach $i ($left...$right -1) { # note that the column indices begin at 1.
     foreach $j ($i+1...$right) {
     $ii = $i+1;
     $jj = $j+1;
     $key = $ii . ',' . $jj;
 #   print OUT "$key\n";

     %{$hash{$key}} = ();     
     $count{$key} = 0;
     }
   }


print OUT1 "minimize \n";
# print OUTM "maximize \n";



     
$linecount = 1;
foreach $line (@hlines) {
 $line =~ tr/ //d;
 chomp $line;

   @chars = split(//, $line);
   foreach $i ($left...$right -1) {    
     foreach $j ($i+1...$right) {
     $ii = $i+1;
     $jj = $j+1;
     $key = $ii . ',' . $jj;
     $value  = $chars[$i] . ',' . $chars[$j];

     if (($chars[$i] eq '2') or ($chars[$j] eq '2')) {
    #    print OUT "$i, $j \n";
       $variable{$key} = 1;
       $vhash{$linecount}{$key} = $value;
   #   print OUT "Another $key; $value, $count{$key}\n";
       if ($chars[$i] ne '2') {              # July 31, 2005 this is a kluge and a bass-ackwards way to do this,
                                             # but given the existing code, it is a quick hack and seems to
					     # work - anyway, the speed of this code is not critical. Rather
					     # it is the solution speed of the ILP produced that matters.
          $value0 = "$chars[$i],0";
          $value1 = "$chars[$i],1";
             if (! defined $hash{$key}{$value0}) {
             $hash{$key}{$value0} = 1;
             $count{$key}++;
             }

             if (! defined $hash{$key}{$value1}) {
             $hash{$key}{$value1} = 1;
             $count{$key}++;
             }
       }
       if ($chars[$j] ne '2') {
          $value0 = "0,$chars[$j]";
          $value1 = "1,$chars[$j]";
             if (! defined $hash{$key}{$value0}) {
             $hash{$key}{$value0} = 1;
             $count{$key}++;
             }

             if (! defined $hash{$key}{$value1}) {
             $hash{$key}{$value1} = 1;
             $count{$key}++;
             }
       }
     }

     elsif (! defined $hash{$key}{$value}) { # in this case, the tuple does not involve a 2, and
                                             # we want to record the binary type seen that col. pair
       $hash{$key}{$value} = 1;
       $count{$key}++;    # this accumulates the number of binary types seen so far in that  col. pair.
   #   print OUT "Another $key; $value, $count{$key}\n";
     }
  } 
 }
  $linecount++;  # this ends up overcounting the number of lines by one.
}


 $count = 0;
 foreach $ekey (sort keys %count) {
  print OUT2 "XX $ekey  $count{$ekey} \n";
  if ($count{$ekey} == 4) {
   print OUT "LB$ekey = 1\n";           # this puts in an LB variable set to 1 for
                                        # every pair that is already in conflict. Remove this to
                                        # just compute the extra conflicts rather than the total conflicts.

     $count++; # this counts the number of pairs of cols. that are in conflict even ignoring the 2's

    ($ii,$jj) = split(/,/, $ekey);
       foreach $kk ($ii..$jj-1) {

       print OUT "+ R$kk ";
       $bin{"R$kk"} = 1;
       $Rvar{"R$kk"} = 1;
       if (0 == (($kk - $ii) % 8)) {
       print OUT "\n";
       }
       }
       print OUT "-LB$ekey => 0\n";
  }
 }

$linecount = 0;
foreach $line (@hlines) {
$linecount++;

 $line =~ tr/ //d;
 chomp $line;
 @chars = split(//, $line);
 foreach $i ($left...$right) {
   $ii = $i+1;
     if ($chars[$i] eq '2') {
        print OUT "Y$linecount,$ii <= 1\n";
        $bin{"Y$linecount,$ii"} = 1;
        if (0 == $linecount %2 ) {     # if this is the second line of a variable pair
                                       # then add in the constraint that exactly one
                                       # of the variables must be set to 1 and the other to 0.
         $lcm1 = $linecount - 1;
         print OUT "Y$linecount,$ii + Y$lcm1,$ii = 1\n";
         }
     }
     elsif ($chars[$i] eq '1') {
         print OUT "Y$linecount,$ii = 1\n";
     }
     else {
         print OUT "Y$linecount,$ii = 0\n";
     }
 }
}


 $k = 0;
 $useful = 0;
 $paircount = $subcount = $sub22count = 0;
   foreach $i ($left...$right -1) {
     foreach $j ($i+1...$right) {
     $ii = $i+1;
     $jj = $j+1;
       $key = $ii . ',' . $jj;
      if ((defined $variable{$key}) and ($count{$key} < 4)) {           # we only further examine a pair of columns if they have some 
                                                                         # variable element(s) are not already incompatible
        $string11 =  $string10 = $string01 = $string00 = "";
        $c11 = $c10 = $c01 = $c00 = 0;
        $I11 = $I10 = $I01 = $I00 = 0;
        $stringforpair = "";

        foreach $lin (1..$linecount){
         if (defined $vhash{$lin}{$key}) {
         $paircount++; 
 #        print "$paircount\n";

             $value = $vhash{$lin}{$key};
             if ($vhash{$lin}{$key} eq '2,2') {   

#             print OUT "X$lin,$key,1,1 + X$lin,$key,1,0 +  X$lin,$key,0,1 + X$lin,$key,0,0 = 1\n";
#
#            print OUT "X$lin,$key,1,1 - Y$lin,$ii <= 0\n";  #if X is set to 1,1 then both Y variables
#            print OUT "X$lin,$key,1,1 - Y$lin,$jj <= 0\n";  # must be set to 1
#
#            print OUT "X$lin,$key,1,0 - Y$lin,$ii <= 0\n";  #if X is set to 1,0, then the first Y variable
#            print OUT "X$lin,$key,1,0 + Y$lin,$jj <= 1\n";  #is set to 1 and the second to 0
#
#            print OUT "X$lin,$key,0,1 + Y$lin,$ii <= 1\n";
#            print OUT "X$lin,$key,0,1 - Y$lin,$jj <= 0\n";
#
#            print OUT "X$lin,$key,0,0 + Y$lin,$ii <= 1\n";
#            print OUT "X$lin,$key,0,0 + Y$lin,$jj <= 1\n";
#
             $stringforpair .= "X$lin,$key,1,1 + X$lin,$key,1,0 +  X$lin,$key,0,1 + X$lin,$key,0,0 = 1\n";

             $stringforpair .= "X$lin,$key,1,1 - Y$lin,$ii <= 0\n";  #if X is set to 1,1 then both Y variables
             $stringforpair .= "X$lin,$key,1,1 - Y$lin,$jj <= 0\n";  # must be set to 1

             $stringforpair .= "X$lin,$key,1,0 - Y$lin,$ii <= 0\n";  #if X is set to 1,0, then the first Y variable
             $stringforpair .= "X$lin,$key,1,0 + Y$lin,$jj <= 1\n";  #is set to 1 and the second to 0

             $stringforpair .= "X$lin,$key,0,1 + Y$lin,$ii <= 1\n";  #this is the symmetric case when X is set to 0,1
             $stringforpair .= "X$lin,$key,0,1 - Y$lin,$jj <= 0\n";

             $stringforpair .= "X$lin,$key,0,0 + Y$lin,$ii <= 1\n";  # this is the case when  X is set to 0,0
             $stringforpair .= "X$lin,$key,0,0 + Y$lin,$jj <= 1\n";

             $bin{"X$lin,$key,1,1"} = $bin{"X$lin,$key,1,0"} =  $bin{"X$lin,$key,0,1"} = $bin{"X$lin,$key,0,0"} = 1;

#            print "2,2\n";

             if (!defined $hash{$key}{'1,1'}) {
              $I11 = 1;
              $c11++;                       # this records that 1,1 is missing in the pair of columns but can be 
                                              # created in this row.
              $string11 .= "+ X$lin,$key,1,1 ";    
                                                 # what we want to do here is to count the number of missing cases in
                                                 # this column pair, and then check that we even can create
                                                 # an incompatibility. If we can, then we need to create the
                                                 # inequalities to check if the appropriate X's were set to
                                                 # a new incompatibility. The string1,1 is accumulating the
                                                 # various X variables that would contribute to the 1,1 case,
                                                 # if they will later be used in the inequalities that check
                                                 # if the appropriate variables have been set in order to
                                                 # create a new incompatibility.
             }

              if (!defined $hash{$key}{'1,0'}) {
              $I10 = 1;
              $c10++;
              $string10 .= "+ X$lin,$key,1,0 ";

             }
              if (!defined $hash{$key}{'0,1'}) {
              $I01 = 1;
              $c01++;
              $string01 .= "+ X$lin,$key,0,1 ";
             }
              if (!defined $hash{$key}{'0,0'}) {
              $I00 = 1;
              $c00++;
              $string00 .= "+ X$lin,$key,0,0 ";
             }
             $sub22count++;
             $subcount++;
             }

             elsif ($vhash{$lin}{$key} eq '2,1') {
#             print "2,1\n";
                if ((defined $hash{$key}{'1,1'}) and (defined $hash{$key}{'0,1'})) {
#                print "No help\n";
                } 
                else {
                      $stringforpair .=  "X$lin,$key,1,1 + X$lin,$key,0,1  = 1\n";

                      $stringforpair .=  "X$lin,$key,1,1 - Y$lin,$ii <= 0\n";  #if X is set to 1,1 then both Y variables
                      $stringforpair .=  "X$lin,$key,1,1 - Y$lin,$jj <= 0\n";  # must be set to 1


                      $stringforpair .=  "X$lin,$key,0,1 + Y$lin,$ii <= 1\n";
                      $stringforpair .=  "X$lin,$key,0,1 - Y$lin,$jj <= 0\n";

                      $bin{"X$lin,$key,1,1"} = $bin{"X$lin,$key,0,1"} = 1;


             if (!defined $hash{$key}{'1,1'}) {
              $I11 = 1;
              $c11++;
              $string11 .= "+ X$lin,$key,1,1 ";    
             }

             if (!defined $hash{$key}{'0,1'}) {
              $I01 = 1;
              $c01++; 
              $string01 .= "+ X$lin,$key,0,1 ";
             }
             $sub21count++;
             $subcount++;
              }
            }

            elsif ($vhash{$lin}{$key} eq '2,0') {
     #      print "2,0\n";
                if ((defined $hash{$key}{'1,0'}) and (defined $hash{$key}{'0,0'})) {
#                print "No help\n";
                } 
                else {

                      $stringforpair .=  "X$lin,$key,1,0 + X$lin,$key,0,0  = 1\n";

                      $stringforpair .=  "X$lin,$key,1,0 - Y$lin,$ii <= 0\n";  #if X is set to 1,0 then both Yii must be set to 1 and
                      $stringforpair .=  "X$lin,$key,1,0 + Y$lin,$jj <= 1\n";  # Yjj must be set to 0

                      $stringforpair .=  "X$lin,$key,0,0 + Y$lin,$ii <= 1\n";
                      $stringforpair .=  "X$lin,$key,0,0 + Y$lin,$jj <= 1\n";

                      $bin{"X$lin,$key,1,0"} = $bin{"X$lin,$key,0,0"} = 1;
  
                      if (!defined $hash{$key}{'1,0'}) {
                      $I10 = 1;
                      $c10++;
                      $string10 .= "+ X$lin,$key,1,0 ";
                      }

                      if (!defined $hash{$key}{'0,0'}) {
                      $I00 = 1;
                      $c00++;
                      $string00 .= "+ X$lin,$key,0,0 ";
                      }
                      $sub20count++;
                      $subcount++;
                      }
             }

             elsif ($vhash{$lin}{$key} eq '1,2') {
         #   print "1,2\n";
                if ((defined $hash{$key}{'1,1'}) and (defined $hash{$key}{'1,0'})) {
#                print "No help\n";
                } 
                else {

                  $stringforpair .=  "X$lin,$key,1,1 + X$lin,$key,1,0 = 1\n";

                  $stringforpair .=  "X$lin,$key,1,1 - Y$lin,$ii <= 0\n";  #if X is set to 1,1 then both Y variables
                  $stringforpair .=  "X$lin,$key,1,1 - Y$lin,$jj <= 0\n";  # must be set to 1

                  $stringforpair .=  "X$lin,$key,1,0 - Y$lin,$ii <= 0\n";  #if X is set to 1,0, then the first Y variable
                  $stringforpair .=  "X$lin,$key,1,0 + Y$lin,$jj <= 1\n";  #is set to 1 and the second to 0

                  $bin{"X$lin,$key,1,1"} = $bin{"X$lin,$key,1,0"} = 1;

                  if (!defined $hash{$key}{'1,1'}) {
                  $I11 = 1;
                  $c11++;
                  $string11 .= "+ X$lin,$key,1,1 ";    
                  }

                  if (!defined $hash{$key}{'1,0'}) {
                  $I10 = 1;
                  $c10++;
                  $string10 .= "+ X$lin,$key,1,0 ";
                  }
               }
               $sub12count++;
               $subcount++;
             }

             elsif ($vhash{$lin}{$key} eq '0,2') {
       #     print "0,2\n";
                if ((defined $hash{$key}{'0,0'}) and (defined $hash{$key}{'0,1'})) {
#                print "No help\n";
                } 
                else {

                   $stringforpair .=  "X$lin,$key,0,1 + X$lin,$key,0,0 = 1\n";

                   $stringforpair .=  "X$lin,$key,0,1 + Y$lin,$ii <= 1\n";
                   $stringforpair .=  "X$lin,$key,0,1 - Y$lin,$jj <= 0\n";

                   $stringforpair .=  "X$lin,$key,0,0 + Y$lin,$ii <= 1\n";
                   $stringforpair .=  "X$lin,$key,0,0 + Y$lin,$jj <= 1\n";
             
                  $bin{"X$lin,$key,0,0"} = $bin{"X$lin,$key,0,1"} = 1;

                   if (!defined $hash{$key}{'0,1'}) {
                   $I01 = 1;
                   $c01++;
                   $string01 .= "+ X$lin,$key,0,1 ";
                   }
                   if (!defined $hash{$key}{'0,0'}) {
                   $I00 = 1;
                   $c00++;
                   $string00 .= "+ X$lin,$key,0,0 ";
                   }
                   $sub02count++;
                   $subcount++;
                   }
                }

   #     print OUT "$lin, $key $vhash{$lin}{$key}\n";  
   #     print OUT "Another $key; $value, $count{$key}\n";
           }
         }

   #     print OUT "$key, $count{$key}\n";
         if ($I11 + $I10 + $I01 + $I00 + $count{$key}  == 4) {  # here we check whether an incompatibility is possible for that column pair.
         $useful++;

     #   print OUT "Column pair $ii, $jj is useful\n";
         print OUT "$stringforpair\n";
         $rstring = "";

         if ($c11 > 0) {                          # need to create inequalities that set D11 = 1 iff one of the X11 variables are
                                                  # set to 1. That is, the 11 deficiency is rectified.

                        print OUT "$string11 - $c11 D$key,1,1 <= 0\n";  # This says that if any of the X11 variables are set to 1, then
                                                              # D11 will be set to 1.
                        print OUT "$string11 - D$key,1,1 => 0\n";  # This says that D11 is set to 1 only if at least one of the 
                                                          # X11 variables are set to 1.
                        $rstring .= "+ D$key,1,1 ";
                        $bin{"D$key,1,1"} = 1;

         }               
         if ($c10 > 0) {
                        print OUT "$string10 - $c10 D$key,1,0 <= 0\n";  
                        print OUT "$string10 - D$key,1,0 => 0\n";  
                        $rstring .= "+ D$key,1,0 ";
                        $bin{"D$key,1,0"} = 1;
         }               
         if ($c01 > 0) {
                        print OUT "$string01 - $c01 D$key,0,1 <= 0\n";  
                        print OUT "$string01 - D$key,0,1 => 0\n";  
                        $rstring .= "+ D$key,0,1 ";
                        $bin{"D$key,0,1"} = 1;
         }               
         if ($c00 > 0) {
                        print OUT "$string00 - $c00 D$key,0,0 <= 0\n";  
                        print OUT "$string00 - D$key,0,0 => 0\n";  
                        $rstring .= "+ D$key,0,0 ";
                        $bin{"D$key,0,0"} = 1;
         }               

                        $rhs = 3 - $count{$key};
                        print OUT "$rstring - 4 LB$key => -$count{$key}\n";  # these inequalities set LB to 1 if and only if
                        print OUT "$rstring - LB$key <= $rhs\n";    # the Y values are set so that the column pair is
                                                                                # incompatible.
                        $bin{"LB$key"} = 1;

       foreach $kk ($ii..$jj-1) {
       print OUT "+ R$kk ";
       $bin{"R$kk"} = 1;
       $Rvar{"R$kk"} = 1;
       if (0 == (($kk - $ii) % 8)) {
       print OUT "\n";
       }
       }
       print OUT "-LB$key => 0\n";
                        
         }
#        else {
#            print OUT "Column pair $ii, $jj is not useful\n";
#        } 
      }
     }
  }


       $kk = 0;
       foreach $rvar (sort keys %Rvar) {
          print OUT1 "+ $rvar";
          $kk++;
          if (0 == (($kk - $ii) % 8)) {
          print OUT1 "\n";
       }
       }
 print OUT1 "\n st \n";
# print OUTM "\n st \n";

print OUT "binaries\n";
 foreach $index (sort keys %bin) {
  print OUT "$index\n";
}

 print OUT "end\n";

 print "The number of non-perfect pairs is $count\n";
 print "The potential number of subpairs is $paircount\n";
 print "The subpotential number of subpairs is $subcount\n";
 print "The number of useful pairs is $useful\n";
 print "The specific subcounts are $sub22count, $sub21count, $sub12count, $sub20count, $sub02count\n";

close (IN);
close (OUT);
close (OUT1);

system ("cat objective constraints> $lpoutfile");
