# October 4, 2008 modify to allow missing data - only build the graph from known data.
# July 31, 2008 modify to print out the partition intersection graph with the char,state labels in the
# same order as when the nodes are given integers. 
#

#input is a character-state data matrix and output is an adjacenty structure for the partition-
#intersection graph of the data, and a translation table.
open (DATA, $ARGV[0]); # input of the character state data matrix
open (GRAPH, ">$ARGV[1]"); #output of the adjacency lists for the graph with node number labels
open (GRAPH1, ">p$ARGV[1]"); #output of the adjacency lists for the graph with char-state node labels
open (TRANS, ">$ARGV[2]");# output of the translation table, translating between node numbers
                          #for the graph and the data matrix.

$ext = <DATA>;
$line = <DATA>;
($n, $m) = $line =~ /(\d+) +(\d+)/;
print "$n, $m\n";

$nodenumber = 1;
while ($line = <DATA>) {
   chomp $line;
   @dataline = split (/ /, $line);
     for ($i = 0; $i < $m-1; $i++) {
       for ($j = $i+1; $j < $m; $j++) {
#	 if (($dataline[$i] ne "?") && ($dataline[$j] ne "?")) 
	 if (($dataline[$i] ne '?') && ($dataline[$j] ne '?')) 
	 {

         $ip1 = $i+1;
         $jp1 = $j+1;
         $keyi = "($ip1,$dataline[$i])";
         $keyj = "($jp1,$dataline[$j])";

         $edges{$keyi}{$keyj} = 1;
         $edges{$keyj}{$keyi} = 1;
         if (! defined $nodes{$keyi}) {
                $keycode{$nodenumber} = $keyi;
                $nodes{$keyi} = $nodenumber;
#                print "Node number set $nodes{$keyi}\n";
		print TRANS "$nodes{$keyi} $keyi\n";
#                print TRANS "$keycode{$nodenumber} $nodenumber\n";
                $nodenumber++;
         }   
         if (! defined $nodes{$keyj}) {
                $keycode{$nodenumber} = $keyj;
                $nodes{$keyj} = $nodenumber;
		print TRANS "$nodes{$keyj} $keyj\n";
#                print TRANS "$keycode{$nodenumber} $nodenumber\n";
                $nodenumber++;
         }   


         $nedges {$nodes{$keyi}}  {$nodes{$keyj}} = 1;
#     print "nedges set: $nodes{$keyi},  $nodes{$keyj}\n";
         $nedges {$nodes{$keyj}}  {$nodes{$keyi}} = 1;
#     print "nedges set: $nodes{$keyj},  $nodes{$keyi}\n";
       } # matching bracket for 'if ne ?'
      } 
     } 
}


# print out the translation table indexed by char-state labels
print TRANS "\n";
foreach $key (sort keys %nodes) {
  print TRANS "$key $nodes{$key}\n";
}

# output the graph with node numbers and also with char-state labels in the same order
$edgecount = 0;
foreach $node1 (sort {$a <=> $b} keys %nedges) {
  print GRAPH "$node1: ";
  print GRAPH1 "$keycode{$node1}: ";
   @neighbors = (sort {$a <=> $b} keys %{$nedges{$node1}});
    $first = shift(@neighbors);  # I don't know why it is done this way.
       print GRAPH "$first";
       print GRAPH1 "$keycode{$first}";
	 $edgecount++;
         foreach $node2  (@neighbors) {
           print GRAPH ", $node2"; 
           print GRAPH1 ", $keycode{$node2}"; 
	   $edgecount++;
         }
    print GRAPH "\n";
    print GRAPH1 "\n";
}
    $edgecount = $edgecount/2;
    $nodenumber-- ;
    $permitededges = ($nodenumber - ($m/2)) * ($m-1);
    print "nodecount $nodenumber edgecount $edgecount max permited edges  $permitededges\n";
    if ($edgecount > $permitededges) {
      print "No PP because the edge count is too high\n";
   }
