
# Emit the HTML preamble and define the symbolic constants shared by the
# rules below.
BEGIN {
  print "<html>";
  print "<head>";
  print "<meta name=\"generator\" content=\"gzb-ascii-to-unicode.htm\">";
  print "<style> BODY { font-family: \"Lucida Sans Unicode\", sans-serif } </style>";
  # Close the head explicitly, matching the explicit </body></html> at the end.
  print "</head>";
  print "<body>";

  # pseudo-enum: values stored in `tag` (the kind of the current paragraph).
  # `pre` is defined but not referenced by the rules visible in this script.
  p = 1;
  pre = 2;
  table = 3;
  hr = 4;
  h2 = 5;

  # pseudo-enum: values stored in `tabletype` when tag == table.
  deflist = 1;       # tab-separated rows
  interlinear = 2;   # rows containing runs of three or more spaces
  first = 1;    # maybe treat first para as H1..... to do........
}

# A blank line (empty, or spaces only) ends the current paragraph: emit the
# closing tag that matches the open element, if it has one, then remember
# what kind of paragraph just ended so the next one can react to it.
/^ *$/ {
  if ( in_para && tag == p )
    print "</p>";
  else if ( in_para && tag == table )
    print "</table>";
  else if ( in_para && tag == h2 )
    print "</h2>";
  # Any other tag value (e.g. hr) has nothing to close.

  in_para = 0;
  last_para_type = tag;
}

# Non-blank line. On the first line of a paragraph, choose the element to
# open from the look of that line; on every non-blank line, escape the HTML
# metacharacters in $0 before the output rule prints it.
! /^ *$/ {
  if ( 0 == in_para ) {
    in_para = 1;

    # Tabs hint we need a table.
    if ( $0 ~ /\t+/ ) {
      print "<table cellspacing=\"5\" border=\"1\">";
      tabletype = deflist;
      tag = table;
    # three or more consecutive spaces hint we need a table (for interlinear glosses)
    } else if ( $0 ~ /   / ) {
      print "<table cellspacing=\"5\">";
      tag = table;
      tabletype = interlinear;
    # a run of four or more dashes becomes a horizontal rule
    } else if ( $0 ~ /----+/ ) {
      print "<hr>";
      tag = hr;
    } else {
      # The paragraph that follows a rule is treated as a heading.
      if ( last_para_type == hr ) {
        print "<h2>";
        tag = h2;
      } else {
        print "<p>";
        tag = p;
      }
    }
  }

  # Escape HTML metacharacters. "&" must be handled first so the entities
  # produced by the later substitutions are not escaped again. Regex
  # constants are used for the patterns: the escape "\&" inside a string
  # literal is not defined by POSIX awk and draws a warning from gawk.
  # In the replacement text, "\\&" yields a literal ampersand.
  gsub ( /&/, "\\&amp;" );
  gsub ( /"/, "\\&quot;" );
  gsub ( /</, "\\&lt;" );
  gsub ( />/, "\\&gt;" );

  # this wants more sophistication to work across multiple lines
#  if ( sub ( "_", "<cite>" ) ) {
#     sub ( "_", "</cite>" )
#  }
}

# Output rule, run for every input line after the rules above have updated
# the paragraph state and escaped $0. Table paragraphs are emitted one <tr>
# per line; everything else is printed as-is (with the dashes removed for a
# horizontal-rule paragraph).
{
  if ( in_para && tag == table ) {
    printf "<tr>";
    if ( tabletype == interlinear ) {
      # One cell per whitespace-separated field.
      for ( i = 1; i <= NF; i++ ) {
        # Pass the data as an argument, never as the format string, so a
        # "%" in the input is printed literally.
        printf "<td> %s </td> ", $i;
      }
    } else {
      # Each run of tabs (plus any following spaces) starts a new cell.
      gsub( /\t+ */, "<td>" );
      line = $0;
      # Lines that did not begin with a tab still need an opening cell.
      # Note: plain "<" here; "\<" is a word-boundary operator in gawk.
      if ( $0 !~ /^<td/ ) {
        line = "<td>" $0;
      }
      printf "%s", line;
    }

    printf " </tr>\n";
  } else if ( tag == hr ) {
    # <hr> was already emitted; drop the dashes and keep any other text.
    gsub( /----+/, "" );
    print;
  } else {
    print;
  }
}

# Finish the document. Paragraphs are normally closed by the blank line that
# follows them; if the input ends without one, the last element is still
# open here and must be closed before the document trailer.
END {
  if ( in_para ) {
    if ( tag == p ) {
      print "</p>";
    } else if ( tag == table ) {
      print "</table>";
    } else if ( tag == h2 ) {
      print "</h2>";
    }
  }
#  print "</pre>"
  print "</body></html>"
}

