2005-08-06 15:08 

This is a patch for Interchange version 5.2.0.  When you
apply it, Interchange will:

  - generate HTML pages in Unicode UTF-8 encoding;

  - expect the template files to be in UTF-8 encoding;

  - store your data (like product names and descriptions) in
    UTF-8 in databases;

  - expect data from CSV database files, GDBM, DB_File and
    in-memory databases to be in UTF-8 encoding.

The patch is not complete.  It does not fix:

  - encoding for data coming from SQL (DBI, including
    MySQL) and LDAP databases -- I didn't try that, expect
    no problem;

  - it does not fix encoding for data coming from forms
    with method='POST' and ENCTYPE="multipart/form-data" --
    I tried to do that, but failed.

Also:

  - it is not well-tested;

  - it will probably break the existing internationalization
    schemes.

But it may be useful as an example, a starting point, that
shows how to procede if you want to finish it.


Ivan Kurmanov


diff -wcbr lib.orig/Vend/Config.pm lib/Vend/Config.pm
*** lib.orig/Vend/Config.pm	2005-07-06 00:09:31.000000000 +0300
--- lib/Vend/Config.pm	2005-08-06 15:02:19.298462760 +0300
***************
*** 359,364 ****
--- 359,365 ----
          ['Catalog',                      'catalog',              ''],
          ['SubCatalog',           'catalog',              ''],
          ['AutoVariable',         'autovar',              'UrlJoiner'],
+         ['HTMLCharset',          undef,                  'UTF-8' ],
  
          ];
          return $directives;
diff -wcbr lib.orig/Vend/Data.pm lib/Vend/Data.pm
*** lib.orig/Vend/Data.pm	2005-07-06 00:09:31.000000000 +0300
--- lib/Vend/Data.pm	2005-08-06 12:24:20.232499824 +0300
***************
*** 24,29 ****
--- 24,31 ----
  # MA  02111-1307  USA.
  
  package Vend::Data;
+ use open ':utf8', ':std';
+ 
  require Exporter;
  @ISA = qw(Exporter);
  @EXPORT = qw(
***************
*** 237,243 ****
      return '' unless defined $db->test_column($field_name);
  	$key = $db->foreign($key, $foreign) if $foreign;
      return '' unless $db->test_record($key);
!     return $db->field($key, $field_name);
  }
  
  sub database_row {
--- 239,247 ----
      return '' unless defined $db->test_column($field_name);
          $key = $db->foreign($key, $foreign) if $foreign;
      return '' unless $db->test_record($key);
!     
!     my $v = $db->field($key, $field_name);
!     return $v;
  }
  
  sub database_row {
diff -wcbr lib.orig/Vend/File.pm lib/Vend/File.pm
*** lib.orig/Vend/File.pm	2005-07-06 00:09:32.000000000 +0300
--- lib/Vend/File.pm	2005-07-07 09:09:04.000000000 +0300
***************
*** 46,51 ****
--- 46,53 ----
          writefile
  );
  
+ use open ":utf8";
+ 
  use strict;
  use Config;
  use Fcntl;
diff -wcbr lib.orig/Vend/Interpolate.pm lib/Vend/Interpolate.pm
*** lib.orig/Vend/Interpolate.pm	2005-07-06 00:09:32.000000000 +0300
--- lib/Vend/Interpolate.pm	2005-08-06 12:25:27.528269312 +0300
***************
*** 30,35 ****
--- 30,37 ----
  
  $VERSION = substr(q$Revision: 2.207 $, 10);
  
+ use open ":utf8";
+ 
  @EXPORT = qw (
  
  interpolate_html
***************
*** 149,157 ****
--- 151,162 ----
                                                          &encode_entities
                                                          &HTML
                                                          &interpolate_html
+                                                         &decode_utf8
                                                  /;
  }
  
+ use Encode qw(decode_utf8);
+ 
  use vars @Share_vars, @Share_routines,
                   qw/$ready_safe $safe_safe/;
  use vars qw/%Filter %Ship_handler $Safe_data/;
***************
*** 218,225 ****
  my %T;
  my %QR;
  
! my $All = '[\000-\377]*';
! my $Some = '[\000-\377]*?';
  my $Codere = '[-\w#/.]+';
  my $Coderex = '[-\w:#=/.%]+';
  my $Mandx = '\s+([-\w:#=/.%]+)';
--- 223,230 ----
  my %T;
  my %QR;
  
! my $All    = '[\S\s]*';   
! my $Some   = '[\S\s]*?'; 
  my $Codere = '[-\w#/.]+';
  my $Coderex = '[-\w:#=/.%]+';
  my $Mandx = '\s+([-\w:#=/.%]+)';
***************
*** 234,241 ****
  my $T    = '\]';
  my $D    = '[-_]';
  
! my $XAll = qr{[\000-\377]*};
! my $XSome = qr{[\000-\377]*?};
  my $XCodere = qr{[-\w#/.]+};
  my $XCoderex = qr{[-\w:#=/.%]+};
  my $XMandx = qr{\s+([-\w:#=/.%]+)};
--- 239,246 ----
  my $T    = '\]';
  my $D    = '[-_]';
  
! my $XAll    = qr{[\S\s]*};
! my $XSome   = qr{[\S\s]*?}; 
  my $XCodere = qr{[-\w#/.]+};
  my $XCoderex = qr{[-\w:#=/.%]+};
  my $XMandx = qr{\s+([-\w:#=/.%]+)};
diff -wcbr lib.orig/Vend/Search.pm lib/Vend/Search.pm
*** lib.orig/Vend/Search.pm	2005-07-06 00:09:31.000000000 +0300
--- lib/Vend/Search.pm	2005-07-07 09:20:59.000000000 +0300
***************
*** 21,26 ****
--- 21,27 ----
  # MA  02111-1307  USA.
  
  package Vend::Search;
+ use open ":utf8";
  
  $VERSION = substr(q$Revision: 2.22 $, 10);
  
diff -wcbr lib.orig/Vend/Server.pm lib/Vend/Server.pm
*** lib.orig/Vend/Server.pm	2005-07-06 00:09:31.000000000 +0300
--- lib/Vend/Server.pm	2005-08-06 12:28:20.524969816 +0300
***************
*** 25,30 ****
--- 25,32 ----
  
  package Vend::Server;
  
+ use open ':utf8', ':std';
+ 
  use vars qw($VERSION);
  $VERSION = substr(q$Revision: 2.50 $, 10);
  
***************
*** 284,289 ****
--- 286,292 ----
  # Doesn't do unhexify
  sub store_cgi_kv {
          my ($key, $value) = @_;
+         $value = Encode::decode_utf8( $value );
          $key = $::IV->{$key} if defined $::IV->{$key};
          if(defined $CGI::values{$key} and ! defined $::SV{$key}) {
                  $CGI::values{$key} = "$CGI::values{$key}\0$value";
***************
*** 345,350 ****
--- 348,354 ----
  #::logDebug("mapping  --> $key");
                  $value =~ tr/+/ /;
                  $value =~ s/%([0-9a-fA-F][0-9a-fA-F])/chr(hex $1)/ge;
+                 $value = Encode::decode_utf8( $value );
                  # Handle multiple keys
                  if(defined $CGI::values{$key} and ! defined $::SV{$key}) {
                          $CGI::values{$key} = "$CGI::values{$key}\0$value";
***************
*** 484,489 ****
--- 488,496 ----
          # $body is now a reference
      my ($s, $body) = @_;
  #show_times("begin response send") if $Global::ShowTimes;
+     my $charset = $Global::HTMLCharset;
+     if ( $charset ) { $charset = "; charset=$charset"; }
+ 
          my $status;
          return if $Vend::Sent;
          if($Vend::StatusLine) {
***************
*** 494,500 ****
  
  	if($CGI::redirect_status and ! $Vend::StatusLine) {
  		$status = "200 OK";
! 		$Vend::StatusLine = "Status: 200 OK\nContent-Type: text/html";
  	}
  
  	$$body =~ s/^\s+//
--- 501,507 ----
  
          if($CGI::redirect_status and ! $Vend::StatusLine) {
                  $status = "200 OK";
!                 $Vend::StatusLine = "Status: 200 OK\nContent-Type: text/html$charset";
          }
  
          $$body =~ s/^\s+//
***************
*** 502,508 ****
  
  	if(! $s and $Vend::StatusLine) {
  		$Vend::StatusLine .= ($Vend::StatusLine =~ /^Content-Type:/im)
! 							? '' : "\r\nContent-Type: text/html\r\n";
  # TRACK
          $Vend::StatusLine .= "X-Track: " . $Vend::Track->header() . "\r\n"
  			if $Vend::Track;
--- 509,516 ----
  
          if(! $s and $Vend::StatusLine) {
                  $Vend::StatusLine .= ($Vend::StatusLine =~ /^Content-Type:/im)
!                                                         ? '' 
! : "\r\nContent-Type: text/html$charset\r\n";
  # TRACK
          $Vend::StatusLine .= "X-Track: " . $Vend::Track->header() . "\r\n"
                          if $Vend::Track;
***************
*** 611,617 ****
  		print $fh canon_status($Vend::StatusLine);
  	}
  	elsif(! $Vend::ResponseMade) {        
! 		print $fh canon_status("Content-Type: text/html");
  # TRACK        
          print $fh canon_status("X-Track: " . $Vend::Track->header())
  			if $Vend::Track;
--- 619,625 ----
                  print $fh canon_status($Vend::StatusLine);
          }
          elsif(! $Vend::ResponseMade) {        
!                 print $fh canon_status("Content-Type: text/html$charset");
  # TRACK        
          print $fh canon_status("X-Track: " . $Vend::Track->header())
                          if $Vend::Track;
***************
*** 821,826 ****
--- 829,835 ----
    
    for (;;) {
      $block = _find( \$in, "\n" );
+ 
      if (($n) = ($block =~ m/^env (\d+)$/)) {
        foreach $i (0 .. $n - 1) {
          $e = _string(\$in);
diff -wcbr lib.orig/Vend/Table/Common.pm lib/Vend/Table/Common.pm
*** lib.orig/Vend/Table/Common.pm	2005-07-06 00:09:32.000000000 +0300
--- lib/Vend/Table/Common.pm	2005-08-06 12:37:45.966009712 +0300
***************
*** 303,309 ****
  	$s = $s->import_db() if ! defined $s->[$TIE_HASH];
  	return undef unless $s->record_exists($key);
  	my %row;
!     @row{ @{$s->[$COLUMN_NAMES]} } = $s->row($key);
  	return \%row;
  }
  
--- 303,311 ----
      $s = $s->import_db() if ! defined $s->[$TIE_HASH];
      return undef unless $s->record_exists($key);
      my %row;
! 
!     my @v = $s->row($key);
!     @row{ @{$s->[$COLUMN_NAMES]} } = @v;
      return \%row;
  }
  
***************
*** 311,316 ****
--- 313,324 ----
      my ($s, $key) = @_;
      $s->lock_table() if $s->[$CONFIG]{IC_LOCKING};
      my $line = $s->[$TIE_HASH]{"k$key"};
+     
+     # this may work and may not:
+     my $d = Encode::decode_utf8( $line ); 
+     # if it did not work, we restore the previous value:
+     if ( $d ) { $line = $d; }
+    
      $s->unlock_table() if $s->[$CONFIG]{IC_LOCKING};
      die $s->log_error(
                        "There is no row with index '%s' in database %s",
***************
*** 525,531 ****
  sub field {
      my ($s, $key, $column) = @_;
  	$s = $s->import_db() if ! defined $s->[$TIE_HASH];
!     return ($s->row($key))[$s->column_index($column)];
  }
  
  sub set_field {
--- 533,539 ----
  sub field {
      my ($s, $key, $column) = @_;
          $s = $s->import_db() if ! defined $s->[$TIE_HASH];
!     return (($s->row($key))[$s->column_index($column)] );
  }
  
  sub set_field {
***************
*** 686,700 ****
  			return ();
  		}
  		$key =~ s/^k// or next;
  		if($restrict) {
- 			my (@row) = $s->row($key);
  #::logDebug("each_nokey: rfield='$row[$rfield]' eq '$rsession' ??") if defined $rfield;
  #::logDebug("each_nokey: hfield='$row[$hfield]'") if defined $hfield;
  			next if defined $hfield and $row[$hfield];
  			next if defined $rfield and $row[$rfield] ne $rsession;
  			return \@row;
  		}
! 		return [ $s->row($key) ];
      }
  }
  
--- 694,709 ----
                          return ();
                  }
                  $key =~ s/^k// or next;
+ 
+                 my @row = $s->row($key);
                  if($restrict) {
  #::logDebug("each_nokey: rfield='$row[$rfield]' eq '$rsession' ??") if defined $rfield;
  #::logDebug("each_nokey: hfield='$row[$hfield]'") if defined $hfield;
                          next if defined $hfield and $row[$hfield];
                          next if defined $rfield and $row[$rfield] ne $rsession;
                          return \@row;
                  }
!                 return [ @row ];
      }
  }
  
diff -wcbr lib.orig/Vend/Table/InMemory.pm lib/Vend/Table/InMemory.pm
*** lib.orig/Vend/Table/InMemory.pm	2005-07-06 00:09:32.000000000 +0300
--- lib/Vend/Table/InMemory.pm	2005-07-08 10:55:56.000000000 +0300
***************
*** 111,116 ****
--- 111,119 ----
                      $key,
                      $s->[$FILENAME],
                     ) unless defined $a;
+   foreach ( @$a ) {
+     $_ = Encode::decode_utf8( $_ );
+   }
    return @$a;
  }
  
***************
*** 119,124 ****
--- 122,130 ----
    my $a = $s->[$TIE_HASH]{$key}
      or return undef;
  #::logDebug("here is row $key: " . ::uneval($a));
+   foreach ( @$a ) {
+     $_ = Encode::decode_utf8( $_ );
+   }
    my %row;
    @row{ @{$s->[$COLUMN_NAMES]} } = @$a;
    return \%row;
diff -wcbr lib.orig/Vend/Table/Shadow.pm lib/Vend/Table/Shadow.pm
*** lib.orig/Vend/Table/Shadow.pm	2005-07-06 00:09:32.000000000 +0300
--- lib/Vend/Table/Shadow.pm	2005-07-08 11:48:54.000000000 +0300
***************
*** 358,363 ****
--- 358,364 ----
                          for $row (@$result) {
                                  for $pos (@map_matches) {
                                          ($name, $map_entry) = @{$map_entries[$pos]};
+                                         ### XXX decode_utf8
                                          $row->[$pos] = $s->_map_column($row->[$keypos], $name, 1, $row->[$pos], $map_entry);
                                  }
                          }
diff -wcbr lib.orig/Vend/TextSearch.pm lib/Vend/TextSearch.pm
*** lib.orig/Vend/TextSearch.pm	2005-07-06 00:09:31.000000000 +0300
--- lib/Vend/TextSearch.pm	2005-07-07 09:20:48.000000000 +0300
***************
*** 23,28 ****
--- 23,31 ----
  # MA  02111-1307  USA.
  
  package Vend::TextSearch;
+ 
+ use open ":utf8";
+ 
  require Vend::Search;
  require Exporter;
  

