Skip to content

Commit

Permalink
Updated UTF8-handling for GDBM files
Browse files Browse the repository at this point in the history
Remove the existing cruft related to GDBM_ENABLE_UTF8, which
appears never to have worked entirely correctly.

In the meantime, tie this explicitly to whether UTF8 is enabled in the
catalog, rather than configuring it separately.

Also make sure :utf8 is set for data files read in and out.

Make sure we respect the MINIVEND_DISABLE_UTF8 environment variable.
  • Loading branch information
David Christensen committed Sep 22, 2016
1 parent e81794d commit 25f07cb
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 33 deletions.
5 changes: 4 additions & 1 deletion lib/Vend/Data.pm
Expand Up @@ -1248,7 +1248,10 @@ sub export_database {
or die "Couldn't exclusive lock $file: $!\n";
open(EXPORT, "+>$file") or
die "Couldn't write $file: $!\n";


# we should be outputting as UTF8 if we're so configured
binmode(\*EXPORT, ':utf8') if $::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8};

#::logDebug("EXPORT_SORT=" . $db->config('EXPORT_SORT'));
if($opt->{sort} ||= $db->config('EXPORT_SORT')) {
#::logDebug("Found EXPORT_SORT=$opt->{sort}");
Expand Down
26 changes: 22 additions & 4 deletions lib/Vend/Table/Common.pm
@@ -1,8 +1,6 @@
# Vend::Table::Common - Common access methods for Interchange databases
#
# $Id: Common.pm,v 2.51 2008-05-26 02:30:04 markj Exp $
#
# Copyright (C) 2002-2008 Interchange Development Group
# Copyright (C) 2002-2016 Interchange Development Group
# Copyright (C) 1996-2002 Red Hat, Inc.
#
# This program was originally based on Vend 0.2 and 0.3
Expand All @@ -23,7 +21,7 @@
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301 USA.

$VERSION = substr(q$Revision: 2.51 $, 10);
$VERSION = '2.52';
use strict;

package Vend::Table::Common;
Expand All @@ -34,6 +32,19 @@ no warnings qw(uninitialized numeric);
use Symbol;
use Vend::Util;

# Set to 1 once the real Encode routines have been imported; stays 0 when
# UTF-8 handling is switched off via the MINIVEND_DISABLE_UTF8 environment
# variable.
our $Has_Encode = 0;

if ($ENV{MINIVEND_DISABLE_UTF8}) {
    # Pass-through stubs used when UTF-8 is disabled.  They return the
    # first argument explicitly: a bare `@_` as the last expression would
    # evaluate to the argument count when the stub is called in scalar
    # context (e.g. `$val = encode_utf8($val)`), replacing the data with 1.
    *encode_utf8 = sub { $_[0] };
    *decode_utf8 = sub { $_[0] };
}
else {
    # Encode is loaded at run time, only on this branch.
    require Encode;
    Encode->import(qw( encode_utf8 decode_utf8 ));
    $Has_Encode = 1;
}

use Exporter;
use vars qw($Storable $VERSION @EXPORT @EXPORT_OK);
@EXPORT = qw(create_columns import_ascii_delimited import_csv config columns);
Expand Down Expand Up @@ -164,13 +175,17 @@ sub unlock_table {

# Escape a single value before it is stored.
#
# When Encode is available and either the catalog or the global MV_UTF8
# variable is true, the value is first converted with encode_utf8().
# Every tab and percent character is then replaced by the @Hex_string
# entry indexed by that character's code (presumably a %XX escape --
# @Hex_string is defined elsewhere in this file).
#
# Returns the escaped copy; the caller's variable is not modified.
sub stuff {
    my $text = shift;

    if ($Has_Encode and ($::Variable->{MV_UTF8} or $Global::Variable->{MV_UTF8})) {
        $text = encode_utf8($text);
    }

    $text =~ s/([\t%])/$Hex_string[ord $1]/ge;
    return $text;
}

# Undo percent-escaping on a single stored value (presumably the inverse
# of stuff()).
#
# Each '%' followed by two characters is replaced by the character whose
# code is the hex value of those two characters.  When Encode is available
# and either the catalog or the global MV_UTF8 variable is true, the result
# is then passed through decode_utf8().
#
# Returns the unescaped copy; the caller's variable is not modified.
sub unstuff {
    my $text = shift;

    $text =~ s/%(..)/chr(hex $1)/ge;

    if ($Has_Encode and ($::Variable->{MV_UTF8} or $Global::Variable->{MV_UTF8})) {
        $text = decode_utf8($text);
    }

    return $text;
}

Expand Down Expand Up @@ -1080,6 +1095,9 @@ sub import_ascii_delimited {

new_filehandle(\*IN);

# we should be inputting as UTF8 if we're so configured
binmode(\*IN, ':utf8') if $::Variable->{MV_UTF8} || $Global::Variable->{MV_UTF8};

my $field_hash;
my $para_sep;
my $codere = '[\w-_#/.]+';
Expand Down
29 changes: 1 addition & 28 deletions lib/Vend/Table/GDBM.pm
Expand Up @@ -27,17 +27,8 @@ use vars qw($VERSION @ISA);
use GDBM_File;
use Vend::Table::Common;

if ($ENV{MINIVEND_DISABLE_UTF8}) {
    # Pass-through stubs for when UTF-8 is disabled.  They are installed by
    # glob assignment at run time: a named `sub` declaration inside this
    # `if` would be compiled unconditionally and then clash with the Encode
    # import in the other branch.  The stubs hand back the data argument
    # (the second parameter, after the encoding name) so callers doing
    # `$_ = encode('utf8', $_)` keep their data instead of receiving undef.
    *encode = sub { $_[1] };
    *decode = sub { $_[1] };
}
else {
    # Encode is loaded at run time, only on this branch.
    require Encode;
    Encode->import(qw( decode encode ));
}

@ISA = qw(Vend::Table::Common);
$VERSION = '2.21';
$VERSION = '2.22';

sub new {
my ($class, $obj) = @_;
Expand Down Expand Up @@ -120,8 +111,6 @@ sub open_table {
die ::errmsg("%s could not tie to '%s': %s", 'GDBM', $filename, $!)
unless $dbm;

apply_utf8_filters($dbm) if $config->{GDBM_ENABLE_UTF8};

my $columns = [split(/\t/, $tie->{'c'})];

$config->{VERBATIM_FIELDS} = 1 unless defined $config->{VERBATIM_FIELDS};
Expand All @@ -140,22 +129,6 @@ sub open_table {
bless $s, $class;
}

# Install UTF-8 conversion filters on a DBM handle.
#
# Keys and values are passed through encode('utf8', ...) on store and
# through decode('utf8', ...) on fetch.  Each filter works on $_ in place.
# The handle must provide the filter_store_key, filter_store_value,
# filter_fetch_key and filter_fetch_value methods.
#
# Returns the same handle that was passed in.
sub apply_utf8_filters {
    my $dbm = shift;

#::logDebug("applying UTF-8 filters to GDBM handle");

    my $to_bytes = sub { $_ = encode('utf8', $_) };
    my $to_chars = sub { $_ = decode('utf8', $_) };

    # Outgoing data: characters -> bytes.
    for my $method (qw(filter_store_key filter_store_value)) {
        $dbm->$method($to_bytes);
    }

    # Incoming data: bytes -> characters.
    for my $method (qw(filter_fetch_key filter_fetch_value)) {
        $dbm->$method($to_chars);
    }

    return $dbm;
}

# Unfortunate hack need for Safe searches
*column_index = \&Vend::Table::Common::column_index;
*column_exists = \&Vend::Table::Common::column_exists;
Expand Down

0 comments on commit 25f07cb

Please sign in to comment.