1 # Vend::CharSet - utility methods for handling character encoding
3 # $Id: CharSet.pm,v 2.11 2009-03-22 19:32:31 mheins Exp $
5 # Copyright (C) 2008 Interchange Development Group
6 # Copyright (C) 2008 Sonny Cook <sonny@endpoint.com>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public
19 # License along with this program; if not, write to the Free
20 # Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
23 package Vend::CharSet;
25 @ISA = qw( Exporter );
35 use utf8; eval "\$\343\201\257 = 42"; # attempt to automatically load the utf8 libraries.
36 require "utf8_heavy.pl";
38 unless( $ENV{MINIVEND_DISABLE_UTF8} ) {
40 import Encode qw( decode is_utf8 find_encoding );
43 sub decode_urlencode {
44 my ($octets, $encoding) = (@_);
46 #::logDebug("decode_urlencode--octets: $octets, encoding: $encoding");
49 $$octets =~ s/%([0-9a-fA-F][0-9a-fA-F])/chr(hex $1)/ge;
51 return $octets unless $encoding and $Global::UTF8 and validate_encoding($encoding);
53 to_internal($encoding, $octets);
55 #::logDebug("decoded string: " . display_chars($string)) if $string;
60 my ($encoding, $octets) = @_;
62 #::logDebug("to_internal - no encoding specified"),
63 return $octets unless $encoding;
64 #::logDebug("to_internal - octets are already UTF-8 flagged"),
65 return $octets if is_utf8($octets);
67 #::logDebug("to_internal - converting octets from $encoding to internal");
68 $$octets = eval { decode($encoding, $$octets, Encode::FB_CROAK()) };
70 ::logError("Unable to properly decode <%s> with encoding %s: %s", display_chars($octets), $encoding, $@);
76 # returns a true value (the normalized name of the encoding) if the
77 # specified encoding is recognized by Encode.pm, otherwise return
79 sub validate_encoding {
81 my $enc = find_encoding($encoding);
84 return $enc->can('mime_name') ? $enc->mime_name : mime_name($enc->name);
87 # fallback routine to provide a pretty-style mime_name in versions of
88 # Encode which predate the actual method. The main use would be to
89 # normalize "utf8-strict" to "utf8", but there are other cases where
90 # this can/will come in handy.
92 my $encoding_name = shift;
94 $encoding_name =~ s/-strict//i;
95 return lc $encoding_name;
99 my $c = $Global::Selector{$CGI::script_name};
100 return $c->{Variable}{MV_HTTP_CHARSET} || $Global::Variable->{MV_HTTP_CHARSET};
103 # this sub taken from the perluniintro man page, for diagnostic purposes
108 $_ > 255 ? # if wide character...
109 sprintf("\\x{%04X}", $_) : # \x{...}
110 chr($_) =~ /[[:cntrl:]]/ ? # else if control character ...
111 sprintf("\\x%02X", $_) : # \x..
112 quotemeta(chr($_)) # else quoted or as themselves
113 } unpack("U*", $_[0])); # unpack Unicode characters