12-02-04 01:46 AM
Just thought I'd send this for the record to the PERL mysql list.
Here's a brief summary of what I needed to do to get unicode working
happily with Perls DBI DBD::MySQL and MySQL.
I'm using PERL 5.8.1 and mysql 4.1.7
Web pages are encoded as utf-8 and put together using cgi.pm
The following 3 things are used at the start of the PERL script:
use utf8; #tells PERL that I will be using utf8 in the script itself.
use Encode qw/is_utf8 decode/; #allows me to decode utf8 strings and test th
em
binmode(STDOUT, ":utf8"); #prints characters in utf8
The web page is started with utf8 encoding using cgi:
print start_html(-encoding => 'utf-8',-title => "Simple UTF 8 test");
I used cgi to pass utf data to the next page and then decoded it into
a variable:
my $univalue = decode("utf8", param("param_name"));
I have a table in mysql whose charset is utf8,
Here's the bit that's not mentioned much:
When opening the connection to the DB using DBI I needed to tell it
that I wanted the connection to use utf8.
So I connected as usual:
my $dsn = "dbi:mysql:database=$dbname;host=$dbhost";
my $dbh = DBI->connect($dsn, $dbun, $dbpw);
and then to make the conenction utf8:
my $sql = qq{SET NAMES 'utf8';};
$dbh->do($sql);
I then insert it into the database the normal way:
INSERT INTO unitest (id, aword) VALUES ( "$id", "$unicode" );
And retrieve the normal way:
select aword from unitest where id = "$id";
Finally to tell PERL the string back from the db ($char) is unicode:
decode("utf8", $char);
That's it through cgi to the db and back.
below is the whole script in case it helps anyone in the future
looking through the archive:
Script specified 5.8.1 but should work with any 5.8.x version:
HTH someone someday.
Angie
#!/usr/bin/perl
#
# Author Angie Ahl (angie@vertebrate.co.uk) : 2004/12/01 22:14:00
#
# script to test unicode, standard cgi mode.
# requires PERL 5.8, DBI, DBD::MySQL, CGI and Encode
use strict;
use warnings;
use 5.008001;
use utf8;
use DBI;
use CGI (':standard');
use Encode qw/is_utf8 decode/;
binmode(STDOUT, ":utf8");
BEGIN {
print header(-type => "text/html", -charset => "utf-8");
print start_html(
# This seems to be no important?
-encoding => 'utf-8',
-title => "Simple UTF 8 test");
print "\n\n";
}
END {
print "\n\n", end_html;
}
my $dbh = &opendb || die "Could not open DB";
# page calls
unless (param('VertDo')) {
&startpage;
}
if (param('VertDo') eq 'test unicode') {
&testpage;
}
sub startpage {
my %uni = &unihash;
print "\n";
print start_form;
print h1("Unicode test: Page 1");
print '<table border="1" cellpadding="5">';
my $tablinedef = "<tr>" . "<td>%s</td>" x 3 . "</td>\n";
# Headerline
printf $tablinedef,
"description","language","char in unicode";
foreach (sort keys %uni) {
printf $tablinedef,
$_,
$uni{$_}->{language},
textfield(-name=>$_, -default=>$uni{$_}->{character}, -size=>50,
-maxlength=>80);
}
print '</table>';
print hidden(-name=>'VertDo',
-default=>['test unicode']);
print submit(-name=>'Do',
-value=>'test unicode');
print endform;
}
sub testpage {
my %uni = &unihash;
print h1("Unicode test: Page 2: Form results");
print '<table border="1" cellpadding="5">';
my $tablinedef = "<tr>" . "<td>%s</td>" x 4 . "</td>\n";
printf $tablinedef,
"description","language","char in unicode before form", "char
returned through form";
foreach (sort keys %uni) {
my $univalue = decode("utf8", param($_));
printf $tablinedef,
$_,
$uni{$_}->{language},
$uni{$_}->{character},
$univalue;
}
print '</table>';
print h2("Unicode test: From Form to DB and back again");
&drop_tables;
&create_tables;
&cgi_unitable;
print h2("Unicode test: mysql settings");
&get_mysql_charsets_inuse;
}
# subroutines
sub opendb {
# EDIT DB config HERE:
my $dbname = "cms_dev";
my $dbhost = "localhost";
my $dbun = "vertcms";
my $dbpw = "trilogy5parts";
my $dsn = "dbi:mysql:database=$dbname;host=$dbhost";
my $dbh = DBI->connect($dsn, $dbun, $dbpw) ||
eval {
print "Could not connect to database<br/>\n";
exit;
};
my $sql = qq{SET NAMES 'utf8';};
$dbh->do($sql);
return $dbh;
}
sub unihash {
my %uni = (
hebrew_alef => {
character => chr(0x05d0),
language => "hebrew",
},
smiley => {
character => "\x{263a}",
language => "none",
},
zcaron => {
character => "\x{017d}",
language => "czech",
},
aogonec => {
character => "\x{0104}",
language => "polish",
},
lslash => {
character => "\x{0142}",
language => "polish",
},
recenu => {
character => "re\x{e7}enu",
language => "french",
},
citroen => {
character => "citro\x{eb}n",
language => "french",
},
abimer => {
character => "ab\x{ee}mer",
language => "french",
},
disco => {
character => "discoth\x{e8}que",
language => "french",
},
angies => {
character =>
"\x{0142}\x{e7}\x{263a}\x{0104}\x{263a}\x{0104
}re\x{e7}enu\x{263a}",
language => "mixed",
},
);
return %uni;
}
sub cgi_unitable {
my %uni = &unihash;
print '<table border="1">';
my $tablinedef = "<tr>" . "<td>%s</td>" x 5 . "</td>\n";
# Headerline
printf $tablinedef,
"description","language","char-original",
"char-back-from-db<br/>should be the same!","is-it-utf8?";
foreach (sort keys %uni) {
my $univalue = decode("utf8", param($_));
&dbpush( $_, $univalue );
printf $tablinedef,
$_,
$uni{$_}->{language},
$uni{$_}->{character},
&dbget( $_ ),
is_utf8( &dbget($_)) ? "is unicode" : "is not unicode";
}
print '</table>';
}
sub get_mysql_charsets_inuse {
my $sql = qq{SHOW VARIABLES LIKE "character%";};
my $sth = $dbh->prepare($sql);
my $printer = "";
$printer .= h2("MySQL Characters in use for this script:");
$printer .= qq{<table border="1">};
$sth->execute;
while (my @ary = $sth->fetchrow_array ()) {
$printer .= "<tr>";
for (my $i = 0; $i < @ary; $i++) {
$printer .= "<td>" . $ary[$i] . "</td>";
}
$printer .= "</tr>";
}
$sth->finish;
$printer .= "</table><br /><br />";
print $printer;
}
sub drop_tables {
my $sql;
print "dropping tables<br/>";
$sql = qq{ drop table unitest };
$dbh->do($sql) || print $dbh->{'mysql_error'};
}
sub create_tables {
my $sql;
print "creating tables<br/>";
$sql = qq{
CREATE TABLE unitest (
id varchar(30) not null,
aword varchar(80) default NULL
) TYPE=MyISAM CHARSET=utf8
};
$dbh->do($sql) || print $dbh->{'mysql_error'};
}
sub dbpush {
my ($id,$ucode) = @_;
my $sql = qq {INSERT INTO unitest (id, aword) VALUES ( "$id",
"$ucode" )};
$dbh->do($sql) || print $dbh->{'mysql_error'};
}
sub dbget {
my $id = shift;
my $sql = "select aword from unitest where id = \"$id\"";
my $sth = $dbh->prepare($sql);
$sth->execute;
my $char = ($sth->fetchrow)[0];
$sth->finish;
# return $char;
# I get back some bytes although mysql thinks it's unicode
# I have to set that flag in PERL too, so that PERL knows.
# Without that I would get 1, 2 or 3 single chars per letter
return decode("utf8", $char);
}
# not called but for testing input of hash directly to DB
sub plain_unitable {
my %uni = &unihash;
print '<table border="1">';
my $tablinedef = "<tr>" . "<td>%s</td>" x 5 . "</td>\n";
# Headerline
printf $tablinedef,
"description","language","char-original",
"char-back-from-db<br/>should be the same!","is-it-utf8?";
foreach (sort keys %uni) {
&dbpush( $_, $uni{$_}->{character} );
printf $tablinedef,
$_,
$uni{$_}->{language},
$uni{$_}->{character},
&dbget( $_ ),
is_utf8( &dbget($_)) ? "is unicode" : "is not unicode";
}
print '</table>';
}
--
MySQL PERL Mailing List
For list archives: http://lists.mysql.com/perl
To unsubscribe: http://lists.mysql.com/perl?unsub=m...
ie.nctu.edu.tw
[ Post a follow-up to this message ]
|