Removed szlig conversion. Too dangerous as \341 is a valid character in
latin-1, too.
This commit is contained in:
hjp 2006-03-08 18:08:52 +00:00
parent 3f46eea7cc
commit 0e56ed880b
2 changed files with 77 additions and 4 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/perl -w
#
# $Id: sanitize_umlauts,v 1.1 2002-10-27 12:28:59 hjp Exp $
# $Id: sanitize_umlauts,v 1.2 2006-03-08 18:08:52 hjp Exp $
#
use strict;
@ -9,9 +9,9 @@ use File::Find;
sub wanted {
if (/[\204\224\201\216\231\232\341\202]/) {
if (/[\204\224\201\216\231\232\202]/) {
my $new = $_;
$new =~ tr/\204\224\201\216\231\232\341\202/äöüÄÖÜßé/;
$new =~ tr/\204\224\201\216\231\232\202/äöüÄÖÜé/;
print $File::Find::dir, ": $_ -> $new\n";
rename $_, $new or die "cannot rename $_ to $new: $!";
}
@ -26,6 +26,10 @@ print "\n\n";
# $Log: sanitize_umlauts,v $
# Revision 1.1 2002-10-27 12:28:59 hjp
# Revision 1.2 2006-03-08 18:08:52 hjp
# Removed szlig conversion. Too dangerous as \341 is a valid character in
# latin-1, too.
#
# Revision 1.1 2002/10/27 12:28:59 hjp
# *** empty log message ***
#

View File

@ -0,0 +1,69 @@
#!/usr/bin/perl -w
#
# $Id: sanitize_umlauts_utf8,v 1.1 2006-03-08 18:08:52 hjp Exp $
#
use strict;
use File::Find;
use Encode;
# File names are printed exactly as the bytes they consist of.
binmode STDOUT, ":raw";

# Conversion table: DOS code page (CP437/CP850) byte => replacement character.
# The replacements are written as \xNN escapes (their Latin-1 code points)
# instead of literal umlauts, so that each value is exactly one character no
# matter which encoding this script file is saved in. wanted() encodes the
# converted name to UTF-8 afterwards.
my %ex = (
    "\204" => "\xE4",    # a with diaeresis
    "\224" => "\xF6",    # o with diaeresis
    "\201" => "\xFC",    # u with diaeresis
    "\216" => "\xC4",    # A with diaeresis
    "\231" => "\xD6",    # O with diaeresis
    "\232" => "\xDC",    # U with diaeresis
    "\202" => "\xE9",    # e with acute
);
# File::Find callback: rename the current entry if its name is not valid UTF-8.
#
# File::Find sets $_ to the name of the current entry and $File::Find::dir to
# the directory containing it before each call. A name that decodes as strict
# UTF-8 is left alone. Otherwise every byte that has an entry in the
# file-level table %ex is replaced by the mapped character, all other bytes
# are kept unchanged, and the result is encoded as UTF-8 to form the new name.
#
# Prints "<dir>: <old> -> <new>" for each rename. Dies if the new name already
# exists or if rename() fails. The return value is not meaningful.
sub wanted {
    my $old = $_;

    # Validate a copy: when a CHECK argument is given, decode() may modify
    # the string it is handed, and $_ belongs to File::Find.
    my $probe = $old;
    my $is_utf8 = eval { decode("utf-8", $probe, Encode::FB_CROAK()); 1 };
    return if $is_utf8;

    # Only bytes listed in %ex are converted; everything else passes through.
    my $new = join "",
        map { exists $ex{$_} ? $ex{$_} : $_ }
        split //, $old;
    $new = encode("utf-8", $new);

    # rename() would silently replace an existing entry of the same name.
    if (-e $new || -l $new) {
        die "cannot rename $old to $new: target already exists\n";
    }

    print $File::Find::dir, ": $old -> $new\n";
    rename $old, $new or die "cannot rename $old to $new: $!";
    return;
}
# Without arguments, start at the current directory. finddepth() hands each
# directory to wanted() only after that directory's contents.
@ARGV = (".") unless @ARGV;
finddepth(\&wanted, @ARGV);
print "\n\n";
# $Log: sanitize_umlauts_utf8,v $
# Revision 1.1 2006-03-08 18:08:52 hjp
# Removed szlig conversion. Too dangerous as \341 is a valid character in
# latin-1, too.
#
# Revision 1.1 2002/10/27 12:28:59 hjp
# *** empty log message ***
#
# vim:sw=4 expandtab