Removed szlig conversion. Too dangerous as \341 is a valid character in
latin-1, too.
This commit is contained in:
parent
3f46eea7cc
commit
0e56ed880b
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/perl -w
|
||||
#
|
||||
# $Id: sanitize_umlauts,v 1.1 2002-10-27 12:28:59 hjp Exp $
|
||||
# $Id: sanitize_umlauts,v 1.2 2006-03-08 18:08:52 hjp Exp $
|
||||
#
|
||||
|
||||
use strict;
|
||||
|
@ -9,9 +9,9 @@ use File::Find;
|
|||
|
||||
sub wanted {
|
||||
|
||||
if (/[\204\224\201\216\231\232\341\202]/) {
|
||||
if (/[\204\224\201\216\231\232\202]/) {
|
||||
my $new = $_;
|
||||
$new =~ tr/\204\224\201\216\231\232\341\202/äöüÄÖÜßé/;
|
||||
$new =~ tr/\204\224\201\216\231\232\202/äöüÄÖÜé/;
|
||||
print $File::Find::dir, ": $_ -> $new\n";
|
||||
rename $_, $new or die "cannot rename $_ to $new: $!";
|
||||
}
|
||||
|
@ -26,6 +26,10 @@ print "\n\n";
|
|||
|
||||
|
||||
# $Log: sanitize_umlauts,v $
|
||||
# Revision 1.1 2002-10-27 12:28:59 hjp
|
||||
# Revision 1.2 2006-03-08 18:08:52 hjp
|
||||
# Removed szlig conversion. Too dangerous as \341 is a valid character in
|
||||
# latin-1, too.
|
||||
#
|
||||
# Revision 1.1 2002/10/27 12:28:59 hjp
|
||||
# *** empty log message ***
|
||||
#
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/perl -w
|
||||
#
|
||||
# $Id: sanitize_umlauts_utf8,v 1.1 2006-03-08 18:08:52 hjp Exp $
|
||||
#
|
||||
|
||||
use strict;
|
||||
use File::Find;
|
||||
use Encode;
|
||||
|
||||
# Put STDOUT in raw mode so the file names printed by wanted() are written
# as the exact bytes they contain, with no I/O-layer translation.
binmode STDOUT, ":raw";
|
||||
|
||||
# Exception table: single bytes (written as octal escapes) mapped to the
# accented letter each one should become.  wanted() consults it only for
# characters outside the printable ASCII range 0x21-0x7E; bytes without an
# entry are passed through unchanged.
# NOTE(review): the keys look like DOS code page (CP437/CP850) positions for
# these letters -- confirm against the files this is run on.
# There is deliberately no entry for \341 (sharp s): per the revision log
# that byte is also a valid latin-1 character, so converting it is unsafe.
my %ex = (
|
||||
"\204" => "ä",
|
||||
"\224" => "ö",
|
||||
"\201" => "ü",
|
||||
"\216" => "Ä",
|
||||
"\231" => "Ö",
|
||||
"\232" => "Ü",
|
||||
"\202" => "é",
|
||||
);
|
||||
|
||||
# wanted - File::Find callback that renames entries whose names are not
# valid UTF-8.
#
# Reads the current entry name from $_ (as set by File::Find) and the
# containing directory from $File::Find::dir.  A name that decodes cleanly
# as UTF-8 is left alone.  Any other name is rebuilt character by character:
# characters outside printable ASCII (0x21-0x7E) that have an entry in %ex
# are replaced by the mapped letter, everything else is kept, and the result
# is encoded as UTF-8 before the entry is renamed.
#
# Prints "DIR: OLD -> NEW" for every rename.  Dies if rename() fails.  Warns
# and skips the entry if something with the new name already exists, because
# rename() would otherwise silently replace it.
sub wanted {
    # Work on a private copy: File::Find owns $_ and documents that callbacks
    # must not modify it.
    my $old = $_;

    # With a CHECK value set, Encode overwrites its source argument in place
    # unless LEAVE_SRC is also given, so request strict checking (croak on
    # malformed input) while leaving the name intact.
    eval {
        decode("utf-8", $old, Encode::FB_CROAK() | Encode::LEAVE_SRC());
    };

    # No exception means the name is already valid UTF-8: nothing to do.
    return unless $@;

    my $new = "";
    for my $ch (split(//, $old)) {
        my $code = ord($ch);
        my $printable_ascii = $code >= 0x0021 && $code <= 0x007E;

        if (!$printable_ascii && defined $ex{$ch}) {
            $new .= $ex{$ch};
        }
        else {
            $new .= $ch;
        }
    }

    # Characters that were neither ASCII nor remapped are still single
    # bytes here; encoding turns each of them into its UTF-8 sequence.
    $new = encode("utf-8", $new);

    # Refuse to clobber an existing entry (-l also catches dangling symlinks,
    # for which -e is false).
    if (-e $new || -l $new) {
        warn "$File::Find::dir: not renaming $old to $new: target already exists\n";
        return;
    }

    print $File::Find::dir, ": $old -> $new\n";
    rename $old, $new or die "cannot rename $old to $new: $!";
}
|
||||
|
||||
|
||||
|
||||
# Script entry point: with no start paths on the command line, fall back to
# the current directory.
unless (@ARGV) {
    @ARGV = (".");
}

# finddepth() calls wanted() for a directory's entries before the directory
# itself, so each directory is renamed only after its contents were handled.
finddepth(\&wanted, @ARGV);

# Terminate the report with a blank line.
print "\n\n";
|
||||
|
||||
|
||||
# $Log: sanitize_umlauts_utf8,v $
|
||||
# Revision 1.1 2006-03-08 18:08:52 hjp
|
||||
# Removed szlig conversion. Too dangerous as \341 is a valid character in
|
||||
# latin-1, too.
|
||||
#
|
||||
# Revision 1.1 2002/10/27 12:28:59 hjp
|
||||
# *** empty log message ***
|
||||
#
|
||||
# vim:sw=4 expandtab
|
||||
|
Loading…
Reference in New Issue