70 lines
1.3 KiB
Plaintext
70 lines
1.3 KiB
Plaintext
|
#!/usr/bin/perl -w
|
|||
|
#
|
|||
|
# $Id: sanitize_umlauts_utf8,v 1.1 2006-03-08 18:08:52 hjp Exp $
|
|||
|
#
|
|||
|
|
|||
|
use strict;
|
|||
|
use File::Find;
|
|||
|
use Encode;
|
|||
|
|
|||
|
# File names are handled as raw byte strings; keep STDOUT free of any
# encoding layer so they are printed exactly as built.
binmode STDOUT, ":raw";

# Translation table for legacy single-byte characters found in file names.
# Keys are the raw bytes as they appear on disk; values are the intended
# characters as Latin-1 code points (wanted() UTF-8-encodes the result).
#
# NOTE(review): the values were unreadable in the copy this was restored
# from. They are reconstructed on the assumption that the keys are CP437
# (DOS) codes, which matches the script's purpose and the "\341 = szlig"
# remark in the revision log -- confirm against the original file.
# Hex escapes are used so the table does not depend on the encoding of
# this source file.
my %ex = (
    "\204" => "\x{E4}",    # a with diaeresis
    "\224" => "\x{F6}",    # o with diaeresis
    "\201" => "\x{FC}",    # u with diaeresis
    "\216" => "\x{C4}",    # A with diaeresis
    "\231" => "\x{D6}",    # O with diaeresis
    "\232" => "\x{DC}",    # U with diaeresis
    "\202" => "\x{E9}",    # e with acute
);
|
|||
|
|
|||
|
# File::Find callback: rename the current entry when its name is not valid
# UTF-8.
#
# Input:  $_ holds the entry's name and $File::Find::dir its directory.
#         rename() is given the bare name, which relies on File::Find's
#         default of chdir()-ing into that directory before the callback.
# Effect: names that already decode as UTF-8 are left alone.  Otherwise each
#         byte listed in %ex is replaced by its mapped character, every
#         other byte is kept as the Latin-1 character with the same code,
#         the result is UTF-8-encoded, the change is printed to STDOUT and
#         the entry is renamed.
# Dies:   if rename() fails.
sub wanted {
    my $name = $_;

    # Validate a scratch copy: decode() with a CHECK value may modify its
    # input in place, and $_ must stay intact for File::Find and for the
    # rename below.
    my $is_utf8 = eval {
        my $probe = $name;
        decode("utf-8", $probe, Encode::FB_CROAK());
        1;
    };
    return if $is_utf8;

    # Every key of %ex is a byte >= 0x80, so ASCII characters always fall
    # through unchanged.  The byte \341 is deliberately not translated: it
    # is also a valid Latin-1 character, so converting it was judged too
    # dangerous (see the revision log).
    my $new = join "",
        map { exists $ex{$_} ? $ex{$_} : $_ }
        split //, $name;
    $new = encode("utf-8", $new);

    # rename() would silently replace an existing entry; never destroy data
    # just to tidy up a name.
    if (-e $new || -l $new) {
        warn "$File::Find::dir: not renaming $name: $new already exists\n";
        return;
    }

    print $File::Find::dir, ": $name -> $new\n";
    rename $name, $new or die "cannot rename $name to $new: $!";
}
|
|||
|
|
|||
|
|
|||
|
|
|||
|
# With no paths on the command line, process the current directory.
push @ARGV, "." unless @ARGV;

# finddepth() visits a directory's contents before the directory itself, so
# a directory is renamed only after everything inside it has been handled.
finddepth(\&wanted, @ARGV);

print "\n\n";
|
|||
|
|
|||
|
|
|||
|
# $Log: sanitize_umlauts_utf8,v $
|
|||
|
# Revision 1.1 2006-03-08 18:08:52 hjp
|
|||
|
# Removed szlig conversion. Too dangerous as \341 is a valid character in
|
|||
|
# latin-1, too.
|
|||
|
#
|
|||
|
# Revision 1.1 2002/10/27 12:28:59 hjp
|
|||
|
# *** empty log message ***
|
|||
|
#
|
|||
|
# vim:sw=4 expandtab
|
|||
|
|