#!/usr/bin/perl -w
#
# $Id: sanitize_umlauts_utf8,v 1.1 2006-03-08 18:08:52 hjp Exp $
#
# Usage: sanitize_umlauts_utf8 [DIR ...]
#
# Walks each DIR (default: ".") and renames every entry whose name is not
# valid UTF-8.  A handful of legacy single-byte umlaut codes are mapped to
# the intended letters (see %ex); every other byte is interpreted as
# Latin-1.  The resulting name is written back as UTF-8.
#
use strict;
use warnings;
use File::Find;
use Encode;

# File names are raw byte strings; print them without any I/O layer
# translation.
binmode STDOUT, ":raw";

# Legacy byte => intended character.  The byte values are the ones DOS code
# pages such as CP437/CP850 use for these letters.  The replacements are
# written as code-point escapes so that the table does not depend on the
# encoding this script file happens to be saved in (a raw UTF-8 literal
# without "use utf8" would be two bytes and get double-encoded below).
#
# \341 (sharp s in those code pages) is deliberately NOT mapped: it is also
# a valid Latin-1 character, so converting it is too dangerous.
my %ex = (
    "\204" => "\x{E4}",    # a-umlaut
    "\224" => "\x{F6}",    # o-umlaut
    "\201" => "\x{FC}",    # u-umlaut
    "\216" => "\x{C4}",    # A-umlaut
    "\231" => "\x{D6}",    # O-umlaut
    "\232" => "\x{DC}",    # U-umlaut
    "\202" => "\x{E9}",    # e-acute
);

# sanitize_name($name)
#
# $name is a file name as a byte string.  Returns undef if $name already is
# valid UTF-8 (nothing to do); otherwise returns the sanitized name as a
# UTF-8 encoded byte string.
sub sanitize_name {
    my ($name) = @_;

    # FB_CROAK makes decode() die on malformed input; LEAVE_SRC keeps
    # Encode from modifying $name in place while checking.
    my $is_utf8 = eval {
        decode("utf-8", $name, Encode::FB_CROAK() | Encode::LEAVE_SRC());
        1;
    };
    return if $is_utf8;

    # Translate byte by byte: known legacy codes via %ex, everything else
    # is kept and thereby treated as a Latin-1 character by encode().
    my $chars = join "",
                map { exists $ex{$_} ? $ex{$_} : $_ }
                split //, $name;

    return encode("utf-8", $chars);
}

# File::Find callback: $_ is the current entry's name and
# $File::Find::dir the directory that contains it.
sub wanted {
    my $old = $_;
    my $new = sanitize_name($old);
    return unless defined $new;

    # rename() would silently replace an existing entry of that name.
    if (-e $new || -l $new) {
        warn "$File::Find::dir: not renaming $old: $new already exists\n";
        return;
    }

    print $File::Find::dir, ": $old -> $new\n";
    rename $old, $new or die "cannot rename $old to $new: $!";
}

if (@ARGV == 0) {
    push(@ARGV, ".");
}

# finddepth() visits a directory's contents before the directory itself,
# so a directory is only renamed after everything below it was handled.
finddepth(\&wanted, @ARGV);
print "\n\n";

# $Log: sanitize_umlauts_utf8,v $
# Revision 1.1  2006-03-08 18:08:52  hjp
# Removed szlig conversion. Too dangerous as \341 is a valid character in
# latin-1, too.
#
# Revision 1.1  2002/10/27 12:28:59  hjp
# *** empty log message ***
#
# vim:sw=4 expandtab