#!/usr/bin/perl
#
# fix_utf-8.pl
#
# Last Updated: 2003.11.02 (xris)
#
# Walks the given paths (default: the current directory) depth-first and
# re-encodes what it finds:
#
#   * file and directory names: ISO-8859-1 -> UTF-8 (or back with --undo)
#   * Ogg Vorbis comments:      ISO-8859-1 -> UTF-8 (or back with --undo)
#   * MP3 ID3v1/ID3v2 tags:     always converted back to ISO-8859-1
#
# Usage: fix_utf-8.pl [--undo] [path ...]
#

use strict;
use warnings;

# Load some modules
use Encode;
use File::Find;
use File::Temp qw/ tempfile /;
use Cwd;
use MP3::Tag;

# Load in the commandline arguments
use Getopt::Long;

# Parsed command line options; read by fix_utf8() to honour --undo.
my %Args;

# Only walk the filesystem when run as a script, so that the helper subs
# can be loaded (e.g. for testing) without touching any files.
main() unless caller;

# Parse the command line and process every path that exists.
sub main {
    GetOptions(\%Args, 'undo');

    # Default to the current directory
    @ARGV = ('.') unless (@ARGV);

    foreach my $path (@ARGV) {
        next unless (-e $path);
        # Depth-first, so the contents of a directory are handled before the
        # directory itself gets renamed.  File::Find chdir()s into each
        # directory, so $_ in process() is the bare entry name.
        finddepth({wanted => \&process, bydepth => 1}, $path);
    }
    return;
}

# File::Find "wanted" callback.
#
# Renames the current entry ($_) to its converted name and, for .ogg and
# .mp3 files, converts the embedded comments/tags as well.
sub process {
    return if (/^\.\.?$/);
    print "Processing: $File::Find::name\n";

    # Rename, but never clobber an existing entry, and keep working with the
    # old name if the rename did not happen.
    my $file = fix_utf8($_);
    if ($file ne $_) {
        if (-e $file) {
            warn "Not renaming $File::Find::name: target name already exists\n";
            $file = $_;
        }
        elsif (!rename($_, $file)) {
            warn "Can't rename $File::Find::name: $!\n";
            $file = $_;
        }
    }

    # Ogg file?  fix the comments
    if ($file =~ /\.ogg$/) {
        _fix_ogg_comments($file);
    }
    # MP3 file?  fix the comments
    elsif ($file =~ /\.mp3$/) {
        # Open the mp3 and grab the tags
        my $mp3 = MP3::Tag->new($file);
        if ($mp3) {
            $mp3->get_tags;
            _rebuild_id3v2($mp3) if ($mp3->{ID3v2});
            _rebuild_id3v1($mp3) if ($mp3->{ID3v1});
            # Close
            $mp3->close();
        }
    }
    # Textfiles (disabled; the intended test was
    # -T $file || $file =~ /\.txt$/i)
    elsif (0) {
        _fix_textfile($file);
    }
    return;
}

# Read the Vorbis comments of $file, convert them and write them back.
#
# vorbiscomment is run without a shell (list-form pipe open), so no quoting
# of the file name is needed.  Nothing is written when the comments could
# not be read or did not change, so a failed read can never erase the tags.
sub _fix_ogg_comments {
    my ($file) = @_;

    # Keep a name starting with "-" from being parsed as an option.
    my $target = ($file =~ /^-/) ? "./$file" : $file;

    # Get the comments
    my $reader;
    unless (open($reader, '-|', 'vorbiscomment', $target)) {
        warn "Can't open pipe from vorbiscomment: $!\n";
        return;
    }
    my $comments = do { local $/; <$reader> };
    unless (close $reader) {
        warn "vorbiscomment could not read $File::Find::name (status $?)\n";
        return;
    }
    $comments = '' unless (defined $comments);

    # Convert the comments
    my $fixed = fix_utf8($comments);
    return if ($fixed eq $comments);

    # Save the comments
    open(my $writer, '|-', 'vorbiscomment', '-w', $target)
        or die "Can't open pipe to vorbiscomment: $!\n\n";
    print {$writer} $fixed;
    close $writer
        or warn "vorbiscomment could not write $File::Find::name (status $?)\n";
    return;
}

# Rebuild the ID3v2 tag of an opened MP3::Tag object.
#
# NOTE: only the frames listed below are carried over into the new tag;
# every other frame of the old tag is discarded along with it.
sub _rebuild_id3v2 {
    my ($mp3) = @_;

    my %v2;
    foreach my $frame ('TRCK', 'TIT2', 'TPE1', 'TALB', 'TCON', 'MCDI', 'TYER') {
        my ($info) = $mp3->{ID3v2}->get_frame($frame);
        # Skip missing/empty frames, but keep a literal "0".
        next unless (defined $info and (ref $info or length $info));
        # Only plain strings are re-encoded; structured frames pass through.
        $info = fix_utf8($info, 1) unless (ref $info);
        $v2{$frame} = $info;
    }

    # Fix double-genre issues ("RockRock" -> "Rock").  Only look at a genre
    # that is really there, so no empty TCON frame gets created.
    if (defined $v2{TCON} and !ref $v2{TCON} and length $v2{TCON}) {
        my $half = length($v2{TCON}) / 2;
        if (int($half) == $half) {
            $v2{TCON} =~ s/^(.{$half})\1$/$1/s;
        }
    }

    # Remove the old tag and create a new one
    $mp3->{ID3v2}->remove_tag;
    $mp3->new_tag('ID3v2');

    # Store the new tags
    foreach my $frame (keys %v2) {
        $mp3->{ID3v2}->add_frame($frame, $v2{$frame});
    }
    $mp3->{ID3v2}->write_tag;
    return;
}

# Rebuild the ID3v1 tag of an opened MP3::Tag object, converting the song,
# artist and album fields back to ISO-8859-1.
sub _rebuild_id3v1 {
    my ($mp3) = @_;

    my $track  = $mp3->{ID3v1}->track;
    my $song   = $mp3->{ID3v1}->song;
    my $artist = $mp3->{ID3v1}->artist;
    my $album  = $mp3->{ID3v1}->album;
    my $genre  = $mp3->{ID3v1}->genre;
    my $year   = $mp3->{ID3v1}->year;

    # Remove the old tag and create a new one
    $mp3->{ID3v1}->remove_tag;
    $mp3->new_tag('ID3v1');

    # Store the new tags
    $mp3->{ID3v1}->track( $track);
    $mp3->{ID3v1}->song(  fix_utf8($song,   1));
    $mp3->{ID3v1}->artist(fix_utf8($artist, 1));
    $mp3->{ID3v1}->album( fix_utf8($album,  1));
    $mp3->{ID3v1}->genre( $genre);
    $mp3->{ID3v1}->year(  $year);
    $mp3->{ID3v1}->write_tag;
    return;
}

# Convert a text file line by line (currently not called, see process()).
#
# The temp file is created in the current directory so that the final
# rename() cannot fail because of a filesystem boundary.
sub _fix_textfile {
    my ($file) = @_;

    # Read/parse the data
    open(my $in, '<', $file) or die "Can't read $file: $!\n\n";

    # Get a temp file
    my ($out, $tempfile) = tempfile(DIR => '.');
    while (my $line = <$in>) {
        print {$out} fix_utf8($line);
    }
    close $in;

    # Close and rename
    close $out or die "Can't write temp $tempfile: $!\n\n";
    rename $tempfile, $file or die "Can't rename $tempfile to $file: $!\n\n";
    return;
}

# Convert a byte string between ISO-8859-1 and UTF-8.
#
#   $val   the string, or a reference to a string (then converted in place)
#   $undo  true: convert UTF-8 to ISO-8859-1 (also forced by --undo)
#          false: convert ISO-8859-1 to UTF-8
#
# Returns the converted string; '' for undefined or empty input.
#
# Forward mode first undoes up to two layers of existing UTF-8 encoding
# (which repairs double-encoded text) and then encodes once.  A string that
# is valid UTF-8 but holds characters outside ISO-8859-1 is returned as it
# is, because a round trip through ISO-8859-1 would replace them with "?".
sub fix_utf8 {
    my $val  = shift;
    my $undo = shift;
    my $str  = ref $val ? $val : \$val;

    # Return Early?  (note that "0" is a perfectly good string)
    return '' unless (defined $$str and length $$str);

    # Undoing utf-8?
    if ($Args{undo} || $undo) {
        my $chars = _utf8_chars($$str);
        # Malformed utf-8 characters, this is probably NOT utf-8
        return $$str unless (defined $chars);
        # Not representable in iso-8859-1, leave it alone
        return $$str if ($chars =~ /[^\x00-\xFF]/);
        # Now we convert back to iso-8859-1
        Encode::from_to($$str, 'utf-8', 'iso-8859-1');
        return $$str;
    }

    # Already UTF-8?  Convert it back to latin1, then check once more in
    # case the string had been encoded twice.
    foreach my $pass (1 .. 2) {
        my $chars = _utf8_chars($$str);
        last unless (defined $chars);
        # Real UTF-8 that latin1 cannot hold: this is what we want already
        return $$str if ($chars =~ /[^\x00-\xFF]/);
        Encode::from_to($$str, 'utf-8', 'iso-8859-1');
    }

    # Now we encode from iso-8859-1
    Encode::from_to($$str, 'iso-8859-1', 'utf-8');
    return $$str;
}

# Return the character string for $octets when they are well-formed UTF-8,
# undef otherwise.  Works on a copy; the caller's string is not touched.
sub _utf8_chars {
    my ($octets) = @_;
    Encode::_utf8_on($octets);
    return Encode::is_utf8($octets, Encode::FB_QUIET()) ? $octets : undef;
}

# Wrap a string in double quotes for use in a shell command line, escaping
# every character the shell still treats specially inside double quotes
# (double quote, dollar sign, backtick and backslash).
sub ShellSafe {
    my $str = shift;
    $str = '' unless (defined $str);
    $str =~ s/(["\$`\\])/\\$1/sg;
    return "\"$str\"";
}

1;