#! /usr/bin/perl
# $Id: valookup,v 1.14 2000/07/18 18:02:56 doi Exp doi $
#
# valookup - look up voice actor information
#
#       this program will look up the information in the anime voice
#       actor database (The USENET Anime Seiyuu Collection), which
#       used to be maintained by Jeff Okamoto, and print them out "nicely".
#       I have expanded Jeff's database and I am currently making
#       minor changes to it, along with corresponding changes in this
#       program to process the data.
#
#       the seiyuu information (and database) is now available via WWW at:
#               http://www.usagi.org/doi/seiyuu/
#
# original hack by Hitoshi Doi (doi@usagi.org), 1993.10.25
#
# this program is public domain.  do whatever you want with it.
# this program is offered "as is", and does not have any warranty.
#
# ---------------------------------------------------------------
# modifications
#
# 1993.12.06 Hitoshi Doi
#       added -ascii option for displaying only ASCII
#       the default is to show both Japanese and ASCII
#
# 1994.05.02 Hitoshi Doi
#       added -www option to generate a html syntax entry
#
# 1994.06.28 Hitoshi Doi
#       process the new keywords in the comment section
#
# 1994.07.25 Hitoshi Doi
#       speed up processing for WWW mode
#
# 1994.12.21 Hitoshi Doi
#       create birthday list with -birthday option
#
# 1995.03.19 Hitoshi Doi
#       process episode numbers in data
#
# ---------------------------------------------------------------
#
# database layout, as recognized by the parser below:
#
#       a line starting with a letter            voice actor name
#       " /* keyword ... */"                     personal information
#       one blank, then [a-z0-9#([               anime title, optionally
#                                                followed by " -- debut..."
#       two or more leading blanks               character name, optionally
#                                                followed by "{episodes}"
#
# ---------------------------------------------------------------

use strict;

my $me = $0;
$me =~ s/.*\///;

# the file containing the voice actor information
my $vafile = $ENV{'SEIYUU'};

# to determine if we should match romaji spellings strictly
# the default is non-strict match.  this is very slow!
my $strict_match = 0;

# search keys given on the command line and the text collected for
# each of them (the *_out arrays are indexed like the *_lookup arrays)
my (@an_lookup, @an_out);       # anime titles      (-t)
my (@ch_lookup, @ch_out);       # character names   (-c)
my (@va_lookup, @va_out);       # voice actor names (bare arguments)

my $ascii    = 0;
my $printall = 0;
my $www      = 0;
my $pinfo    = 0;
my $fname    = 0;
my $birthday = 0;
my $sub      = 0;               # directory name for -sub, or 0
my $totitle;                    # directory name for -totitle
my $www_done;                   # set once output was produced; only
                                # read by a check that is commented out
my $debug;

# state shared between the main loop and the subroutines
my $name;                       # voice actor currently being read
my ($gender, $gender_s);        # pronouns set by an "M" / "F" keyword

#
# parse the command line
# (as in the original, an empty or "0" argument ends the parsing)
#
while (my $ai = shift(@ARGV)) {
    if ($ai =~ m/^-totitle/i) {
        $totitle = shift(@ARGV);
        $ascii = 1;
    } elsif ($ai =~ m/^-t/i) {
        push(@an_lookup, shift(@ARGV));
    } elsif ($ai =~ m/^-c/i) {
        push(@ch_lookup, shift(@ARGV));
    } elsif ($ai =~ m/^-db/i) {
        $vafile = shift(@ARGV);
    } elsif ($ai =~ m/^-strict/i) {
        $strict_match = 1;
    } elsif ($ai =~ m/^-ascii/i) {
        $ascii = 1;
    } elsif ($ai =~ m/^-printall/i) {
        $printall = 1;
    } elsif ($ai =~ m/^-pinfo/i) {
        $pinfo = 1;
        $www_done = 0;
    } elsif ($ai =~ m/^-fname/i) {
        $fname = 1;
        $www_done = 0;
    } elsif ($ai =~ m/^-www/i) {
        $www = 1;
        $www_done = 0;
    } elsif ($ai =~ m/^-sub/i) {
        # anchored: the test used to be m/-sub/, which also swallowed
        # any name that merely contained "-sub"
        $sub = shift(@ARGV);
    } elsif ($ai =~ m/^-birthday/i) {
        $birthday = 1;
    } elsif ($ai =~ m/^-help/i) {
        print "$me - voice actor information lookup\n";
        print 'usage: ' . $me . " [options]\n";
        print <<'EOT';
options:
    NAME        display all characters for the actor NAME
    -t NAME     display all characters for the anime title NAME
    -c NAME     display all voice actors for the character NAME
    -db FILE    use the file FILE as the voice actor database
    -strict     match romaji spellings strictly [default is non-strict]
    -ascii      display only in ASCII [default is Japanese and ASCII]
    -pinfo      display personal information
    -fname      display full name information
    -www        generate WWW entry in HTML format
    -sub DIR    split main database into separate alphabetical files
    -birthday   generate a birthday listing
EOT
        print "the voice actor database is ";
        if ($vafile) {
            print "$vafile.\n";
        } else {
            print "currently undefined.\n";
            print "please set the environment variable SEIYUU,\n";
            print "or specify the database with the -db option.\n";
        }
        exit 0;
    } elsif ($ai =~ m/^-debug/i) {
        $debug = 1;
    } else {
        push(@va_lookup, $ai);
    }
}

if (!$vafile) {
    print "$me: the voice actor database is undefined.\n";
    print "please set the environment variable SEIYUU,\n";
    print "or specify the database with the -db option.\n";
    exit -1;
}

my $va_db = "voice actor database $vafile";
my $va_fh;
if (!open($va_fh, '<', $vafile)) {
    print "$me: can't open the $va_db.\n";
    exit -1;
}

if ($debug) {
    print "using $va_db\n";
    if (@va_lookup) {
        print "voice actors to search:\n";
        for my $i (0 .. $#va_lookup) {
            print "$va_lookup[$i]\n";
        }
    }
    if (@an_lookup) {
        print "anime to search:\n";
        for my $i (0 .. $#an_lookup) {
            print "$an_lookup[$i]\n";
        }
    }
    if (@ch_lookup) {
        print "characters to search:\n";
        for my $i (0 .. $#ch_lookup) {
            print "$ch_lookup[$i]\n";
        }
    }
}

#
# -sub: set up the output directory, keeping the previous one as DIR.old
#
my ($sub_fh, $cur_letter);
if ($sub) {
    # list form, so the directory name never goes through a shell
    system('/bin/mv', $sub, "$sub.old");
    if (!mkdir($sub, 0755) && !-d $sub) {
        print "$me: can't create the directory $sub: $!\n";
        exit -1;
    }
    $cur_letter = 'A';
    $sub_fh = &open_sub_file($sub, $cur_letter);
}

#
# go through the voice actor file and do processing
#
my $read_done = 0;      # true when $l already holds the next unread line
my $va_proc = 0;        # 1-based index into @va_lookup of the current
                        # voice actor, or 0 if nobody asked for him/her
my $l;

while (1) {
    if (!$read_done) {
        $l = <$va_fh>;
        last if (!defined($l));         # end of file
        chomp($l);
    }
    $read_done = 0;

    next if ($l =~ m/^$/);

    if ($sub) {
        if ($l =~ m/^([a-z])/i) {
            my $first = uc($1);
            # move forward to the file for this letter; files for
            # letters without entries are still created (empty).
            # an entry that sorts before the current letter stays in
            # the current file (this used to loop forever).
            while ($cur_letter lt $first) {
                close($sub_fh);
                $cur_letter++;
                $sub_fh = &open_sub_file($sub, $cur_letter);
            }
        }
        print {$sub_fh} "$l\n";
        next;
    }

    if ($l =~ m/^[a-z]/i) {
        #
        # new voice actor, set current name
        #
        $name = $l;
        $name =~ s/[ ]*\(.*\)// if ($ascii);

#       last if ($www_done);

        #
        # check to see if we have to try to match this one
        #
        $va_proc = &check_va($name);
        $va_out[$va_proc - 1] .= "$name\n" if ($va_proc);

        if ($debug) {
            print "voice actor: $name";
            print " DO PROCESSING" if ($va_proc);
            print "\n";
        }

        if ($va_proc && $fname) {
            print "$name\n";
            exit 0;
        }
    } elsif ($l =~ m/^[ ]\/\*.*\*\//) {
        #
        # pull out some information with keywords
        #
        if ($birthday) {
            &get_birthday($l);
            next;
        }

        next if (!$pinfo);
        next if (!$va_proc);

        $l =~ s/^[ ]\/\*[ ]*//;
        $l =~ s/[ ]*\*\///;
        &print_pinfo($l);
        $www_done = 1;
    } elsif ($l =~ m/^[ ][a-z0-9#\(\[]/i) {
        #
        # get anime and character of current voice actor
        # there might be more than one character
        #
        next if ($www && !$va_proc);

        my $ani = $l;
        my $debut = '';
        $ani =~ s/^ //;
        if ($ani =~ m/-- debut/i) {
            ($ani, $debut) = split(/ -- /, $ani);
        }
        $ani =~ s/[ ]*$//;

        my $cha_ct = 0;
        my @cha = ();           # character names of this title
        my @ep = ();            # episode info, same index as @cha
        while (1) {
            $l = <$va_fh>;
            if (!defined($l)) {
                # end of file: the outer loop will see an empty line,
                # read once more and then stop
                $l = '';
                $read_done = 1;
                last;
            }
            chomp($l);
            if ($l =~ m/^[ ][ ]/) {
                $l =~ s/^[ ]*//;
                $l =~ s/[ ]*$//;
                if ($l =~ m/\{.*\}/) {
                    my ($n, $e) = split(/\{/, $l);
                    $e =~ s/\}//;
                    $ep[$cha_ct] = $e;
                    $l = $n;
                    $l =~ s/[ ]*$//;
                }
                $cha[$cha_ct++] = $l;
            } else {
                # not a character line: hand it back to the outer loop
                $read_done = 1;
                last;
            }
        }

        #
        # print out DB format
        #
        if ($printall) {
            for my $i (0 .. $cha_ct - 1) {
                print "[$debut] " if $debut;
                print "$cha[$i] - $ani ";
                print "{$ep[$i]} " if ($ep[$i]);
                print "- $name\n";
            }
        }

        #
        # print out title format
        #
        if ($totitle) {
            my $afile = $ani;
            $afile =~ y#A-Z /#a-z_-#;
            my $path = "$totitle/$afile";
            my $is_new = !-f $path;     # a new file gets a title header
            my $ani_fh;
            if (open($ani_fh, '>>', $path)) {
                print {$ani_fh} "\n$ani\n" if ($is_new);
                for my $i (0 .. $cha_ct - 1) {
                    print {$ani_fh} " $cha[$i] :: $name";
                    print {$ani_fh} " {$ep[$i]}" if ($ep[$i]);
                    print {$ani_fh} " -- $debut" if $debut;
                    print {$ani_fh} "\n";
                }
                close($ani_fh);
            } else {
                # the lines used to vanish silently in this case
                print STDERR "$me: can't write to $path: $!\n";
            }
        }

        #
        # if we are processing this voice actor,
        # output all info
        #
        if ($va_proc) {
            if ($www) {
                $www_done = 1;
                for my $i (0 .. $cha_ct - 1) {
                    # NOTE(review): the text in front of the title
                    # probably contained an HTML tag that is missing
                    # from the copy this was restored from -- confirm
                    # against the master source.
                    print "\n$ani";
                    print " {$ep[$i]}" if ($ep[$i]);
                    print " [$debut] " if $debut;
                    print " :: $cha[$i]\n";
                }
                next;
            }

            for my $i (0 .. $cha_ct - 1) {
                $va_out[$va_proc - 1] .= " [$debut]" if $debut;
                $va_out[$va_proc - 1] .= " $cha[$i] - $ani";
                $va_out[$va_proc - 1] .= " {$ep[$i]}" if ($ep[$i]);
                $va_out[$va_proc - 1] .= "\n";
            }
        }

        #
        # check if we have to match this anime
        #
        my $a_id = &check_anime($ani);
        if ($a_id) {
            $a_id--;
            if (!$an_out[$a_id]) {
                $an_out[$a_id] = "$ani\n";
            }
            for my $i (0 .. $cha_ct - 1) {
                $an_out[$a_id] .= " [$debut]" if $debut;
                $an_out[$a_id] .= " {$ep[$i]}" if ($ep[$i]);
                $an_out[$a_id] .= " $cha[$i] - $name\n";
            }
        }

        #
        # check if we have to match this char
        #
        for my $i (0 .. $cha_ct - 1) {
            my $c_id = &check_char($cha[$i]);
            if ($c_id) {
                $c_id--;
                if (!$ch_out[$c_id]) {
                    $ch_out[$c_id] = "$ch_lookup[$c_id]\n";
                }
                $ch_out[$c_id] .= " [$debut]" if $debut;
                $ch_out[$c_id] .= " $cha[$i] - $ani";
                $ch_out[$c_id] .= " {$ep[$i]}" if ($ep[$i]);
                $ch_out[$c_id] .= " - $name\n";
            }
        }
    } elsif ($debug) {
        print "$me: syntax error: $l\n";
    }
}

close($va_fh);

if ($sub) {
    close($sub_fh);
    exit 0;
}

exit 0 if ($www);
exit 0 if ($pinfo);

#
# print out the results
# (one block per search key, followed by an empty line; a key
# without any match prints only the empty line)
#
for my $set ([\@va_lookup, \@va_out],
             [\@an_lookup, \@an_out],
             [\@ch_lookup, \@ch_out]) {
    my ($lookup, $out) = @$set;
    for my $i (0 .. $#$lookup) {
        print $out->[$i] if (defined($out->[$i]));
        print "\n";
    }
}

exit 0;

#
# subroutines
#

#
# open_sub_file($dir, $letter)
#
#       create (or truncate) the -sub output file $dir/$letter and
#       return its file handle.  prints a message and exits with -1
#       if the file cannot be opened.
#
sub open_sub_file {
    my ($dir, $letter) = @_;
    my $fh;

    if (!open($fh, '>', "$dir/$letter")) {
        print "$me: can't create $dir/$letter: $!\n";
        exit -1;
    }
    return($fh);
}

#
# match_rname($n1, $n2)
#
#       try to match the names, taking into consideration the
#       various ways of spelling romaji
#
#       $n1 is the name from the database, $n2 the name asked for.
#       returns 1 if $n1 starts with $n2 (ignoring case), 0 otherwise.
#       $n2 is compared literally, so "(", "[", "+" etc. in a name
#       are plain characters and not regular expression syntax.
#       unless -strict was given, each spelling rule below is tried
#       on its own, applied to both names.
#
sub match_rname {
    my ($n1, $n2) = @_;
    my @rules = (
        ['o[hou]', 'o'],        # oh -> o, oo -> o, ou -> o
        ['ye',     'e'],        # ye -> e
        ['uu',     'u'],        # uu -> u
        ['tsu',    'tu'],       # tsu -> tu
    );

    $n2 = '' if (!defined($n2));        # option given without a value

    return(1) if ($n1 =~ m/^\Q$n2\E/i);

    #
    # don't go any further if we want strict matching
    #
    return(0) if ($strict_match);

    for my $rule (@rules) {
        my ($from, $to) = @$rule;
        my $n3 = $n1;
        my $n4 = $n2;
        $n3 =~ s/$from/$to/gi;
        $n4 =~ s/$from/$to/gi;
        return(1) if ($n3 =~ m/^\Q$n4\E/i);
    }

    return(0);
}

#
# get_birthday($line)
#
#       print out the birthday information for the birthday list
#
#       $line is a raw " /* ... */" line.  only "Born: Y/M/D, place"
#       lines produce output: "MM.DD YYYY name" for the current voice
#       actor, with "????" when the year contains "??".
#
sub get_birthday {
    my ($line) = @_;

    $line =~ s/^[ ]\/\*[ ]*//;
    $line =~ s/[ ]*\*\///;
    # same keyword test as print_pinfo; a comment that merely
    # contains the word "born" is not a birthday
    return if (!($line =~ s/^born: //i));

    my ($year, $month, $rest) = split(/\//, $line);
    my ($day) = split(/,/, $rest);      # drop ", place"

    printf "%02d.%02d", $month, $day;
    if ($year =~ m/\?\?/) {
        print " ????";
    } else {
        printf " %4d", &western_year($year);
    }
    print " $name\n";
}

#
# western_year($jy)
#
#       return the western year from the japanese year
#
#       a leading S, T or H (either case) selects the offset
#       1925, 1911 or 1988; anything else is returned unchanged.
#
sub western_year {
    my ($jy) = @_;
    my $wy;

    if ($jy =~ s/^s//i) {
        $wy = int($jy) + 1925;
    } elsif ($jy =~ s/^t//i) {
        $wy = int($jy) + 1911;
    } elsif ($jy =~ s/^h//i) {
        $wy = int($jy) + 1988;
    } else {
        $wy = $jy;
    }

    return($wy);
}

#
# print_pinfo($line)
#
#       print out the personal information
#       (if the keywords are recognized)
#
#       $line is the text of a keyword comment without the comment
#       delimiters.  a line that is just "M" or "F" prints nothing,
#       it selects the pronouns used for the following lines.
#
sub print_pinfo {
    my ($line) = @_;
    my @Month = (
        '', 'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December'
    );
    # ordinal suffix for each day of the month (index 0 is unused)
    my @Day = (
        '',
        'st', 'nd', 'rd', 'th', 'th', 'th', 'th', 'th', 'th', 'th',
        'th', 'th', 'th', 'th', 'th', 'th', 'th', 'th', 'th', 'th',
        'st', 'nd', 'rd', 'th', 'th', 'th', 'th', 'th', 'th', 'th',
        'st'
    );

    if ($debug) {
        print "pinfo line=$line\n";
    }

    if ($line =~ m/^born: /i) {
        #
        # Born: S??/??/??, ?????
        #
        $line =~ s/^born: //i;
        my ($year, $month, $day) = split(/\//, $line, 3);
        ($day, $line) = split(/,/, $day, 2);
        # the name is passed as an argument so that a "%" in it
        # cannot be taken as a format directive
        printf "%s was born on %s %d%s", $name, $Month[$month], $day,
            $Day[$day];
        if (!($year =~ m/\?\?/)) {
            print ", ";
            printf "%4d", &western_year($year);
        }
        if ($line) {
            $line =~ s/[ ]+\*\///g;
            print " in $line";
        }
        print ".\n";
        return;
    }

    if ($line =~ m/^raised: /i) {
        #
        # Raised: ?????
        #
        $line =~ s/^raised: //i;
        print "$gender was raised in $line.\n";
        return;
    }

    if ($line =~ m/^deceased: /i) {
        $line =~ s/^deceased: //i;
        print "$gender passed away on $line.\n";
        return;
    }

    if ($line =~ m/^true name: /i) {
        #
        # True name: ?????
        #
        $line =~ s/^true name: //i;
        return if ($line =~ m/same/i);
        print "$gender_s true name is $line.\n";
        return;
    }

    if ($line =~ m/^type /i) {
        #
        # Type ??
        #
        $line =~ s/^type //i;
        $line =~ s/ //g;
        print "$gender_s blood type is $line.\n";
        return;
    }

    if ($line =~ m/^sizes: /i) {
        #
        # Sizes: ??? cm, ?? kg, ??-??-??, ?? cm
        # Sizes: ??? cm, ?? kg
        #
        $line =~ s/^sizes: //i;
        my ($height, $weight, $three, $shoe) = split(/,/, $line);
        if (!($height =~ m/\?\?/) || !($weight =~ m/\?\?/)) {
            print "$gender is $height tall and weighs $weight.\n";
        }
        print "$gender_s three sizes are $three.\n" if ($three);
        if ($shoe && !($shoe =~ m/\?\?/)) {
            print "$gender_s shoe size is $shoe.\n";
        }
        return;
    }

    if ($line =~ m/^production: /i) {
        #
        # Production: ?????
        #
        $line =~ s/^production: //i;
        if ($line =~ m/NONE/) {
            print "$gender is not associated with any production.\n";
        } else {
            print "$gender works for $line.\n";
        }
        return;
    }

    if ($line eq 'M') {
        $gender = 'He';
        $gender_s = 'His';
    } elsif ($line eq 'F') {
        $gender = 'She';
        $gender_s = 'Her';
    }
}

#
# find_match($name, \@lookup)
#
#       return the 1-based index of the first entry of @lookup that
#       matches $name (see match_rname), or 0 if there is none
#
sub find_match {
    my ($name, $lookup) = @_;

    for my $i (0 .. $#$lookup) {
        return($i + 1) if (&match_rname($name, $lookup->[$i]));
    }
    return(0);
}

#
# check_va($name), check_anime($name), check_char($name)
#
#       look up $name among the voice actors / anime titles /
#       characters asked for on the command line.
#       return the 1-based index of the search key, or 0.
#
sub check_va {
    return(&find_match($_[0], \@va_lookup));
}

sub check_anime {
    return(&find_match($_[0], \@an_lookup));
}

sub check_char {
    return(&find_match($_[0], \@ch_lookup));
}