1099 lines
30 KiB
Perl
1099 lines
30 KiB
Perl
|
package Net::Http::Useragent;
|
||
|
use warnings;
|
||
|
use strict;
|
||
|
|
||
|
=head1 NAME

Net::Http::Useragent - extract information from HTTP User-Agent header

=head1 DESCRIPTION

This class represents the contents of an HTTP User-Agent header, and can
be used to extract information (user agent, version, os, whether it's a
bot, ...) from it.

=cut
|
||
|
|
||
|
our $VERSION = 0.002;
|
||
|
|
||
|
=head2 new

    my $agent = Net::Http::Useragent->new($useragent_string);

Constructor. Stores the raw User-Agent header value under the
C<useragent> key of the object; no parsing happens here.  May be called
on the class or on an existing instance (the new object is then blessed
into that instance's class).

=cut

sub new {
    my ($class, $useragent) = @_;

    # When invoked as $obj->new(...), $class is a reference; bless into the
    # package of that object rather than into its stringified address.
    my $package = ref($class) || $class;

    return bless { useragent => $useragent }, $package;
}
|
||
|
|
||
|
=head2 canonical_os

    my $os = canonical_os($os_fragment);

Plain function (not a method).  Returns a canonical OS name for the OS
fragment taken from a User-Agent string:

=over 4

=item *

a doubled "Windows Windows" is collapsed to a single "Windows";

=item *

anything containing "Linux" becomes just "Linux";

=item *

anything containing "Mac OS X" becomes just "Mac OS X".

=back

Any other value is returned unchanged.  An undefined argument is returned
as undef.

=cut

# The ($) prototype is kept so that existing call sites keep parsing
# exactly as before.
sub canonical_os($) {
    my ($os) = @_;

    # Nothing to normalise.  Return the value explicitly (not a bare
    # "return"): this function is called inside hash constructors, where an
    # empty list would shift the following key/value pairs.
    return $os unless defined $os;

    $os =~ s/Windows Windows/Windows/g;
    $os =~ s/.*Linux.*/Linux/g;
    $os =~ s/.*Mac OS X.*/Mac OS X/g;

    return $os;
}
|
||
|
|
||
|
=head2 munged

    my $info = $agent->munged;

Tries to find out the real user agent and returns that information as a
hash ref.  Depending on the rule that matched, the hash contains some or
all of the following keys:

=over 4

=item robot

is this a known robot? (1 for robots, 0 for browsers; absent when the
User-Agent string was not recognised)

=item useragent

(short) name of the agent, e.g. "Netscape", "MSIE", "Mozilla",
"Opera", "w3m", ...  For unrecognised strings this is the raw
User-Agent string and it is the only key present.

=item major_version

The version number with minor revisions omitted. This is not
necessarily just the number before the first dot, as different
vendors have different policies in regard to version numbers.

=item version

The full version string.

=item os

The operating system

=item lang

The language tag, for the Opera rules that capture one.

=back

Results are cached per User-Agent string for the lifetime of the process.
Each call returns a fresh shallow copy, so callers may modify the
returned hash without affecting later calls.

=cut

# Cache: raw User-Agent string => classification hash ref.
my $uam = {};

sub munged {
    my ($self) = @_;
    my $ua = $self->{useragent};

    # No header value at all: report it as unknown without running (and
    # warning from) every pattern below.
    return { useragent => undef } unless defined $ua;

    $uam->{$ua} = _classify_useragent($ua) unless exists $uam->{$ua};

    # The classification hashes are flat, so a shallow copy is enough to
    # keep the cached entry safe from modification by the caller.
    return { %{ $uam->{$ua} } };
}

# Classify one (defined) User-Agent string.  The rules are tried in order
# and the first match wins, so the order below is significant.  Returns a
# new hash ref; falls back to { useragent => $ua } when nothing matches.
sub _classify_useragent {
    my ($ua) = @_;

    if ($ua =~ m{^(FAST-WebCrawler)/(\d+)\.(\S+)}) {
        return { robot => 1, useragent => 'FAST Crawler', major_version => $2, version => "$2.$3" };
    }

    if ($ua =~ m{^(Googlebot)/(\d+)\.(\d+)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => "$2.$3" };
    }

    if ($ua =~ m{^Mozilla/5.0 \((Slurp)/(\w+); slurp\@inktomi.com; http://www.inktomi.com/slurp.html\)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; Yahoo! (Slurp); http://help.yahoo.com/help/us/ysearch/slurp\)$}) {
        return { robot => 1, useragent => $1, major_version => "", version => "" };
    }

    # Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; Yahoo! (Slurp) China; http://misc.yahoo.com.cn/help.html\)$}) {
        return { robot => 1, useragent => $1, major_version => "", version => "" };
    }

    # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; Yahoo! (Slurp)/([.\d]+); http://help.yahoo.com/help/us/ysearch/slurp\)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)
    # Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (http://www.voila.com/)
    # Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)
    if ($ua =~ m{Mozilla/[45].0 \(.* Windows.*\) (VoilaBot) (BETA 1.2) \(.*voila.*\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # MSIE
    if ($ua =~ m{^Mozilla/\d+.\d+ \(compatible; (MSIE) ((\d+\.\d)\d*); ([^;\)]*).*\)$}) {
        return {
            robot         => 0,
            useragent     => $1,
            major_version => $3,
            version       => $2,
            os            => canonical_os($4),
        };
    }

    # Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)
    # Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)
    # Mozilla/5.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)
    if ($ua =~ m{^Mozilla/[25].0 \(compatible; (Ask Jeeves/Teoma)(; \+http://\w+.ask.com/.*docs/about/.*html)?\)}) {
        return { robot => 1, useragent => $1 };
    }

    if ($ua =~ m{^Mozilla/4.0 \(compatible; (?i:B.l.i.t.z.B.O.T)\)}) {
        return { robot => 1, useragent => "Blitzbot" };
    }

    # Netscape 6+ (Gecko based)
    #                 1                2            3                          4      5                67
    if ($ua =~ m{^(Mozilla)/5.0 \((\w+); U; ([^;]*); [-\w]+; .*\) Gecko/(\d+) (Netscape)\d?/((\d)\.\d+)$}) {
        return {
            robot         => 0,
            useragent     => $5,
            os            => canonical_os("$2 $3"),
            major_version => $7,
            version       => "$6/$4",
        };
    }

    # Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1
    if ($ua =~ m{^(Mozilla)/5.0 \((\w+); U; ([^;]*); [-\w]+; rv:(\d.[.\w]*)\) Gecko/(\d+) .*(Firefox|GranParadiso|Minefield|Iceweasel)/((\d+\.\d+)[.\d]*)}) {
        return {
            robot         => 0,
            useragent     => 'Firefox',
            os            => canonical_os("$2 $3"),
            major_version => $8,
            version       => "$7/$5",
        };
    }

    # Any other Gecko based browser is reported as plain "Mozilla".
    if ($ua =~ m{^(Mozilla)/5.0 \((\w+); U; ([^;]*); [-\w]+; rv:((\d.\d+)[.\w]*)\) Gecko/(\d+)}) {
        return {
            robot         => 0,
            useragent     => $1,
            os            => canonical_os("$2 $3"),
            major_version => $5,
            version       => "$4/$6",
        };
    }

    # honest Opera
    # Opera/9.52 (X11; Linux i686; U; en)
    if ($ua =~ m{^(Opera)/((\d).\d+) \((?:[^;]+; )?([^;]+); U(?:; ([-a-z]*))\)}) {
        return {
            robot         => 0,
            useragent     => $1,
            version       => $2,
            major_version => $3,
            os            => canonical_os($4),
            lang          => $5,
        };
    }

    # Opera masquerading as MSIE
    if ($ua =~ m{^Mozilla/4.0 \(compatible;.* MSIE \d\.\d; ([^\)]+)\) (Opera) ((\d).\d+) \[\w\w\]}) {
        return {
            robot         => 0,
            useragent     => $2,
            version       => $3,
            major_version => $4,
            os            => canonical_os($1),
        };
    }
    if ($ua =~ m{^Mozilla/4.0 \(compatible; MSIE \d\.\d; (?:.*; )*([^;]+); ([-a-z]*)\) (Opera) ((\d).\d+)$}) {
        return {
            robot         => 0,
            useragent     => $3,
            version       => $4,
            major_version => $5,
            os            => canonical_os($1),
            lang          => $2,
        };
    }

    # Opera masquerading as Mozilla
    # Mozilla/5.0 (Linux 2.4.2 i386; U) Opera 6.0 [en]
    if ($ua =~ m{^Mozilla/\d.\d+ \(([^;]+); [IU]\) (Opera) ((\d).\d+) \[\w\w\]}) {
        return {
            robot         => 0,
            useragent     => $2,
            version       => $3,
            major_version => $4,
            os            => canonical_os($1),
        };
    }

    if ($ua =~ m{^Mozilla/\d\.\d \(compatible; (Konqueror)/([.\d]+); (\S+)\)}) {
        return {
            robot         => 0,
            useragent     => $1,
            version       => $2,
            major_version => $2,
            os            => canonical_os($3),
        };
    }
    # Konqueror release candidates ("-rcN" suffix)
    if ($ua =~ m{^Mozilla/\d\.\d \(compatible; (Konqueror)/(([.\d]+)(?:-rc\d+)); ([^;\)]+).*\)}) {
        return {
            robot         => 0,
            useragent     => $1,
            version       => $2,
            major_version => $3,
            os            => canonical_os($4),
        };
    }

    if ($ua =~ m{^(sitecheck.internetseer.com) \(For more info see: http://sitecheck.internetseer.com\)}) {
        return { robot => 1, useragent => $1 };
    }

    # Mozilla/4.77 [en] (Windows NT 5.0; U)
    # Mozilla/4.79 [en] (Windows NT 5.0; U)
    if ($ua =~ m{^Mozilla/((\d\.\d)\d*) \[\w\w\].* \(([^;]+); [IU](?:;( [^;\)]*))?\)}) {
        return {
            robot         => 0,
            useragent     => "Netscape",
            version       => $1,
            major_version => $2,
            os            => canonical_os($3 . ($4 || "")),
        };
    }

    # Mozilla/4.73 (Macintosh; U; PPC)
    # apparently the Mac version of Netscape omits the language tag
    if ($ua =~ m{^Mozilla/((\d\.\d)\d*) \((Macintosh); [IU](?:;( [^;\)]*))?\)}) {
        return {
            robot         => 0,
            useragent     => "Netscape",
            version       => $1,
            major_version => $2,
            os            => canonical_os($3 . ($4 || "")),
        };
    }

    # don't really know what that is.
    # The access patterns look human, not robot-like,
    # so it's probably some browser behind a UA-mangling proxy
    # Mozilla/3.01 (compatible;)
    if ($ua =~ m{^Mozilla/((3).01) \(compatible;\)$}) {
        return {
            robot         => 0,
            useragent     => "masquerades as Mozilla compatible",
            version       => $1,
            major_version => $2,
        };
    }

    # Altavista search bot:
    # Scooter/3.2.SF0
    # Scooter/3.3
    if ($ua =~ m{^(Scooter)/((\d).*)}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # http://www.almaden.ibm.com/cs/crawler [c01]
    if ($ua =~ m{^http://www.(almaden.ibm.com)/cs/crawler \[(c01)\]}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $2 };
    }

    # Mercator-2.0
    if ($ua =~ m{^(Mercator)-((\d).\d)}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # Mozilla/4.0 (compatible; BorderManager 3.0)
    # Bordermanager (http://www.novell.com/products/bordermanager/)
    # seems to be a Novell Proxy server.
    # classifying that as a user agent is a bit wrong,
    if ($ua =~ m{^Mozilla/4.0 \(compatible; (BorderManager) ((3).0)\)$}) {
        return {
            robot         => 0,
            useragent     => $1,
            version       => $2,
            major_version => $3,
            os            => canonical_os("Novell"),
        };
    }

    # contype
    # Adobe Acrobat Reader?
    if ($ua =~ m{^(contype)$}) {
        return { robot => 0, useragent => $1 };
    }

    # Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)
    if ($ua =~ m{^(Firefly)/((\d).\d)}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # gpv3.1
    # No idea what that is. Provisionally classifying as bot
    if ($ua =~ m{^(gpv)((\d).\d)}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # ia_archiver
    if ($ua =~ m{^(ia_archiver)$}) {
        return { robot => 1, useragent => $1 };
    }

    # LinkWalker
    if ($ua =~ m{^(LinkWalker)$}) {
        return { robot => 1, useragent => $1 };
    }

    # Melvil3.0 http://www.uma.at
    # No idea what that is. Provisionally classifying as bot
    if ($ua =~ m{^(Melvil)((\d).\d) http://www.uma.at$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # Mozilla/4.0 (compatible; grub-client-1.0.6; Crawl your own stuff with http://grub.org)
    if ($ua =~ m{^Mozilla/4.0 \(compatible; (grub-client)-((\d)[.\d]+); Crawl your own stuff with http://grub.org\)$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # Mozilla/4.75 (compatible; PortalBSpider; spider@portalb.com)
    # The name is captured explicitly: this rule previously used $1 without
    # any capture group, which left the useragent field undefined.
    if ($ua =~ m{^Mozilla/4.75 \(compatible; (PortalBSpider); spider\@portalb.com\)$}) {
        return { robot => 1, useragent => $1 };
    }

    # NPBot-1/2.0 (http://www.nameprotect.com/botinfo.html)
    if ($ua =~ m{^(NPBot)-((\d)/[.\d]+) \(http://www.nameprotect.com/botinfo.html\)$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # oBot
    if ($ua =~ m{^(oBot)$}) {
        return { robot => 1, useragent => $1 };
    }

    # TeWIS/0.3 TeWIS/0.3 libwww-perl/5.53
    if ($ua =~ m{^(TeWIS)/((\d\.\d)) TeWIS/\d\.\d libwww-perl/\d\.\d+$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # xFIND2000/0.8 RPT-HTTPClient/0.3-2
    # No idea what that is. Provisionally classifying as bot
    if ($ua =~ m{^(xFIND2000)/((\d.\d)) RPT-HTTPClient/}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # htdig/3.1.6 (webmaster@luga.at)
    if ($ua =~ m{^(htdig)/((\d.\d)\.\d) \(.*\@.*\)$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # Gigabot/1.0
    # Gigabot/2.0 (http://www.gigablast.com/spider.html)
    # Gigabot/2.0; http://www.gigablast.com/spider.html
    # (the pattern is open-ended, so it covers all three forms)
    if ($ua =~ m{^(Gigabot)/(\d.\d)}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $2 };
    }

    # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
    # msnbot/1.0 (+http://search.msn.com/msnbot.htm)
    if ($ua =~ m{^(msnbot)(?:-media)?/(\d.\d\w?) \(\+http://search.msn.com/msnbot.htm\)$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $2 };
    }

    # check_http/1.24.2.4 (nagios-plugins )
    if ($ua =~ m{^(check_http)/(\d+(.\d+)+) \(nagios-plugins \)$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $2 };
    }
    # check_http/v2053 (nagios-plugins 1.4.13)
    if ($ua =~ m{^(check_http)/v\d+ \(nagios-plugins (\d+(.\d+)+)\)$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $2 };
    }

    # Intraseek_1
    if ($ua =~ m{^Intraseek_1$}) {
        return { robot => 1, useragent => 'Intraseek', version => 1, major_version => 1 };
    }

    # WebVac (webmaster@pita.stanford.edu WebVac.org )
    if ($ua =~ m{^(WebVac) \(webmaster\@pita.stanford.edu WebVac.org \)$}) {
        return { robot => 1, useragent => $1, version => "", major_version => "" };
    }

    # ichiro/1.0 (ichiro@nttr.co.jp)
    if ($ua =~ m{^(ichiro)/((\d+)(.\d+)+) \(ichiro\@nttr.co.jp\)$}) {
        return { robot => 1, useragent => $1, version => $2, major_version => $3 };
    }

    # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Googlebot)/(\d+).(\d+); \+http://www.google.com/bot.html\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => "$2.$3" };
    }

    # Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_4; de-de) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1
    # Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/312.1 (KHTML, like Gecko) Safari/312
    if ($ua =~ m{^Mozilla/5.0 \(Macintosh; U; (.* Mac OS X)(?: [_0-9]+)?; [-a-z]+\) AppleWebKit/[.0-9]+ \(KHTML, like Gecko\)(?: Version/[.0-9]+)? (Safari)/((\d+)[.\w]+)}) {
        return {
            robot         => 0,
            os            => canonical_os($1),
            useragent     => $2,
            major_version => $4,
            version       => $3,
        };
    }

    # Seekbot/1.0 (http://www.seekbot.net/bot.html) HTTPFetcher/0.3
    if ($ua =~ m{^(Seekbot)/(\d+).(\d+) \(http://www.seekbot.net/bot.html\) HTTPFetcher/\d+.\d+$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => "$2.$3" };
    }

    # Mozilla/5.0 (compatible; heritrix/1.6.0 +http://innovationblog.com)
    if ($ua =~ m{^(?:Mozilla/5.0|webcrawler) \(compatible; (heritrix)/((\d+)[-.\d]+) \+\+?http://.*\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # psbot/0.1 (+http://www.picsearch.com/bot.html)
    if ($ua =~ m{^(psbot)/((\d+).\d+) \(\+http://www.picsearch.com/bot.html\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # Ocelli/1.3 (http://www.globalspec.com/Ocelli)
    if ($ua =~ m{^(Ocelli)/((\d+).\d+) \(http://www.globalspec.com/Ocelli\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # Googlebot-Image/1.0
    if ($ua =~ m{^(Googlebot-Image)/((\d+).\d+)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # Francis/2.0 (francis@neomo.de http://www.neomo.de/pages/crawler.php)
    if ($ua =~ m{^(Francis)/((\d+).\d+) \(francis\@neomo.de http://www.neomo.de/pages/crawler.php\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # Mozilla/5.0 (compatible; Herold; +http://www.herold.at)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Herold); \+http://www.herold.at\)$}) {
        return { robot => 1, useragent => $1 };
    }

    # Mozilla/5.0 (compatible; Interseek/3.1)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Interseek)/((\d+).\d+)\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # mnogo
    if ($ua =~ m{^(mnogo)$}) {
        return { robot => 1, useragent => $1 };
    }

    # Mozilla/5.0 (compatible; TridentSpider/3.1)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (TridentSpider)/((\d).\d)\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/
    if ($ua =~ m{^FAST Enterprise Crawler (\d) / Scirus scirus-crawler\@fast.no; http://www.scirus.com/srsapp/contactus/$}) {
        return { robot => 1, useragent => 'FAST Crawler', major_version => $1, version => $1 };
    }

    # MnoGoSearch/3.2.31
    if ($ua =~ m{^MnoGoSearch/((\d+\.\d+)\.\d+)$}) {
        return { robot => 1, useragent => "mnogo", version => $1, major_version => $2 };
    }

    if ($ua =~ m{^(Aport)$}) {
        return { robot => 1, useragent => $1 };
    }

    # Mozilla/4.0 (compatible; DepSpid/5.07; +http://about.depspid.net)
    if ($ua =~ m{^Mozilla/4.0 \(compatible; (DepSpid)/(\d.\d+); \+http://about.depspid.net\)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # libwww-perl/5.803
    if ($ua =~ m{^(libwww-perl)/(\d\.\d+)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Java/1.4.2_10
    if ($ua =~ m{^(Java)/(\d\.\d+)(\.[\d_]+)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => "$2$3" };
    }

    # Xenu Link Sleuth 1.2i
    # $3 is nested inside $2, so $2 alone already is the complete version
    # string (appending $3 would repeat its tail: "1.2i2i").
    if ($ua =~ m{^(Xenu Link Sleuth) (\d\.(.+))$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Google Chrome
    if ($ua =~ m{^Mozilla/5.0 \([^;]*; .; ([^;]*); [-\w]*\) AppleWebKit/\d+.\d+ \(KHTML, like Gecko\) (Chrome)/((\d+\.\d+)[.\d]*) Safari/\d+.\d+$}) {
        return {
            robot         => 0,
            useragent     => $2,
            major_version => $4,
            version       => $3,
            os            => canonical_os($1),
        };
    }

    # Echoping/5.2.0
    if ($ua =~ m{^(Echoping)/(\d+\.[.\d]+)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Shelob (shelob@gmx.net)
    if ($ua =~ m{^(Shelob) \(shelob\@gmx.net\)$}) {
        return { robot => 1, useragent => $1, major_version => '', version => '' };
    }

    # SiteUptime.com
    if ($ua =~ m{^(SiteUptime.com)$}) {
        return { robot => 1, useragent => $1, major_version => '', version => '' };
    }

    # Mozilla/5.0 (SymbianOS/9.1; U; en-us) AppleWebKit/413 (KHTML, like Gecko) Safari/413 es65
    if ($ua =~ m{^Mozilla/5.0 \(([^;]*); .; [-\w]*\) AppleWebKit/\d+ \(KHTML, like Gecko\) (Safari)/(\d+)}) {
        return {
            robot         => 0,
            useragent     => $2,
            major_version => $3,
            version       => $3,
            os            => canonical_os($1),
        };
    }

    # Snoopy v1.2.3
    if ($ua =~ m{^(Snoopy) v([.\d]+)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (compatible; egothor/8.0g; +http://ego.ms.mff.cuni.cz/)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (egothor)/(\d+\.\w+); \+http://ego.ms.mff.cuni.cz/\)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # remi 1.5e using w-3-m-i-r (http://nep.repec.org)
    # remi 1.5c using w3mir (socionet@socionet.ru)
    if ($ua =~ m{^(remi) (\d+\.\d+)([a-z]*) using w-?3-?m-?i-?r }) {
        return { robot => 1, useragent => $1, major_version => $2, version => "$2$3" };
    }

    # Mozilla/5.0 (compatible; IDBot/1.0; +http://www.id-search.org/bot.html)
    if ($ua =~ m{Mozilla/5.0 \(compatible; (IDBot)/(\d+.\d+); \+http://www.id-search.org/bot.html\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (Twiceler-0.9 http://www.cuill.com/twiceler/robot.html)
    # Mozilla/5.0 (Twiceler-0.9 http://www.cuil.com/twiceler/robot.html)
    if ($ua =~ m{^Mozilla/5.0 \((Twiceler)-(0.9) http://www.cuill?.com/twiceler/robot.html\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Yandex/1.01.001 (compatible; Win16; P)
    if ($ua =~ m{^(Yandex)/((\d+)\.[\d.]+) \(compatible; Win16; [A-Z]\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)
    if ($ua =~ m{^(Yanga) WorldSearch Bot v((1).1/beta) \(http://www.yanga.co.uk/\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # Jyxobot/1
    if ($ua =~ m{^(Jyxobot)/(\d+)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)
    # SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)
    if ($ua =~ m{\(compatible; (Googlebot-Mobile)/((\d+).\d+); \+http://www.google.com/bot.html\)$}) {
        return { robot => 1, useragent => $1, major_version => $3, version => $2 };
    }

    # Mozilla/5.0 (X11; Linux i686; rv:2.0b11) Gecko/20100101 Firefox/4.0b11
    # Mozilla/5.0 (Windows NT 6.0; rv:2.0b10) Gecko/20100101 Firefox/4.0b10
    if ($ua =~ m{^Mozilla/5.0 \(((?:Windows|X11; Linux|\w+) [^;]*); rv:([0-9.b]*)\) Gecko/(\d+) Firefox/((\d+\.\d+)(?:[.b]\d+)?)$}) {
        return {
            robot         => 0,
            useragent     => 'Firefox',
            os            => canonical_os($1),
            major_version => $5,
            version       => $4,
        };
    }

    # Netluchs/Nutch-1.0-dev ( ; http://www.netluchs.de/; _do_not_spam_me___humans_please_use_info_at_netluchs.de_without_the_dash)
    if ($ua =~ m{^(Netluchs/Nutch)-(\d\.\S*) \( ; http://www.netluchs.de/; _do_not_spam_me___humans_please_use_info_at_netluchs.de_without_the_dash\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (compatible; DotBot/1.1; http://www.dotnetdotcom.org/, crawler@dotnetdotcom.org)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (DotBot)/(\d.\d); http://www.dotnetdotcom.org/, crawler\@dotnetdotcom.org\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Baiduspider+(+http://www.baidu.com/search/spider.htm)
    if ($ua =~ m{^(Baiduspider)\+\(\+http://\w+.baidu.\w+/\S+\)}) {
        return { robot => 1, useragent => $1, major_version => '', version => '' };
    }

    # findlinks/1.1.6-beta1%20(+http://wortschatz.uni-leipzig.de/findlinks/)
    if ($ua =~ m{^findlinks/((\d.\d.\d)-beta\d+) \(\+http://wortschatz.uni-leipzig.de/findlinks/\)$}) {
        return { robot => 1, useragent => "wortschatz-findlinks", major_version => $2, version => $1 };
    }

    # Eurobot/1.1%20(http://eurobot.ayell.eu)
    if ($ua =~ m{^(Eurobot)/(\d.\d) \(http://eurobot.ayell.eu\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (compatible; Charlotte/1.1; http://www.searchme.com/support/)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Charlotte)/(\d.\d); http://www.searchme.com/support/\)$}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Exabot)(?:-images)?/(\d.\d)(?: \S+)?; \+http://www.exabot.com/go/robot\)}) {
        return { robot => 1, useragent => $1, major_version => $2, version => $2 };
    }

    # Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.11) Gecko GranParadiso/3.0.11
    # looks like AskJeeves in disguise
    if ($ua =~ m{^Mozilla/5.0 \(X11; U; Linux i686; en-US; rv:1.9.0.11\) Gecko GranParadiso/3.0.11}) {
        return {
            robot         => 1,
            useragent     => "Ask Jeeves/Teoma",
            major_version => "stealth",
            version       => "stealth",
        };
    }

    # Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B367 Safari/531.21.10
    if ($ua =~ m{^Mozilla/5.0 \(iPad; U; CPU OS (\w+) like Mac OS X; [-a-z]+\) AppleWebKit/((\d+)(?:\.\d+)*) \(KHTML, like Gecko\) Version/[.0-9]+ Mobile/\w+ Safari/\2$}) {
        return {
            robot         => 0,
            useragent     => 'Safari',
            os            => canonical_os("iOS/iPad"),
            major_version => $3,
            version       => $2,
        };
    }

    # Nothing matched: hand the raw string back as the agent name.
    return { useragent => $ua };
}
|