Net-Http-Useragent/lib/Net/Http/Useragent.pm

1099 lines
30 KiB
Perl

package Net::Http::Useragent;
use warnings;
use strict;
=head1 NAME
Net::Http::Useragent - extract information from HTTP User-Agent header
=head1 DESCRIPTION
This class represents the contents of an HTTP User-Agent header, and can
be used to extract information (user agent, version, os, whether it's a
bot, ...) from it.
=cut
our $VERSION = 0.002;
# Constructor: wraps the raw User-Agent header string in an object.
#   $class     - package to bless the new object into
#   $useragent - the header value; stored unchanged under the
#                'useragent' key
# Returns the blessed hash reference.
sub new {
    my $class     = shift;
    my $useragent = shift;
    return bless { useragent => $useragent }, $class;
}
=head2 canonical_os
Returns a canonical OS name
=cut
# Normalise an operating-system string taken from a User-Agent header.
#   - a doubled "Windows Windows" is collapsed to a single "Windows"
#   - any string mentioning "Linux" becomes just "Linux"
#   - any string mentioning "Mac OS X" becomes just "Mac OS X"
# Everything else is returned unchanged. The argument is copied, so
# the caller's value is never modified.
sub canonical_os($) {
    my $name = shift;
    for ($name) {
        s/Windows Windows/Windows/g;
        s/.*Linux.*/Linux/g;
        s/.*Mac OS X.*/Mac OS X/g;
    }
    return $name;
}
=head2 useragent_munged
Tries to find out the real user agent and returns that information as a
hash ref:
=over 4
=item robot
is this a known robot?
=item useragent
(short) name of the agent, e.g. "Netscape", "MSIE", "Mozilla",
"Opera", "w3m", ...
=item major_version
The version number with minor revisions omitted. This is not
necessarily just the number before the first dot, as different
vendors have different policies in regard to version numbers.
=item version
The full version string.
=item os
The operating system
=back
=cut
# Memo of parsed results, keyed by the raw User-Agent string.
my $uam = {};

# Classify the User-Agent string held in $self->{useragent}.
#
# The string is tried against an ordered list of patterns; the first one
# that matches decides the result. Order matters: several later patterns
# are looser than earlier ones.
#
# Returns a hash reference. Depending on the rule that matched it may
# contain the keys: robot (1 for known robots, 0 for browsers),
# useragent, major_version, version, os and lang. When nothing matches,
# only useragent is set, to the unmodified header string.
#
# Results are remembered in $uam, so a repeated call with the same
# string returns the same hash reference; callers should treat the
# result as read-only.
sub munged {
    my ($self) = @_;
    my $ua = $self->{useragent};

    # No header at all: nothing to match against and nothing worth
    # caching. Returning early avoids an "uninitialized" warning from
    # every pattern match below.
    return { useragent => $ua } unless defined $ua;

    # Already classified earlier.
    return $uam->{$ua} if exists $uam->{$ua};

    if ($ua =~ m{^(FAST-WebCrawler)/(\d+)\.(\S+)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => 'FAST Crawler',
            major_version => $2, version => "$2.$3",
        };
    }
    if ($ua =~ m{^(Googlebot)/(\d+)\.(\d+)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => "$2.$3",
        };
    }
    if ($ua =~ m{^Mozilla/5.0 \((Slurp)/(\w+); slurp\@inktomi.com; http://www.inktomi.com/slurp.html\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; Yahoo! (Slurp); http://help.yahoo.com/help/us/ysearch/slurp\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => "", version => "",
        };
    }
    # Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; Yahoo! (Slurp) China; http://misc.yahoo.com.cn/help.html\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => "", version => "",
        };
    }
    # Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; Yahoo! (Slurp)/([.\d]+); http://help.yahoo.com/help/us/ysearch/slurp\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)
    # Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (http://www.voila.com/)
    # Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)
    if ($ua =~ m{Mozilla/[45].0 \(.* Windows.*\) (VoilaBot) (BETA 1.2) \(.*voila.*\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # MSIE
    if ($ua =~ m{^Mozilla/\d+.\d+ \(compatible; (MSIE) ((\d+\.\d)\d*); ([^;\)]*).*\)$}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $1,
            major_version => $3, version => $2,
            os => canonical_os($4),
        };
    }
    # Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)
    # Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)
    # Mozilla/5.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)
    if ($ua =~ m{^Mozilla/[25].0 \(compatible; (Ask Jeeves/Teoma)(; \+http://\w+.ask.com/.*docs/about/.*html)?\)}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    if ($ua =~ m{^Mozilla/4.0 \(compatible; (?i:B.l.i.t.z.B.O.T)\)}) {
        return $uam->{$ua} = { robot => 1, useragent => "Blitzbot" };
    }
    # Netscape 6 and later (Gecko based)
    if ($ua =~ m{^(Mozilla)/5.0 \((\w+); U; ([^;]*); [-\w]+; .*\) Gecko/(\d+) (Netscape)\d?/((\d)\.\d+)$}) {
        # groups:  1            2        3                          4        5             67
        return $uam->{$ua} = {
            robot => 0, useragent => $5,
            os => canonical_os("$2 $3"),
            major_version => $7, version => "$6/$4",
        };
    }
    # Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1
    if ($ua =~ m{^(Mozilla)/5.0 \((\w+); U; ([^;]*); [-\w]+; rv:(\d.[.\w]*)\) Gecko/(\d+) .*(Firefox|GranParadiso|Minefield|Iceweasel)/((\d+\.\d+)[.\d]*)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => 'Firefox',
            os => canonical_os("$2 $3"),
            major_version => $8, version => "$7/$5",
        };
    }
    # Any other Gecko build is reported as plain Mozilla.
    if ($ua =~ m{^(Mozilla)/5.0 \((\w+); U; ([^;]*); [-\w]+; rv:((\d.\d+)[.\w]*)\) Gecko/(\d+)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $1,
            os => canonical_os("$2 $3"),
            major_version => $5, version => "$4/$6",
        };
    }
    # honest Opera
    # Opera/9.52 (X11; Linux i686; U; en)
    if ($ua =~ m{^(Opera)/((\d).\d+) \((?:[^;]+; )?([^;]+); U(?:; ([-a-z]*))\)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $1,
            version => $2, major_version => $3,
            os => canonical_os($4), lang => $5,
        };
    }
    # Opera masquerading as MSIE
    if ($ua =~ m{^Mozilla/4.0 \(compatible;.* MSIE \d\.\d; ([^\)]+)\) (Opera) ((\d).\d+) \[\w\w\]}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $2,
            version => $3, major_version => $4,
            os => canonical_os($1),
        };
    }
    if ($ua =~ m{^Mozilla/4.0 \(compatible; MSIE \d\.\d; (?:.*; )*([^;]+); ([-a-z]*)\) (Opera) ((\d).\d+)$}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $3,
            version => $4, major_version => $5,
            os => canonical_os($1), lang => $2,
        };
    }
    # Opera masquerading as Mozilla
    # Mozilla/5.0 (Linux 2.4.2 i386; U) Opera 6.0 [en]
    if ($ua =~ m{^Mozilla/\d.\d+ \(([^;]+); [IU]\) (Opera) ((\d).\d+) \[\w\w\]}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $2,
            version => $3, major_version => $4,
            os => canonical_os($1),
        };
    }
    if ($ua =~ m{^Mozilla/\d\.\d \(compatible; (Konqueror)/([.\d]+); (\S+)\)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $1,
            version => $2, major_version => $2,
            os => canonical_os($3),
        };
    }
    if ($ua =~ m{^Mozilla/\d\.\d \(compatible; (Konqueror)/(([.\d]+)(?:-rc\d+)); ([^;\)]+).*\)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $1,
            version => $2, major_version => $3,
            os => canonical_os($4),
        };
    }
    if ($ua =~ m{^(sitecheck.internetseer.com) \(For more info see: http://sitecheck.internetseer.com\)}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # Mozilla/4.77 [en] (Windows NT 5.0; U)
    # Mozilla/4.79 [en] (Windows NT 5.0; U)
    if ($ua =~ m{^Mozilla/((\d\.\d)\d*) \[\w\w\].* \(([^;]+); [IU](?:;( [^;\)]*))?\)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => "Netscape",
            version => $1, major_version => $2,
            os => canonical_os($3 . ($4 || "")),
        };
    }
    # Mozilla/4.73 (Macintosh; U; PPC)
    # apparently the Mac version of Netscape omits the language tag
    if ($ua =~ m{^Mozilla/((\d\.\d)\d*) \((Macintosh); [IU](?:;( [^;\)]*))?\)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => "Netscape",
            version => $1, major_version => $2,
            os => canonical_os($3 . ($4 || "")),
        };
    }
    # don't really know what that is.
    # The access patterns look human, not robot-like,
    # so it's probably some browser behind a UA-mangling proxy
    # Mozilla/3.01 (compatible;)
    if ($ua =~ m{^Mozilla/((3).01) \(compatible;\)$}) {
        return $uam->{$ua} = {
            robot => 0, useragent => "masquerades as Mozilla compatible",
            version => $1, major_version => $2,
        };
    }
    # Altavista search bot:
    # Scooter/3.2.SF0
    # Scooter/3.3
    if ($ua =~ m{^(Scooter)/((\d).*)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # http://www.almaden.ibm.com/cs/crawler [c01]
    if ($ua =~ m{^http://www.(almaden.ibm.com)/cs/crawler \[(c01)\]}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $2,
        };
    }
    # Mercator-2.0
    if ($ua =~ m{^(Mercator)-((\d).\d)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # Mozilla/4.0 (compatible; BorderManager 3.0)
    # Bordermanager (http://www.novell.com/products/bordermanager/)
    # seems to be a Novell proxy server.
    # Classifying that as a user agent is a bit wrong.
    if ($ua =~ m{^Mozilla/4.0 \(compatible; (BorderManager) ((3).0)\)$}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $1,
            version => $2, major_version => $3,
            os => canonical_os("Novell"),
        };
    }
    # contype
    # Adobe Acrobat Reader?
    if ($ua =~ m{^(contype)$}) {
        return $uam->{$ua} = { robot => 0, useragent => $1 };
    }
    # Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)
    if ($ua =~ m{^(Firefly)/((\d).\d)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # gpv3.1
    # No idea what that is. Provisionally classifying as bot
    if ($ua =~ m{^(gpv)((\d).\d)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # ia_archiver
    if ($ua =~ m{^(ia_archiver)$}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # LinkWalker
    if ($ua =~ m{^(LinkWalker)$}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # Melvil3.0 http://www.uma.at
    # No idea what that is. Provisionally classifying as bot
    if ($ua =~ m{^(Melvil)((\d).\d) http://www.uma.at$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # Mozilla/4.0 (compatible; grub-client-1.0.6; Crawl your own stuff with http://grub.org)
    if ($ua =~ m{^Mozilla/4.0 \(compatible; (grub-client)-((\d)[.\d]+); Crawl your own stuff with http://grub.org\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # Mozilla/4.75 (compatible; PortalBSpider; spider@portalb.com)
    # The name must be captured: without a group $1 is undefined after
    # a successful match and useragent would come out empty.
    if ($ua =~ m{^Mozilla/4.75 \(compatible; (PortalBSpider); spider\@portalb.com\)$}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # NPBot-1/2.0 (http://www.nameprotect.com/botinfo.html)
    if ($ua =~ m{^(NPBot)-((\d)/[.\d]+) \(http://www.nameprotect.com/botinfo.html\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # oBot
    if ($ua =~ m{^(oBot)$}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # TeWIS/0.3 TeWIS/0.3 libwww-perl/5.53
    if ($ua =~ m{^(TeWIS)/((\d\.\d)) TeWIS/\d\.\d libwww-perl/\d\.\d+$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # xFIND2000/0.8 RPT-HTTPClient/0.3-2
    # No idea what that is. Provisionally classifying as bot
    if ($ua =~ m{^(xFIND2000)/((\d.\d)) RPT-HTTPClient/}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # htdig/3.1.6 (webmaster@luga.at)
    if ($ua =~ m{^(htdig)/((\d.\d)\.\d) \(.*\@.*\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # Gigabot/1.0
    # Gigabot/2.0 (http://www.gigablast.com/spider.html)
    # Gigabot/2.0; http://www.gigablast.com/spider.html
    # The pattern is open-ended on the right, so it covers all three forms.
    if ($ua =~ m{^(Gigabot)/(\d.\d)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $2,
        };
    }
    # msnbot/2.0b (+http://search.msn.com/msnbot.htm)
    # msnbot/1.0 (+http://search.msn.com/msnbot.htm)
    if ($ua =~ m{^(msnbot)(?:-media)?/(\d.\d\w?) \(\+http://search.msn.com/msnbot.htm\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $2,
        };
    }
    # check_http/1.24.2.4 (nagios-plugins )
    if ($ua =~ m{^(check_http)/(\d+(.\d+)+) \(nagios-plugins \)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $2,
        };
    }
    # check_http/v2053 (nagios-plugins 1.4.13)
    if ($ua =~ m{^(check_http)/v\d+ \(nagios-plugins (\d+(.\d+)+)\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $2,
        };
    }
    # Intraseek_1
    if ($ua =~ m{^Intraseek_1$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => 'Intraseek',
            version => 1, major_version => 1,
        };
    }
    # WebVac (webmaster@pita.stanford.edu WebVac.org )
    if ($ua =~ m{^(WebVac) \(webmaster\@pita.stanford.edu WebVac.org \)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => "", major_version => "",
        };
    }
    # ichiro/1.0 (ichiro@nttr.co.jp)
    if ($ua =~ m{^(ichiro)/((\d+)(.\d+)+) \(ichiro\@nttr.co.jp\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            version => $2, major_version => $3,
        };
    }
    # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Googlebot)/(\d+).(\d+); \+http://www.google.com/bot.html\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => "$2.$3",
        };
    }
    # Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_4; de-de) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1
    # Mozilla/5.0 (Macintosh; U; PPC Mac OS X; de-de) AppleWebKit/312.1 (KHTML, like Gecko) Safari/312
    if ($ua =~ m{^Mozilla/5.0 \(Macintosh; U; (.* Mac OS X)(?: [_0-9]+)?; [-a-z]+\) AppleWebKit/[.0-9]+ \(KHTML, like Gecko\)(?: Version/[.0-9]+)? (Safari)/((\d+)[.\w]+)}) {
        return $uam->{$ua} = {
            robot => 0, os => canonical_os($1),
            useragent => $2,
            major_version => $4, version => $3,
        };
    }
    # Seekbot/1.0 (http://www.seekbot.net/bot.html) HTTPFetcher/0.3
    if ($ua =~ m{^(Seekbot)/(\d+).(\d+) \(http://www.seekbot.net/bot.html\) HTTPFetcher/\d+.\d+$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => "$2.$3",
        };
    }
    # Mozilla/5.0 (compatible; heritrix/1.6.0 +http://innovationblog.com)
    if ($ua =~ m{^(?:Mozilla/5.0|webcrawler) \(compatible; (heritrix)/((\d+)[-.\d]+) \+\+?http://.*\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # psbot/0.1 (+http://www.picsearch.com/bot.html)
    if ($ua =~ m{^(psbot)/((\d+).\d+) \(\+http://www.picsearch.com/bot.html\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # Ocelli/1.3 (http://www.globalspec.com/Ocelli)
    if ($ua =~ m{^(Ocelli)/((\d+).\d+) \(http://www.globalspec.com/Ocelli\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # Googlebot-Image/1.0
    if ($ua =~ m{^(Googlebot-Image)/((\d+).\d+)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # Francis/2.0 (francis@neomo.de http://www.neomo.de/pages/crawler.php)
    if ($ua =~ m{^(Francis)/((\d+).\d+) \(francis\@neomo.de http://www.neomo.de/pages/crawler.php\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # Mozilla/5.0 (compatible; Herold; +http://www.herold.at)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Herold); \+http://www.herold.at\)$}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # Mozilla/5.0 (compatible; Interseek/3.1)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Interseek)/((\d+).\d+)\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # mnogo
    if ($ua =~ m{^(mnogo)$}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # Mozilla/5.0 (compatible; TridentSpider/3.1)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (TridentSpider)/((\d).\d)\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/
    if ($ua =~ m{^FAST Enterprise Crawler (\d) / Scirus scirus-crawler\@fast.no; http://www.scirus.com/srsapp/contactus/$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => 'FAST Crawler',
            major_version => $1, version => $1,
        };
    }
    # MnoGoSearch/3.2.31
    if ($ua =~ m{^MnoGoSearch/((\d+\.\d+)\.\d+)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => "mnogo",
            version => $1, major_version => $2,
        };
    }
    if ($ua =~ m{^(Aport)$}) {
        return $uam->{$ua} = { robot => 1, useragent => $1 };
    }
    # Mozilla/4.0 (compatible; DepSpid/5.07; +http://about.depspid.net)
    if ($ua =~ m{^Mozilla/4.0 \(compatible; (DepSpid)/(\d.\d+); \+http://about.depspid.net\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # libwww-perl/5.803
    if ($ua =~ m{^(libwww-perl)/(\d\.\d+)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Java/1.4.2_10
    if ($ua =~ m{^(Java)/(\d\.\d+)(\.[\d_]+)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => "$2$3",
        };
    }
    # Xenu Link Sleuth 1.2i
    # The whole version token is one capture; it is reported as-is for
    # both version and major_version.
    if ($ua =~ m{^(Xenu Link Sleuth) (\d\..+)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Google Chrome
    if ($ua =~ m{^Mozilla/5.0 \([^;]*; .; ([^;]*); [-\w]*\) AppleWebKit/\d+.\d+ \(KHTML, like Gecko\) (Chrome)/((\d+\.\d+)[.\d]*) Safari/\d+.\d+$}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $2,
            major_version => $4, version => $3,
            os => canonical_os($1),
        };
    }
    # Echoping/5.2.0
    if ($ua =~ m{^(Echoping)/(\d+\.[.\d]+)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Shelob (shelob@gmx.net)
    if ($ua =~ m{^(Shelob) \(shelob\@gmx.net\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => '', version => '',
        };
    }
    # SiteUptime.com
    if ($ua =~ m{^(SiteUptime.com)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => '', version => '',
        };
    }
    # Mozilla/5.0 (SymbianOS/9.1; U; en-us) AppleWebKit/413 (KHTML, like Gecko) Safari/413 es65
    if ($ua =~ m{^Mozilla/5.0 \(([^;]*); .; [-\w]*\) AppleWebKit/\d+ \(KHTML, like Gecko\) (Safari)/(\d+)}) {
        return $uam->{$ua} = {
            robot => 0, useragent => $2,
            major_version => $3, version => $3,
            os => canonical_os($1),
        };
    }
    # Snoopy v1.2.3
    if ($ua =~ m{^(Snoopy) v([.\d]+)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (compatible; egothor/8.0g; +http://ego.ms.mff.cuni.cz/)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (egothor)/(\d+\.\w+); \+http://ego.ms.mff.cuni.cz/\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # remi 1.5e using w-3-m-i-r (http://nep.repec.org)
    # remi 1.5c using w3mir (socionet@socionet.ru)
    if ($ua =~ m{^(remi) (\d+\.\d+)([a-z]*) using w-?3-?m-?i-?r }) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => "$2$3",
        };
    }
    # Mozilla/5.0 (compatible; IDBot/1.0; +http://www.id-search.org/bot.html)
    if ($ua =~ m{Mozilla/5.0 \(compatible; (IDBot)/(\d+.\d+); \+http://www.id-search.org/bot.html\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (Twiceler-0.9 http://www.cuill.com/twiceler/robot.html)
    # Mozilla/5.0 (Twiceler-0.9 http://www.cuil.com/twiceler/robot.html)
    if ($ua =~ m{^Mozilla/5.0 \((Twiceler)-(0.9) http://www.cuill?.com/twiceler/robot.html\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Yandex/1.01.001 (compatible; Win16; P)
    if ($ua =~ m{^(Yandex)/((\d+)\.[\d.]+) \(compatible; Win16; [A-Z]\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)
    if ($ua =~ m{^(Yanga) WorldSearch Bot v((1).1/beta) \(http://www.yanga.co.uk/\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # Jyxobot/1
    if ($ua =~ m{^(Jyxobot)/(\d+)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)
    # SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)
    if ($ua =~ m{\(compatible; (Googlebot-Mobile)/((\d+).\d+); \+http://www.google.com/bot.html\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $3, version => $2,
        };
    }
    # Mozilla/5.0 (X11; Linux i686; rv:2.0b11) Gecko/20100101 Firefox/4.0b11
    # Mozilla/5.0 (Windows NT 6.0; rv:2.0b10) Gecko/20100101 Firefox/4.0b10
    if ($ua =~ m{^Mozilla/5.0 \(((?:Windows|X11; Linux|\w+) [^;]*); rv:([0-9.b]*)\) Gecko/(\d+) Firefox/((\d+\.\d+)(?:[.b]\d+)?)$}) {
        return $uam->{$ua} = {
            robot => 0, useragent => 'Firefox',
            os => canonical_os($1),
            major_version => $5, version => $4,
        };
    }
    # Netluchs/Nutch-1.0-dev ( ; http://www.netluchs.de/; _do_not_spam_me___humans_please_use_info_at_netluchs.de_without_the_dash)
    if ($ua =~ m{^(Netluchs/Nutch)-(\d\.\S*) \( ; http://www.netluchs.de/; _do_not_spam_me___humans_please_use_info_at_netluchs.de_without_the_dash\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (compatible; DotBot/1.1; http://www.dotnetdotcom.org/, crawler@dotnetdotcom.org)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (DotBot)/(\d.\d); http://www.dotnetdotcom.org/, crawler\@dotnetdotcom.org\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Baiduspider+(+http://www.baidu.com/search/spider.htm)
    if ($ua =~ m{^(Baiduspider)\+\(\+http://\w+.baidu.\w+/\S+\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => '', version => '',
        };
    }
    # findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)
    if ($ua =~ m{^findlinks/((\d.\d.\d)-beta\d+) \(\+http://wortschatz.uni-leipzig.de/findlinks/\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => "wortschatz-findlinks",
            major_version => $2, version => $1,
        };
    }
    # Eurobot/1.1 (http://eurobot.ayell.eu)
    if ($ua =~ m{^(Eurobot)/(\d.\d) \(http://eurobot.ayell.eu\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (compatible; Charlotte/1.1; http://www.searchme.com/support/)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Charlotte)/(\d.\d); http://www.searchme.com/support/\)$}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)
    if ($ua =~ m{^Mozilla/5.0 \(compatible; (Exabot)(?:-images)?/(\d.\d)(?: \S+)?; \+http://www.exabot.com/go/robot\)}) {
        return $uam->{$ua} = {
            robot => 1, useragent => $1,
            major_version => $2, version => $2,
        };
    }
    # Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.11) Gecko GranParadiso/3.0.11
    # looks like AskJeeves in disguise
    if ($ua =~ m{^Mozilla/5.0 \(X11; U; Linux i686; en-US; rv:1.9.0.11\) Gecko GranParadiso/3.0.11}) {
        return $uam->{$ua} = {
            robot => 1, useragent => "Ask Jeeves/Teoma",
            major_version => "stealth", version => "stealth",
        };
    }
    # Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B367 Safari/531.21.10
    if ($ua =~ m{^Mozilla/5.0 \(iPad; U; CPU OS (\w+) like Mac OS X; [-a-z]+\) AppleWebKit/((\d+)(?:\.\d+)*) \(KHTML, like Gecko\) Version/[.0-9]+ Mobile/\w+ Safari/\2$}) {
        return $uam->{$ua} = {
            robot => 0, useragent => 'Safari',
            os => canonical_os("iOS/iPad"),
            major_version => $3, version => $2,
        };
    }

    # Nothing recognised: hand back the raw string as the agent name.
    return $uam->{$ua} = { useragent => $ua };
}