|
PLACES TO GO:
|
|
UNIQUE VISITORS
#!/usr/bin/perl -w
use strict;
use Socket;
####################################################################
# UNIQUE VISITORS - A very simple utility to parse Apache log files
# and print a listing of unique visiting domains.
# Copyleft 2005 - Nathan E. Pralle
#
# DESCRIPTION: This utility parses an Apache log file and determines
# how many unique domains visited your site, then compiles
# a list of them (good for directing to an email or logfile).
# It uses a cache file to speed up the process as well (not
# having to do DNS lookups all the time). Hosts are grouped by
# domain rather than listed individually, e.g. blah.mchsi.com
# and dink.mchsi.com are reported as 2 hits from mchsi.com.
#
# SYNTAX: perl unique_visitors.pl <days>
# Where <days> is the number of days back you want to look.
# I usually run mine at 12:30am and use a '1' to get the previous day.
#
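# EXTRA: You must have a file available called "unique_domains.dat".
# It is opened by its bare name, i.e. relative to the directory
# the script is started from, so run the script from the
# directory that holds the file (e.g. 'cd' there first in cron).
# This is the cache file for this binary; it must be readable
# and writable by the user running the script. I recommend:
# 'touch unique_domains.dat;chmod 777 unique_domains.dat'
# (chmod 600 is enough when a single user runs the script).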
#
# CONTACT: Questions, comments, etc.
# http://www.nathanpralle.com/contact.html
####################################################################
########################
# CONFIGURATION OPTIONS
#full path to your Apache access_log file
my $logfile="/path/to/access_log";
#######################

# Returns the list (day, month abbreviation, year) of the calendar date
# that lies $days_back days before the moment $now (epoch seconds), in
# local time. The day is zero-padded to two digits and the month is the
# three-letter English abbreviation, matching Apache's "dd/Mon/yyyy".
sub target_date_parts{
    my($days_back,$now)=@_;
    my @months=qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;
    my @clock=localtime($now);
    # Move to (about) noon of the current day before stepping back in
    # whole days: a daylight-saving shift moves the result by at most an
    # hour, which can never push a noon timestamp onto a neighbouring day.
    my $noon=$now - ($clock[2] - 12)*3600 - $clock[1]*60 - $clock[0];
    my @then=localtime($noon - $days_back*86400);
    return(sprintf('%02d',$then[3]),$months[$then[4]],$then[5]+1900);
}

# Number of days to look back: first command-line argument, 0 (today)
# when omitted.
my $offset=shift||0;
die("Usage: perl unique_visitors.pl <days>\n") unless $offset=~/\A\d+\z/;

# Real date arithmetic handles month lengths, leap years and the year
# boundary; the old "day 30 of the previous month" shortcut did not.
my($day,$shortmonth,$year)=target_date_parts($offset,time);

# Date exactly as it appears inside an access-log timestamp.
my $formatted="$day/$shortmonth/$year";
# Working variables for the log scan further down.
my $domainstring;
my @temparray;
my %iparray;        # domain (or bare address) => number of matching log lines
my $counter=0;      # log lines that matched the target date
my $hitcounter=0;   # lookups that were answered from the cache
my %domainlookups;  # client address => host name (or the address itself
                    # when the reverse lookup failed)

# Load the lookup cache: one "address<TAB>host" pair per line. A missing
# or unreadable cache is not fatal; the run simply starts with an empty
# cache and does every DNS lookup itself.
if(open(my $cache_in,'<','unique_domains.dat')){
    while(my $entry=<$cache_in>){
        chomp $entry;
        my($ip,$domain)=split(/\t/,$entry);
        # Skip blank or truncated lines instead of storing undef entries.
        next unless defined $ip && length $ip;
        next unless defined $domain && length $domain;
        $domainlookups{$ip}=$domain;
    }
    close($cache_in);
}
else{
    warn("can't read cache file unique_domains.dat ($!), starting with an empty cache\n");
}
# Scan the access log and tally every request made on the target date,
# grouped by the visitor's domain. The log is read one line at a time so
# that a large file never has to fit in memory.
open(my $log_fh,'<',$logfile)||die("can't open logfile $logfile: $!\n");
while(my $line=<$log_fh>){
    # Match the date only where a timestamp starts ("[dd/Mon/yyyy:"), so
    # a look-alike string inside a URL or referer is not counted.
    next unless $line=~m{\[\Q$formatted\E:};
    # The client address is taken from the first space-separated field.
    my($client)=split(/ /,$line,2);
    next unless defined $client && length $client;
    $counter++;

    my $host=$domainlookups{$client};
    if(defined $host){
        $hitcounter++;
    }
    else{
        # inet_aton() returns undef for anything it cannot turn into an
        # IPv4 address (an IPv6 literal, for instance); skip the reverse
        # lookup then rather than handing undef to gethostbyaddr().
        my $packed=inet_aton($client);
        $host=gethostbyaddr($packed,AF_INET) if defined $packed;
    }

    my $group;
    if($host){
        $domainlookups{$client}=$host;
        if($host=~/[0-9]$/){
            # Ends in a digit: treat it as a bare address, keep it whole.
            $group=$host;
        }
        else{
            my @labels=split(/\./,$host);
            # A last label shorter than three characters (a country code
            # such as "uk") keeps three labels, anything else keeps two.
            my $keep=(@labels && length($labels[-1])<3)?3:2;
            # Never ask for more labels than the name has; negative
            # indices would otherwise wrap around and repeat labels.
            $keep=@labels if $keep>@labels;
            $group=$keep?join('.',@labels[-$keep..-1]):$host;
        }
    }
    else{
        # Lookup failed: report the raw address and cache that outcome so
        # the lookup is not retried on every run.
        $group=$client;
        $domainlookups{$client}=$client;
    }
    $iparray{$group}++;
}
close($log_fh);
# Write the (possibly extended) lookup cache back to disk, one
# "address<TAB>host" pair per line. Keys are sorted so the file content
# is stable between runs. print and close are checked because buffered
# write errors (a full disk, for example) only surface there.
open(my $cache_out,'>','unique_domains.dat')||die("Can't open datafile for writing! ($!)\n");
foreach my $keyitem (sort keys %domainlookups){
    print {$cache_out} "$keyitem\t$domainlookups{$keyitem}\n"
        or die("Can't write to datafile! ($!)\n");
}
close($cache_out)||die("Can't finish writing datafile! ($!)\n");
# Report: a summary line, a blank line, then one line per domain with its
# zero-padded hit count, in the order given by hashValueAscendingNum.
print "Visitors on $shortmonth $day, $year: $counter ($hitcounter cached hits)\n\n";
my @ranked=sort hashValueAscendingNum (keys(%iparray));
for my $domain (@ranked){
    my $hits=pad($iparray{$domain},4);
    print "$hits $domain\n";
}
# Stop here so execution never runs into the sub definitions below.
exit;
# pad(VALUE, WIDTH)
# Left-pads VALUE with "0" characters until it is at least WIDTH
# characters long and returns the result. A VALUE that is already WIDTH
# characters or longer is returned unchanged.
sub pad{
    my($value,$width)=@_;
    my $missing=$width - length($value);
    return $value if $missing<=0;
    return ('0' x $missing).$value;
}
# Sort comparator (uses sort's $a and $b): orders keys of %iparray by
# ascending count. Keys with equal counts fall back to string order so
# the report comes out the same on every run instead of following
# Perl's per-process hash ordering.
sub hashValueAscendingNum{
    return $iparray{$a}<=>$iparray{$b} || $a cmp $b;
}
This site and all content (C)2002-2008 Nathan E. Pralle (www.nathanpralle.com).
|