The following class yields a hash for every parseable line of an Apache access log:
require 'date'
# Parser for Apache access-log lines shaped like the "combined" log format:
#
#   ip ident user [timestamp] "METHOD url protocol" status bytes "referrer" "agent"
class Apache
  # strptime format of the bracketed timestamp,
  # e.g. "[10/Oct/2000:13:55:36 -0700]".
  DATE_FORMAT = '[%d/%b/%Y:%H:%M:%S %Z]'.freeze

  # Pattern for a single log entry, compiled once instead of on every call.
  # Fields are separated by exactly one space, written as \x20 because the
  # /x flag ignores literal whitespace. The pattern is deliberately not
  # anchored, so it may match anywhere inside the line.
  LINE_PATTERN = /
    (?<ip>(?:[0-9]+\.){3}[0-9]+)\x20        # dotted IPv4 address
    (?<uid>[^\x20]+)\x20
    (?<auth>[^\x20]+)\x20
    (?<date>\[[^\]]+\])\x20                 # timestamp, brackets included
    "(?<method>[A-Z]+)\s(?<url>[^\s]+)\s(?<http>[^\s]+)"\x20
    (?<status>[0-9]+)\x20
    (?<bytes>-|[0-9]+)\x20
    "(?<referrer>(?:[^"\\]|\\.)*)"\x20      # may be empty, may hold escaped quotes
    "(?<agent>(?:[^"\\]|\\.)*)"
  /x

  private_constant :DATE_FORMAT, :LINE_PATTERN

  # Reads +src+ line by line and yields one Hash per parseable log entry.
  #
  # The yielded Hash has the keys :ip, :uid, :auth, :date, :method, :url,
  # :http, :status, :bytes, :referrer and :agent. :date is a DateTime; every
  # other value is the raw String taken from the line (so :status and :bytes
  # are not converted to numbers, and :bytes may be "-").
  #
  # Lines that do not match the expected layout, or whose timestamp cannot be
  # parsed, are reported on $stderr and skipped.
  #
  # @param src [#each_line] source of log lines (defaults to $stdin)
  # @yieldparam request [Hash] the parsed fields of one log entry
  # @return [Object, Enumerator] whatever +src.each_line+ returns, or an
  #   Enumerator over the parsed entries when no block is given
  def self.each_request(src = $stdin)
    return enum_for(:each_request, src) unless block_given?

    src.each_line do |line|
      request = parse_line(line)
      if request
        yield request
      else
        # chomp so the closing quote stays on the same line as the message
        $stderr.puts "Unparseable line: '#{line.chomp}'"
      end
    end
  end

  # Turns one log line into a Hash of fields, or returns nil if the line does
  # not match LINE_PATTERN or carries an invalid timestamp.
  def self.parse_line(line)
    m = LINE_PATTERN.match(line)
    return nil unless m

    # Parse the date here, outside of the caller's block, so that only
    # timestamp errors are rescued and never errors raised by the caller.
    begin
      date = DateTime.strptime(m[:date], DATE_FORMAT)
    rescue ArgumentError
      return nil
    end

    { :ip       => m[:ip],
      :uid      => m[:uid],
      :auth     => m[:auth],
      :date     => date,
      :method   => m[:method],
      :url      => m[:url],
      :http     => m[:http],
      :status   => m[:status],
      :bytes    => m[:bytes],
      :referrer => m[:referrer],
      :agent    => m[:agent] }
  end
  private_class_method :parse_line
end
There may be faster ways to do this, but it is quite convenient this way.