Ruby: parsing Apache server logs

Wed, 01. Jul 2009

Categories: en sysadmin Tags: apache logfile parse Ruby serverlogs

Yields a hash for every log line that can be parsed; lines that cannot be parsed are reported on stderr:

require 'date'

# Parser for Apache access logs in the "combined" format
# (client IP, identity, user, [timestamp], "request", status, bytes,
# "referrer", "user agent").
class Apache
  # Dotted-quad IPv4 client address.
  IP_PATTERN = /(?:[0-9]+\.){3}[0-9]+/
  # Bracketed timestamp, e.g. [01/Jul/2009:12:00:00 +0200].
  DATE_PATTERN = /\[[^\]]+\]/
  # Quoted request line: method, URL and protocol.
  REQUEST_PATTERN = /"([A-Z]+)\s([^\s]+)\s([^\s]+)"/
  # Quoted field that may be empty and may contain backslash-escaped
  # characters such as \" (the escape sequences are kept as logged).
  QUOTED_PATTERN = /"((?:[^"\\]|\\.)*)"/
  # strptime format matching DATE_PATTERN, brackets included.
  DATE_FORMAT = '[%d/%b/%Y:%H:%M:%S %Z]'
  # Whole-entry pattern, built once at load time. Fields are separated by
  # single spaces. The pattern is deliberately not anchored, so an entry
  # that is preceded by extra text on the line is still recognised.
  LINE_PATTERN = Regexp.new(
    "(#{IP_PATTERN}) ([^ ]+) ([^ ]+) (#{DATE_PATTERN}) #{REQUEST_PATTERN} " \
    "([0-9]+) (-|[0-9]+) #{QUOTED_PATTERN} #{QUOTED_PATTERN}"
  ).freeze

  # Reads +src+ line by line and yields one Hash per parseable log entry.
  #
  # The hash has the keys :ip, :uid, :auth, :date, :method, :url, :http,
  # :status, :bytes, :referrer and :agent. :date is a DateTime; every other
  # value is the String captured from the line (so :bytes may be "-").
  #
  # Lines that do not match the log format, or whose timestamp cannot be
  # parsed, are reported on $stderr and skipped.
  #
  # @param src [#each_line] source of log lines (defaults to $stdin)
  # @yieldparam request [Hash] the parsed fields of one log entry
  # @return [Enumerator] when called without a block; otherwise whatever
  #   src.each_line returns
  def self.each_request(src=$stdin)
    return enum_for(:each_request, src) unless block_given?

    src.each_line do |line|
      request = parse_line(line)
      if request
        yield request
      else
        # chomp so the closing quote stays on the same line as the message
        $stderr.puts "Unparseable line: '#{line.chomp}'"
      end
    end
  end

  # Turns a single log line into a request Hash, or returns nil when the
  # line does not match LINE_PATTERN or carries an invalid timestamp.
  def self.parse_line(line)
    m = LINE_PATTERN.match(line)
    return nil unless m

    begin
      date = DateTime.strptime(m[4], DATE_FORMAT)
    rescue ArgumentError
      # strptime rejects the bracketed text; treat the line as unparseable
      # instead of aborting the whole run.
      return nil
    end

    { :ip => m[1],
      :uid => m[2],
      :auth => m[3],
      :date => date,
      :method => m[5],
      :url => m[6],
      :http => m[7],
      :status => m[8],
      :bytes => m[9],
      :referrer => m[10],
      :agent => m[11] }
  end
  private_class_method :parse_line
end

There may be faster ways to do this, but it is quite convenient this way.