lampware banner lampware banner lampware banner
Home FAQ Careers Clients Utilities Downloads
httpd

Apache logging to MySQL
Apache by default logs to a file defined in the httpd[2].conf file.  Then a tool such as webalizer can be used to display the logs on a webpage.  That works really good for basic site statistic.  However, if the records are logged to a database SQL analysis can be preformed very easily!

Here is how to make it happen:

  1. Setup a database/table in MySQL for the log data.  I am assuming you know a little bit about MySQL.  Use "mysql" either by redirecting the following commands in the kernel's shell using the "mysql" command or by entering mysql's shell and enter the commands there.
    CREATE DATABASE apache;
    
    USE apache;
    
    CREATE TABLE httpdlog
    (
      dt      DATETIME NOT NULL,     # request date
      method  VARCHAR(4) NOT NULL,   # request method (GET, PUT, etc.)
      request BLOB NOT NULL,         # first line of request
      url     BLOB NOT NULL,         # URL path
      referer BLOB NOT NULL,         # referer to current URL
      status  INT NOT NULL,          # request status
      size    INT,                   # number of bytes transferred
      time    INT,                   # time taken to serve the request, in seconds
      host    VARCHAR(255) NOT NULL, # client host
      agent   VARCHAR(255)           # user agent
    )TYPE=MyISAM;
                    
  2. Use a program written in a language of your choice to log the records to the database.  The following program example is written in C.  Set the "#define"s to your system's values.
    /* © 2004 lampware, inc. */
    /* program written by kemiko (KEnt MIchael KOhlmeyer) of phx, az, usa */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include "/usr/include/mysql/mysql.h"
     
    #define HOSTNAME "hostname"
    #define DATABASE "database"
    #define USERNAME "username"
    #define PASSWORD "password"
    #define FILENAME "apache2mysql.log"
    
    int main( int argc, char **argv )
    {
      FILE *fp = NULL;
      char buffer[2048] = "";
      char *field = 0;
      char query[2048] = "";
      int i = 0;
      MYSQL mysql;
         
      if( ! ( fp = fopen( FILENAME, "a" ) ) )
        exit( EOF );
       
      mysql_init( &mysql );
         
      if( !mysql_real_connect( &mysql, HOSTNAME, USERNAME, PASSWORD, DATABASE, 0, NULL, 0 ) )
        fprintf( fp, "Failed to connect to database: %s\n", mysql_error( &mysql ) );
       
      sprintf( query, "INSERT INTO httpdlog VALUES(" );
         
      fgets( buffer, sizeof( buffer ), stdin );
           
      if( *buffer )
      {
        field = strtok( buffer, "\t" );
        i = 1;
        while( field != NULL )
        {
          if( strcmp( field, "\n" ) )
          {
            if( i > 1 )
              strcat( query, "," );
            if( i < 6 || i == 8 )
            {
              strcat( query, "'" );
              strcat( query, field );
              strcat( query, "'" );
            }
            else
            {
              strcat( query, field );
            }
          }
          field = strtok( NULL, "\t" );
          i++;
        }
        strcat( query, ")" );
       
        if( mysql_query( &mysql, query ) )
          fprintf( fp, "Failed to insert the record: %s\nThe failed query was: %s\n",
                   mysql_error( &mysql ), query );
      }
      fclose( fp );
    }
    
  3. Choose a directory for this program's executable to live in.  I like an Apache directory since Apache will be the one using it.  Make sure user "apache" or "httpd" can execute this executable.

  4. Add the following two lines to the end of httpd[2].conf:
    • LogFormat "%{%Y-%m-%d %H:%M:%S}t\t%h\t%m\t%U\t%>s\t%b\t%{User-Agent}i\t\n" mysql
    • CustomLog "|<executable directory>/<executable name>" mysql

  5. Restart httpd by running "service httpd restart" as "root" in the kernel's shell.

  6. Run some SQL queries to see if the table is being populated and/or check the log file (#define FILENAME) for errors.
    mysql> select count(*) from httpdlog;
    mysql> select count(distinct host) from httpdlog;
    mysql> select count(distinct url) from httpdlog;
    mysql> select url, count(*) as count from httpdlog group by url order by count desc limit 10;
    mysql> select min(dt), max(dt) from httpdlog;
    mysql> select from_days(to_days(dt)) as day, count(*) from httpdlog group by day;
    mysql> select hour(dt) as hour, count(*) from httpdlog group by hour;
    mysql> select count(*)/(to_days(max(dt)) - to_days(min(dt)) + 1) from httpdlog;
    mysql> select max(length(url)) from httpdlog;
    mysql> select max(length(request)) from httpdlog;
    mysql> select max(length(referer)) from httpdlog;
    mysql> select dt, url, size, time from httpdlog where time > 0;
                    
  7. Write and use a nice little web interface to run these log queries.  Below is example code.

    web_usage.css
    H1, H2, H3 {
                FONT-STYLE: bold
               }
    BODY {
          COLOR: #FFFFFF;
          BACKGROUND-COLOR: #000000;
          MARGIN-TOP: 0px;
          MARGIN-LEFT: 0px;
          MARGIN-RIGHT: 0px;
          MARGIN-BOTTOM: 0px;
          FONT-FAMILY: Tahoma, Arial;
          FONT-SIZE: 14px;
         }
    TABLE {
           BORDER: 0px;
           FONT-FAMILY: Tahoma, Arial;
           FONT-SIZE: 14pt;
          }
    TD {
        FONT-FAMILY: Tahoma, Arial;
        FONT-SIZE: 16px;
        FONT-WEIGHT: bold;
       }
    A:LINK {
            COLOR: #FFFFFF;
            FONT-FAMILY: Tahoma, Arial;
            FONT-WEIGHT: bold;
            TEXT-DECORATION: none;
           }
    A:ACTIVE {
              COLOR: #FFFFFF;
              FONT-FAMILY: Tahoma, Arial;
              FONT-WEIGHT: bold;
              TEXT-DECORATION: none;
             }
    A:VISITED {
               COLOR: #FFFFFF;
               FONT-FAMILY: Tahoma, Arial;
               FONT-WEIGHT: bold;
               TEXT-DECORATION: none;
              }
    A:HOVER {
             COLOR: #297082;
             FONT-FAMILY: Tahoma, Arial;
             FONT-WEIGHT: bold
             TEXT-DECORATION: none;
            }
    
    web_usage.htm
    <HTML>
     <!-- © 2004 lampware, inc. -->
     <!-- script written by kemiko (KEnt MIchael KOhlmeyer) of phx, az, usa -->
     <HEAD>
      <TITLE>
       Web Usage
      </TITLE>
      <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
      <LINK REL="stylesheet" HREF="web_usage.css" TYPE="text/css">
     </HEAD>
     <BODY>
      <TABLE WIDTH="100%" CELLPADDING="0" CELLSPACING="0" BORDER="0" BORDERCOLOR="000000">
       <TR>
        <TD>
         <TABLE WIDTH="100%" BORDER="0" BORDERCOLOR="000000">
          <TR>
           <TD HEIGHT="25" ALIGN="CENTER" WIDTH="100%" BGCOLOR="155C6E">
            <B>Web Usage</B>
           </TD>
          </TR>
         </TABLE>
        </TD>
       </TR>
      </TABLE>
      <TABLE BORDER="0" CELLSPACING="5">
       <TR>
        <TD>
         <B>
          <TABLE BORDER="0" CELLSPACING="5">
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT * FROM httpdlog WHERE
     date_format( dt, '%y' ) = date_format( curdate( ), '%y' ) AND
     date_format( dt, '%m' ) = date_format( curdate( ), '%m' ) AND
     date_format( dt, '%e' ) = date_format( curdate( ), '%e' )"><B>all
     today</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT dt, url, status, time,
     host from httpdlog WHERE 32 <> SUBSTRING_INDEX( SUBSTRING_INDEX( host,
     '.', 3 ), '.', -1 ) and 172 <> SUBSTRING_INDEX( SUBSTRING_INDEX( host,
     '.', 1 ), '.', -1 )"><B>outside SBU</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT dt, url, status, time,
     size, host FROM httpdlog WHERE time > 0"><B>time > 0</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT
     SUBSTRING_INDEX(SUBSTRING_INDEX(url,'?',1),'.',-1) AS extension,
     COUNT(size) AS requests, SUM(size) AS bytes, AVG(size) AS
     'bytes/request' FROM httpdlog WHERE url LIKE '%.%' GROUP BY
     extension"><B>by filetype</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT dt, url, status, time,
     host FROM httpdlog WHERE status <> 304 AND status <>
     200"><B>errors</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT url, count(url) AS
     count FROM httpdlog GROUP BY url"><B>url</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT url, count(url) AS
     count FROM httpdlog WHERE url NOT LIKE '%InternetImages%' GROUP BY
     url"><B>url minus images</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT from_days(to_days(dt))
     AS day, count(*) AS count FROM httpdlog GROUP BY day"><B>total hits by
     day</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT host, count(*) AS count
     FROM httpdlog WHERE date_format( dt, '%y' ) = date_format( curdate( ),
     '%y' ) AND date_format( dt, '%m' ) = date_format( curdate( ), '%m' )
     AND date_format( dt, '%e' ) = date_format( curdate( ), '%e' ) GROUP BY
     host"><B>today by host</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT agent, host FROM
     httpdlog WHERE agent LIKE '%Hotbar%' AND date_format( dt, '%y' ) =
     date_format( curdate( ), '%y' ) AND date_format( dt, '%m' ) =
     date_format( curdate( ), '%m' ) GROUP BY agent"><B>agents with
     hotbar</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT agent, host FROM
     httpdlog WHERE agent LIKE '%Linux%' AND date_format( dt, '%y' ) =
     date_format( curdate( ), '%y' ) AND date_format( dt, '%m' ) =
     date_format( curdate( ), '%m' ) GROUP BY agent"><B>linux
     agents</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query=SELECT * FROM httpdlog ORDER
     BY dt DESC LIMIT 10;"><B>all limit 10</B></A>
            </TD>
           </TR>
           <TR>
            <TD NOWRAP="NOWRAP">
             <A HREF="web_usage.php?query="><B></B></A>
            </TD>
           </TR>
          </TABLE>
         </B>
        </TD>
       </TR>
      </TABLE>
     </BODY>
    </HTML>
    
    web_usage.php
    <HTML>
     <!-- © 2004 lampware, inc. -->
     <!-- script written by kemiko (KEnt MIchael KOhlmeyer) of phx, az, usa -->
     <HEAD>
      <TITLE>
    <?
      $query = str_replace( "\\", "", $query );
      printf( "   Web Usage: %s\n", $query );
    ?>
      </TITLE>
      <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">
      <LINK REL="stylesheet" HREF="./sql_usage.css" TYPE="text/css">
     </HEAD>
     <BODY>
    <?
      $mysql_link = mysql_connect( "localhost", "readonly", "onlyread" );
     
      mysql_select_db( "apache", $mysql_link );
     
      print( "  <TABLE BORDER=\"0\" WIDTH=\"100%\" CELLSPACING=\"0\" CELLPADDING=\"0\">\n" );
      print( "   <TR>\n" );
      print( "    <TD BGCOLOR=\"155C6E\" NOWRAP=\"NOWRAP\">\n" );
      $query = str_replace( "\\", "", $query );
      printf( "       SQL Usage: %s\n", $query );
      print( "    </TD>\n" );
      print( "   </TR>\n " );
      print( "  </TABLE>\n" );
     
      $mysql_result = mysql_query( $query, $mysql_link );
     
      if( $mysql_result )
      {
        print( "  <TABLE BORDER=\"0\" WIDTH=\"100%\" CELLSPACING=\"0\" CELLPADDING=\"0\">\n" );
        print( "   <TR>\n" );
        while( $i < mysql_num_fields( $mysql_result ) )
        {
          print( "    <TD BGCOLOR=\"000000\">\n" );
          $field = mysql_fetch_field( $mysql_result );
          printf( "       %s\n", $field->name );
          print( "    </TD>\n" );
          $i++;
        }
        $columns = $i;
        print( "   </TR>\n" );
     
        $i = 0;
        while( $row = mysql_fetch_row( $mysql_result ) )
        {
          print( "   <TR>\n" );
          if( $i % 2 )
            $BGCOLOR = "156E53";
          else
            $BGCOLOR = "155C6E";
     
          for( $j = 0; $j < mysql_num_fields( $mysql_result ); $j++ )
          {
            printf( "    <TD BGCOLOR=\"$BGCOLOR\" NOWRAP=\"NOWRAP\">\n" );
            if( $row[$j] || $row[$j] == 0 )
              printf( "     <FONT COLOR=\"%s\">  %s</FONT>\n", "FFFFFF", $row[$j] );
            else
              printf( "      \n" );
            printf( "    </TD>\n" );
          }
          print( "   </TR>\n" );
     
          $i++;
        }
        $total = $i;
     
        print( "   <TR>\n" );
        for( $i = 0; $i < $columns; $i++ )
        {
          print( "    <TD BGCOLOR=\"000000\">\n" );
          if( $i == 0 )
            print( "       total:\n" );
          else if( $i == $columns - 1 )
            print( "       $total\n" );
          else
            print( "      \n" );
          print( "    </TD>\n " );
        }
        print( "   </TR>\n " );
        print( "  </TABLE>\n" );
      }
      else
      {
        printf( "    <B>Failed to read the record(s): %s</B>\n", mysql_error( $mysql_link ) );
      }
    ?>
     </BODY>
    </HTML>
    

© 2002-2015 lampware.us