logfile = $logfile;
        if ($logfile and !is_writeable($logfile)) {
            trigger_error(
                sprintf(_("%s is not writable."), _("The PhpWiki access log file"))
                . "\n"
                . sprintf(_("Please ensure that %s is writable, or redefine %s in config/config.ini."),
                    sprintf(_("the file “%s”"), ACCESS_LOG),
                    'ACCESS_LOG'),
                E_USER_NOTICE);
        }
        //$request->_accesslog =& $this;
        //if (empty($request->_accesslog->entries))
        // NOTE(review): reconstructed from a whitespace-collapsed source in which
        // this call shared a line with the comment above. It is kept active because
        // the shutdown callback is what writes the file log - please confirm.
        register_shutdown_function("Request_AccessLogEntry_shutdown_function");

        if ($do_sql) {
            if (!$request->_dbi->isSQL()) {
                trigger_error("Unsupported database backend for ACCESS_LOG_SQL.\nNeed DATABASE_TYPE=SQL or ADODB or PDO");
            } else {
                global $DBParams;
                //$this->_dbi =& $request->_dbi;
                $this->logtable = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '') . "accesslog";
            }
        }
        $this->entries = array();
        $this->entries[] = new Request_AccessLogEntry($this);
    }

    /**
     * Call method $cmd with the single argument $arg on every pending entry.
     *
     * @param $cmd string Name of a Request_AccessLogEntry method.
     * @param $arg mixed  Argument handed through (by reference) to that method.
     */
    function _do($cmd, &$arg)
    {
        // entries is unset again by write()/write_file(), so guard with empty().
        if (empty($this->entries))
            return;
        // Index loop on purpose: the stored entries themselves must be modified.
        $count = count($this->entries);
        for ($i = 0; $i < $count; $i++)
            $this->entries[$i]->$cmd($arg);
    }

    /** Fill all pending entries from the given request. */
    function push(&$request)
    {
        $this->_do('push', $request);
    }

    /** Set the response size on all pending entries. */
    function setSize($arg)
    {
        $this->_do('setSize', $arg);
    }

    /** Set the HTTP status on all pending entries. */
    function setStatus($arg)
    {
        $this->_do('setStatus', $arg);
    }

    /** Set the request duration on all pending entries. */
    function setDuration($arg)
    {
        $this->_do('setDuration', $arg);
    }

    /**
     * Read sequentially all previous entries from the beginning.
     * while ($logentry = Request_AccessLogEntry::read()) ;
     * For internal log analyzers: RecentReferrers, WikiAccessRestrictions
     *
     * Reads from the SQL table when one is configured, else from the log file.
     */
    function read()
    {
        // logtable only exists when SQL logging was enabled in the constructor.
        return !empty($this->logtable) ? $this->read_sql() : $this->read_file();
    }

    /**
     * Return iterator of referer items reverse sorted (latest first).
     *
     * @param $limit integer          Maximum number of items; 0 means no limit.
     * @param $external_only boolean  Skip referers that start with SERVER_URL.
     * @return object Iterator over the matching log entries.
     */
    function get_referer($limit = 15, $external_only = false)
    {
        $limit = (int)$limit; // also interpolated into the SQL LIMIT clause below
        // see stdlib.php:isExternalReferrer()
        // Defined unconditionally: the SQL branch builds $ext_where from these
        // even when $external_only is false.
        $base = SERVER_URL;
        $blen = strlen($base);

        if (!empty($this->_dbi)) {
            // check same hosts in referer and request and remove them
            $ext_where = " AND LEFT(referer,$blen) <> " . $this->_dbi->quote($base)
                . " AND LEFT(referer,$blen) <> LEFT(CONCAT(" . $this->_dbi->quote(SERVER_URL)
                . ",request_uri),$blen)";
            return $this->_read_sql_query("(referer <>'' AND NOT(ISNULL(referer)))"
                . ($external_only ? $ext_where : '')
                . " ORDER BY time_stamp DESC"
                . ($limit ? " LIMIT $limit" : ""));
        }

        $iter = new WikiDB_Array_generic_iter(0);
        // Appending with [] to a scalar fails, so make sure we start from an array.
        if (!isset($iter->_array) or !is_array($iter->_array))
            $iter->_array = array();
        $logs =& $iter->_array;
        while ($logentry = $this->read_file()) {
            if (!empty($logentry->referer)
                and (!$external_only or (substr($logentry->referer, 0, $blen) != $base))
            ) {
                $logs[] = $logentry;
                // keep only the newest $limit entries while scanning forward
                if ($limit and count($logs) > $limit)
                    array_shift($logs);
            }
        }
        $logs = array_reverse($logs);
        // $limit == 0 means "no limit", as in the SQL branch; slicing to
        // min(0, n) would throw every entry away.
        if ($limit)
            $logs = array_slice($logs, 0, min($limit, count($logs)));
        return $iter;
    }

    /**
     * Read the next entry from the log file, starting at the beginning.
     * FIXME! (the original note asks for reading backwards)
     *
     * @return mixed A Request_AccessLogEntry (left at its defaults when the line
     *               does not match the expected format), or false at end of
     *               file or when the file cannot be opened.
     */
    function read_file()
    {
        // Fall back to the configured file only if none is set
        // (support Request_AccessLog::read).
        if (empty($this->logfile))
            $this->logfile = ACCESS_LOG;

        if (empty($this->reader)) { // start at the beginning
            $this->reader = fopen($this->logfile, "r");
            if (!$this->reader) {
                unset($this->reader);
                return false;
            }
        }
        if ($s = fgets($this->reader)) {
            $entry = new Request_AccessLogEntry($this);
            $re = '/^(\S+)\s(\S+)\s(\S+)\s\[(.+?)\] "([^"]+)" (\d+) (\d+) "([^"]*)" "([^"]*)"$/';
            if (preg_match($re, $s, $m)) {
                list(, $entry->host, $entry->ident, $entry->user, $entry->time,
                    $entry->request, $entry->status, $entry->size,
                    $entry->referer, $entry->user_agent) = $m;
            }
            return $entry;
        }
        // until the end; drop the closed handle so a later read reopens the file
        fclose($this->reader);
        unset($this->reader);
        return false;
    }

    /**
     * Return the next row of the SQL access log; the query is run on first use.
     */
    function read_sql($where = '')
    {
        if (empty($this->sqliter))
            $this->sqliter = $this->_read_sql_query($where);
        return $this->sqliter->next();
    }

    /**
     * Run a SELECT on the access log table, aliasing the columns to the
     * property names used by Request_AccessLogEntry.
     *
     * @param $where string Optional SQL condition (may include ORDER BY/LIMIT).
     */
    function _read_sql_query($where = '')
    {
        global $request;
        $dbh =& $request->_dbi;
        $log_tbl =& $this->logtable;
        return $dbh->genericSqlIter("SELECT *,request_uri as request,request_time as time,remote_user as user,"
            . "remote_host as host,agent as user_agent"
            . " FROM $log_tbl"
            . ($where ? " WHERE $where" : ""));
    }

    /* done in request->finish() before the db is closed */
    function write_sql()
    {
        global $request;
        $dbh =& $request->_dbi;
        if (isset($this->entries) and $dbh and $dbh->isOpen())
            foreach ($this->entries as $entry) {
                $entry->write_sql();
            }
    }

    /* done in the shutdown callback */
    function write_file()
    {
        if (isset($this->entries) and $this->logfile)
            foreach ($this->entries as $entry) {
                $entry->write_file();
            }
        unset($this->entries);
    }

    /* in an ideal world... */
    function write()
    {
        // write_file() unsets $this->entries, so the SQL pass has to run first
        // or it would find nothing left to write.
        if (!empty($this->logtable))
            $this->write_sql();
        if ($this->logfile)
            $this->write_file();
        unset($this->entries);
    }
}

class Request_AccessLogEntry
{
    /**
     * Constructor.
     *
     * The log entry will be automatically appended to the log file or
     * SQL table when the current request terminates.
     *
     * If you want to modify a Request_AccessLogEntry before it gets
     * written (e.g. via the setStatus and setSize methods) you should
     * use an '&' on the constructor, so that you're working with the
     * original (rather than a copy) object.
     *
     *
     *    $log_entry = & new Request_AccessLogEntry($accesslog);
     *    $log_entry->setStatus(401);
     *    $log_entry->push($request);
     *
     * @param $accesslog object The owning Request_AccessLog; its logfile
     *                          property is copied into this entry.
     */
    function Request_AccessLogEntry(&$accesslog)
    {
        $this->_accesslog = $accesslog;
        $this->logfile = $accesslog->logfile;
        $this->time = time();
        $this->status = 200; // see setStatus()
        $this->size = 0; // see setSize()
    }

    /**
     * Copy host, ident, user, request line, referer and user agent
     * from the request into this entry.
     *
     * @param $request object Request object for current request.
     */
    function push(&$request)
    {
        $this->host = $request->get('REMOTE_HOST');
        $this->ident = $request->get('REMOTE_IDENT');
        if (!$this->ident)
            $this->ident = '-';
        $user = $request->getUser();
        if ($user->isAuthenticated())
            $this->user = $user->UserName();
        else
            $this->user = '-';
        $this->request = join(' ', array($request->get('REQUEST_METHOD'),
            $request->get('REQUEST_URI'),
            $request->get('SERVER_PROTOCOL')));
        $this->referer = (string)$request->get('HTTP_REFERER');
        $this->user_agent = (string)$request->get('HTTP_USER_AGENT');
    }

    /**
     * Set result status code.
     *
     * @param $status integer  HTTP status code.
     */
    function setStatus($status)
    {
        $this->status = $status;
    }

    /**
     * Set response size.
     *
     * @param $size integer
     */
    function setSize($size = 0)
    {
        $this->size = (int)$size;
    }

    /**
     * Set the request duration.
     *
     * @param $seconds float Stored as a string with "." as decimal separator.
     */
    function setDuration($seconds)
    {
        // Pear DB does not correctly quote , in floats using ?. e.g. in european locales.
        // Workaround:
        $this->duration = str_replace(",", ".", sprintf("%f", $seconds));
    }

    /**
     * Get time zone offset.
     *
     * This is a static member function.
     *
     * @param $time integer Unix timestamp (defaults to current time).
     * @return string Signed zone offset, e.g. "-0800" for PST or "+0100" for CET.
     */
    function _zone_offset($time = false)
    {
        if (!$time)
            $time = time();
        // date("O") yields the signed +hhmm/-hhmm form; the previous hand-rolled
        // arithmetic left out the "+" for zones east of Greenwich.
        return date("O", $time);
    }

    /**
     * Format time in NCSA format.
     *
     * This is a static member function.
     *
     * @param $time integer Unix timestamp (defaults to current time).
     * @return string Formatted date & time.
     */
    function _ncsa_time($time = false)
    {
        if (!$time)
            $time = time();
        // Pass $time on, so the offset belongs to the same instant as the date
        // (matters across daylight-saving changes).
        return date("d/M/Y:H:i:s", $time) . " " . $this->_zone_offset($time);
    }

    /**
     * Write this entry to the log file and/or SQL table, whichever the
     * owning access log has configured.
     */
    function write()
    {
        if ($this->_accesslog->logfile)
            $this->write_file();
        // logtable is only defined when SQL logging is enabled.
        if (!empty($this->_accesslog->logtable))
            $this->write_sql();
    }

    /**
     * Write entry to log file.
     */
    function write_file()
    {
        $entry = sprintf('%s %s %s [%s] "%s" %d %d "%s" "%s"',
            $this->host, $this->ident, $this->user,
            $this->_ncsa_time($this->time),
            $this->request, $this->status, $this->size,
            $this->referer, $this->user_agent);
        // Referer, user agent and request line are client-supplied: keep the
        // entry on one line so that no extra log lines can be injected.
        $entry = str_replace(array("\r", "\n"), ' ', $entry);

        // Close a reader left open by Request_AccessLog::read_file() first.
        if (!empty($this->_accesslog->reader)) {
            fclose($this->_accesslog->reader);
            unset($this->_accesslog->reader);
        }
        //Error log doesn't provide locking.
        //error_log("$entry\n", 3, $this->logfile);
        // Alternate method
        if (($fp = fopen($this->logfile, "a"))) {
            flock($fp, LOCK_EX);
            fputs($fp, "$entry\n");
            fclose($fp); // also releases the lock
        }
    }

    /* This is better been done by apache mod_log_sql */
    /* If ACCESS_LOG_SQL & 2 we do write it by our own */
    function write_sql()
    {
        global $request;

        $dbh =& $request->_dbi;
        if ($dbh and $dbh->isOpen() and $this->_accesslog->logtable) {
            //$log_tbl =& $this->_accesslog->logtable;
            if ($request->get('REQUEST_METHOD') == "POST") {
                // strangely HTTP_POST_VARS doesn't contain all posted vars.
                $args = $_POST; // copy not ref. clone not needed on hashes
                // garble passwords
                if (!empty($args['auth']['passwd']))
                    $args['auth']['passwd'] = '';
                if (!empty($args['dbadmin']['passwd']))
                    $args['dbadmin']['passwd'] = '';
                if (!empty($args['pref']['passwd']))
                    $args['pref']['passwd'] = '';
                if (!empty($args['pref']['passwd2']))
                    $args['pref']['passwd2'] = '';
                $this->request_args = substr(serialize($args), 0, 254); // if VARCHAR(255) is used.
            } else {
                $this->request_args = $request->get('QUERY_STRING');
            }
            $this->request_method = $request->get('REQUEST_METHOD');
            $this->request_uri = $request->get('REQUEST_URI');
            // duration problem: sprintf "%f" might use comma e.g. "100,201" in european locales
            $dbh->_backend->write_accesslog($this);
        }
    }
}

/**
 * Shutdown callback.
Ensures that the file is written.
 *
 * Writes every pending entry of $request->_accesslog to the log file (when a
 * log file is configured) and then discards the pending entries.
 *
 * @access private
 * @see Request_AccessLogEntry
 */
function Request_AccessLogEntry_shutdown_function()
{
    global $request;

    if (isset($request->_accesslog->entries) and $request->_accesslog->logfile)
        foreach ($request->_accesslog->entries as $entry) {
            $entry->write_file();
        }
    unset($request->_accesslog->entries);
}

// TODO: SQL access methods....
// (c) 2005 Charles Corrigan (the mysql parts)
// (c) 2006 Rein Urban (the postgresql parts)
// from AnalyseAccessLogSql.php
class Request_AccessLog_SQL
{
    /**
     * Build the query string
     *
     * FIXME: some or all of these queries may be MySQL specific / non-portable
     *
     * The column names displayed are generated from the actual query column
     * names, so make sure that each column in the query is given a user
     * friendly name. Note that the column names are passed to _() and so may be
     * translated.
     *
     * Modes without a fixed condition prefix their conditions with WHERE via
     * _whereClause(). A query that has its own fixed WHERE condition (as the
     * search_bots modes do) must append the extra conditions with 'AND '
     * instead, e.g.
     *   "SELECT referer FROM $accesslog WHERE referer IS NOT NULL "
     *   . ($where_conditions ? 'AND ' . $where_conditions : '')
     *
     * @param $args array Plugin style arguments (mode, period, count,
     *                    local_referrers); mode 'external_referers' sets
     *                    $args['local_referrers'] to 'false'.
     * @return string The SQL query, or '' for an unknown mode.
     */
    function _getQueryString(&$args)
    {
        // extract any parametrised conditions from the arguments,
        // in particular, how much history to select
        $where_conditions = $this->_getWhereConditions($args);

        // get the correct name for the table
        //FIXME is there a more correct way to do this?
        global $DBParams, $request;
        $accesslog = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '') . "accesslog";

        $query = '';
        $backend_type = $request->_dbi->_backend->backendType();
        switch ($backend_type) {
            case 'mysql':
                $Referring_URL = "left(referer,length(referer)-instr(reverse(referer),'?'))";
                break;
            case 'pgsql':
            case 'postgres7':
                $Referring_URL = "substr(referer,0,position('?' in referer))";
                break;
            default:
                $Referring_URL = "referer";
        }

        $mode = isset($args['mode']) ? $args['mode'] : '';
        switch ($mode) {
            case 'external_referers':
                $args['local_referrers'] = 'false';
                $where_conditions = $this->_getWhereConditions($args);
                // fall through: same query as referring_urls
            case 'referring_urls':
                $query = "SELECT "
                    . "$Referring_URL AS Referring_URL, "
                    . "count(*) AS Referral_Count "
                    . "FROM $accesslog "
                    . $this->_whereClause($where_conditions)
                    . "GROUP BY Referring_URL";
                break;
            case 'referring_domains':
                switch ($backend_type) {
                    case 'mysql':
                        $Referring_Domain = "left(referer, if(locate('/', referer, 8) > 0,locate('/', referer, 8) -1, length(referer)))";
                        break;
                    case 'pgsql':
                    case 'postgres7':
                        $Referring_Domain = "substr(referer,0,8) || regexp_replace(substr(referer,8), '/.*', '')";
                        break;
                    default:
                        $Referring_Domain = "referer";
                        break;
                }
                $query = "SELECT "
                    . "$Referring_Domain AS Referring_Domain, "
                    . "count(*) AS Referral_Count "
                    . "FROM $accesslog "
                    . $this->_whereClause($where_conditions)
                    . "GROUP BY Referring_Domain";
                break;
            case 'remote_hosts':
                $query = "SELECT "
                    . "remote_host AS Remote_Host, "
                    . "count(*) AS Access_Count "
                    . "FROM $accesslog "
                    . $this->_whereClause($where_conditions)
                    . "GROUP BY Remote_Host";
                break;
            case 'users':
                $query = "SELECT "
                    . "remote_user AS User, "
                    . "count(*) AS Access_Count "
                    . "FROM $accesslog "
                    . $this->_whereClause($where_conditions)
                    . "GROUP BY remote_user";
                break;
            case 'host_users':
                $query = "SELECT "
                    . "remote_host AS Remote_Host, "
                    . "remote_user AS User, "
                    . "count(*) AS Access_Count "
                    . "FROM $accesslog "
                    . $this->_whereClause($where_conditions)
                    . "GROUP BY remote_host, remote_user";
                break;
            case "search_bots":
                // This queries for all entries in the SQL access log table that
                // have a dns name that I know to be a web search engine crawler and
                // categorises the results into time buckets (see _getTimeScaleSql)
                $query = "SELECT "
                    . $this->_getTimeScaleSql(time())
                    . "remote_host AS Remote_Host, "
                    . "count(*) AS Access_Count "
                    . "FROM $accesslog "
                    . $this->_getSearchBotsWhere($where_conditions)
                    . "GROUP BY Time_Scale, remote_host";
                break;
            case "search_bots_hits":
                // This queries for all entries in the SQL access log table that
                // have a dns name that I know to be a web search engine crawler and
                // displays the URI that was hit.
                // If PHPSESSID appears in the URI, just display the URI to the left of this
                $sessname = session_name();
                switch ($backend_type) {
                    case 'mysql':
                        $sess = $this->_sqlQuoteLiteral($sessname);
                        $Request_URI = "IF(instr(request_uri, $sess)=0, request_uri,left(request_uri, instr(request_uri, $sess)-2))";
                        break;
                    case 'pgsql':
                    case 'postgres7':
                        $Request_URI = "regexp_replace(request_uri, "
                            . $this->_sqlQuoteLiteral($sessname . '.*') . ", '')";
                        break;
                    default:
                        $Request_URI = 'request_uri';
                        break;
                }
                $query = "SELECT "
                    . $this->_getTimeScaleSql(time())
                    . "remote_host AS Remote_Host, "
                    . "$Request_URI AS Request_URI "
                    . "FROM $accesslog "
                    . $this->_getSearchBotsWhere($where_conditions)
                    . "ORDER BY time_stamp";
        }
        return $query;
    }

    /** Return $s as a single-quoted SQL string literal with quotes doubled. */
    function _sqlQuoteLiteral($s)
    {
        return "'" . str_replace("'", "''", $s) . "'";
    }

    /** Prefix non-empty conditions with WHERE; return '' for no conditions. */
    function _whereClause($where_conditions)
    {
        if ($where_conditions <> '')
            return 'WHERE ' . $where_conditions . ' ';
        return '';
    }

    /** WHERE clause selecting the known crawler hosts plus optional conditions. */
    function _getSearchBotsWhere($where_conditions)
    {
        return "WHERE (remote_host LIKE '%googlebot.com' "
            . "OR remote_host LIKE '%alexa.com' "
            . "OR remote_host LIKE '%inktomisearch.com' "
            . "OR remote_host LIKE '%msnbot.msn.com') "
            . ($where_conditions ? 'AND ' . $where_conditions : '');
    }

    /**
     * Nested CASE expression sorting time_stamp into age buckets, aliased as
     * Time_Scale and followed by ", " so further select columns can be appended.
     *
     * @param $now integer Unix timestamp the ages are measured from.
     */
    function _getTimeScaleSql($now)
    {
        // upper bound of each bucket in seconds => translated label
        // 0 - 1 minute - 60
        // 1 - 1 hour   - 3600     = 60 * 60
        // 2 - 1 day    - 86400    = 60 * 60 * 24
        // 3 - 1 week   - 604800   = 60 * 60 * 24 * 7
        // 4 - 1 month  - 2629800  = 60 * 60 * 24 * 365.25 / 12
        // 5 - 1 year   - 31557600 = 60 * 60 * 24 * 365.25
        $buckets = array(
            60 => _("0 - last minute"),
            3600 => _("1 - 1 minute to 1 hour"),
            86400 => _("2 - 1 hour to 1 day"),
            604800 => _("3 - 1 day to 1 week"),
            2629800 => _("4 - 1 week to 1 month"),
            31557600 => _("5 - 1 month to 1 year"),
        );
        $sql = '';
        $ends = '';
        foreach ($buckets as $seconds => $label) {
            // labels are translated text, so quote them for SQL
            $sql .= "CASE WHEN $now-time_stamp<$seconds THEN "
                . $this->_sqlQuoteLiteral($label) . " ELSE ";
            $ends .= " END";
        }
        return $sql . $this->_sqlQuoteLiteral(_("6 - more than 1 year"))
            . $ends . " AS Time_Scale, ";
    }

    /** Honeypot for xgettext. Those strings are translated dynamically.
*/
    function _locale_dummy()
    {
        $dummy = array(
            // mode caption
            _("referring_urls"),
            _("external_referers"),
            _("referring_domains"),
            _("remote_hosts"),
            _("users"),
            _("host_users"),
            _("search_bots"),
            _("search_bots_hits"),
            // period header
            _("minutes"),
            _("hours"),
            _("days"),
            _("weeks"),
        );
    }

    /**
     * Default arguments for the report.
     *
     * @return array
     */
    function getDefaultArguments()
    {
        return array(
            // referring_domains, referring_urls, external_referers, remote_hosts,
            // users, host_users, search_bots, search_bots_hits
            'mode' => 'referring_domains',
            // blank means use the mode as the caption/title for the output
            'caption' => '',
            // 'true' keeps referrers from this site in the report;
            // any other value filters them out (external referring sites only)
            'local_referrers' => 'true',
            // the type of period to report:
            // may be weeks, days, hours, minutes, or blank for all
            'period' => '',
            // the number of periods to report
            'count' => '0'
        );
    }

    /**
     * Run the report and render it as an HTML table.
     *
     * @param $args array Optional arguments overriding getDefaultArguments().
     * @return object HTML table, or a paragraph with an error for a bad mode.
     */
    function table_output($args = false)
    {
        global $request;

        $defaults = $this->getDefaultArguments();
        $args = is_array($args) ? array_merge($defaults, $args) : $defaults;

        // fresh header state for every report
        $this->_theadrow = HTML::tr();
        $this->_headerSet = false;

        $query = $this->_getQueryString($args);
        if ($query == '')
            return HTML::p(sprintf(_("Unrecognised parameter 'mode=%s'"),
                $args['mode']));

        // get the data back.
        // Note that this must be done before the final generation of the table,
        // otherwise the headers will not be ready
        $dbi =& $request->_dbi; // same handle _getQueryString() takes the backend type from
        $tbody = $this->_getQueryResults($query, $dbi);

        return HTML::table(array('border' => 1),
            HTML::caption($this->_getCaption($args)),
            HTML::thead($this->_theadrow),
            $tbody);
    }

    /**
     * Execute $query and build the table body, one row per result row.
     *
     * @param $query string SQL query.
     * @param $dbi object   Database handle providing genericSqlIter().
     * @return object HTML tbody element.
     */
    function _getQueryResults($query, &$dbi)
    {
        $queryResult = $dbi->genericSqlIter($query);
        if (!$queryResult) {
            // There is no iterator to free here. The placeholder must not be "":
            // gettext returns the catalogue header for an empty msgid.
            // NOTE(review): assumes the HTML helpers escape text content - confirm.
            return HTML::tbody(HTML::tr(HTML::td(_("<empty>"))));
        }

        $tbody = HTML::tbody();
        while ($row = $queryResult->next()) {
            $this->_setHeaders($row);
            $tr = HTML::tr();
            foreach ($row as $value) {
                // output a '-' for empty values, otherwise the table looks strange
                // (a literal 0 is a value, not an empty cell)
                $is_blank = ($value === null or $value === '');
                $tr->pushContent(HTML::td($is_blank ? '-' : $value));
            }
            $tbody->pushContent($tr);
        }
        $queryResult->free();
        return $tbody;
    }

    /**
     * Fill the header row from the column names of the first result row.
     * The column names are passed through _() for translation.
     */
    function _setHeaders($row)
    {
        if (empty($this->_headerSet)) {
            if (empty($this->_theadrow))
                $this->_theadrow = HTML::tr();
            foreach ($row as $key => $value) {
                $this->_theadrow->pushContent(HTML::th(_($key)));
            }
            $this->_headerSet = true;
        }
    }

    /**
     * Build the SQL conditions for the period and local_referrers arguments.
     *
     * @param $args array Uses 'period', 'count' and 'local_referrers'.
     * @return string Conditions joined by AND with a trailing space, or ''.
     */
    function _getWhereConditions(&$args)
    {
        $conditions = array();

        $period = isset($args['period']) ? $args['period'] : '';
        if ($period <> '') {
            $seconds = 0;
            if ($period == 'minutes') {
                $seconds = 60;
            } elseif ($period == 'hours') {
                $seconds = 60 * 60;
            } elseif ($period == 'days') {
                $seconds = 60 * 60 * 24;
            } elseif ($period == 'weeks') {
                $seconds = 60 * 60 * 24 * 7;
            }
            $count = (isset($args['count']) and is_numeric($args['count'])) ? $args['count'] : 0;
            // integer result, as it ends up inside the SQL text
            $since = (int)($seconds * $count);
            if ($since > 0)
                $conditions[] = "time_stamp > " . (time() - $since);
        }

        $local = isset($args['local_referrers']) ? $args['local_referrers'] : '';
        if ($local <> 'true') {
            global $request;
            $localhost = SERVER_URL;
            $len = strlen($localhost);
            $quoted = "'" . str_replace("'", "''", $localhost) . "'";
            $backend_type = $request->_dbi->_backend->backendType();
            switch ($backend_type) {
                case 'mysql':
                    $conditions[] = "left(referer,$len)<>$quoted";
                    break;
                case 'pgsql':
                case 'postgres7':
                    // substr() counts from 1: starting at 0 would return only
                    // $len-1 characters, which never equal the server URL.
                    $conditions[] = "substr(referer,1,$len)<>$quoted";
                    break;
                default:
                    // no portable expression known: add nothing, rather than
                    // leaving a dangling AND in the query
                    break;
            }
        }

        if (!$conditions)
            return '';
        // The assumed contract is that there is a space at the end of the
        // conditions string, so that following SQL clauses (such as GROUP BY)
        // will not cause a syntax error
        return join(' AND ', $conditions) . ' ';
    }

    /**
     * Caption for the table: the given caption or the translated mode,
     * followed by " - <count> <period>" when a period is selected.
     */
    function _getCaption(&$args)
    {
        $caption = $args['caption'];
        if ($caption == '')
            $caption = gettext($args['mode']);
        if ($args['period'] <> '' && $args['count'])
            $caption = $caption . " - " . $args['count'] . " " . gettext($args['period']);
        return $caption;
    }
}

// Local Variables:
// mode: php
// tab-width: 8
// c-basic-offset: 4
// c-hanging-comment-ender-p: nil
// indent-tabs-mode: nil
// End: