4 * Copyright 2005, 2007 Reini Urban
6 * This file is part of PhpWiki.
8 * PhpWiki is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * PhpWiki is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * Read and write file and SQL accesslog. Write sequentially.
26 * Read from file per pagename: Hits
31 * Create NCSA "combined" log entry for current request.
32 * Also needed for advanced spam prevention.
33 * global object holding global state (sql or file, entries, to dump)
35 class Request_AccessLog
38 * @param $logfile string Log file name.
// Constructor. Stores the log file name, builds a "not writable" notice when
// a log file is given but cannot be written, registers the shutdown callback
// that flushes entries to the file, computes the SQL log table name, and
// seeds the entry list with one entry bound to this log.
// NOTE(review): this extract omits several original lines (the call that
// receives the notice text, the test on $do_sql, closing braces), so the
// exact nesting of the statements below cannot be confirmed from here.
// NOTE(review): $request and $DBParams are used below but no `global`
// declaration is visible in this extract - confirm one exists. The comment
// on the next line says $request is not yet initialized at this point, yet
// $request->_dbi is dereferenced further down - verify.
40 function Request_AccessLog($logfile, $do_sql = false)
42 //global $request; // request not yet initialized!
44 $this->logfile = $logfile;
45 if ($logfile and !is_writeable($logfile)) {
47 (sprintf(_("%s is not writable."), _("The PhpWiki access log file"))
49 . sprintf(_("Please ensure that %s is writable, or redefine %s in config/config.ini."),
// The notice names the ACCESS_LOG constant rather than the $logfile argument.
50 sprintf(_("the file '%s'"), ACCESS_LOG),
54 //$request->_accesslog =& $this;
55 //if (empty($request->_accesslog->entries))
56 register_shutdown_function("Request_AccessLogEntry_shutdown_function");
59 if (!$request->_dbi->isSQL()) {
60 trigger_error("Unsupported database backend for ACCESS_LOG_SQL.\nNeed DATABASE_TYPE=SQL or ADODB or PDO");
// $this->_dbi is left unset here (assignment commented out); get_referer()
// and get_host() still read $this->_dbi.
63 //$this->_dbi =& $request->_dbi;
64 $this->logtable = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '') . "accesslog";
67 $this->entries = array();
68 $this->entries[] = new Request_AccessLogEntry($this);
// Call the method named by $cmd on every queued entry, passing $arg through.
// $arg is taken by reference, so callers must pass a variable.
71 function _do($cmd, &$arg)
74 for ($i = 0; $i < count($this->entries); $i++)
75 $this->entries[$i]->$cmd($arg);
// Forward the request object to push() on every queued entry.
78 function push(&$request)
80 $this->_do('push', $request);
// Forward $arg to setSize() on every queued entry.
83 function setSize($arg)
85 $this->_do('setSize', $arg);
// Forward $arg to setStatus() on every queued entry.
88 function setStatus($arg)
90 $this->_do('setStatus', $arg);
// Forward $arg to setDuration() on every queued entry.
93 function setDuration($arg)
95 $this->_do('setDuration', $arg);
99 * Read sequentially all previous entries from the beginning.
100 * while ($logentry = Request_AccessLogEntry::read()) ;
101 * For internal log analyzers: RecentReferrers, WikiAccessRestrictions
// Dispatch one read step: use the SQL reader when a log table name is set,
// otherwise the file reader.
// NOTE(review): the function header for this statement is not present in
// this extract; presumably it belongs to read() - confirm.
105 return $this->logtable ? $this->read_sql() : $this->read_file();
109 * Return iterator of referer items reverse sorted (latest first).
// Collect log entries that carry a referer, newest first, at most $limit.
// With $external_only, referers starting with $base are excluded.
// Uses a SQL query when $this->_dbi is set; otherwise scans the log file
// through read_file() and reverses/slices the collected array.
// NOTE(review): $base is assigned on a line missing from this extract.
// NOTE(review): the only visible assignment of $this->_dbi (constructor) is
// commented out, so the SQL branch may be unreachable - verify.
// NOTE(review): $limit is interpolated into the SQL text unquoted; callers
// are presumably expected to pass an integer - confirm.
// NOTE(review): in the file branch, $limit = 0 makes min($limit, count($logs))
// evaluate to 0, so array_slice() yields an empty list - confirm intended.
111 function get_referer($limit = 15, $external_only = false)
113 if ($external_only) { // see stdlin.php:isExternalReferrer()
115 $blen = strlen($base);
117 if (!empty($this->_dbi)) {
118 // check same hosts in referer and request and remove them
119 $ext_where = " AND LEFT(referer,$blen) <> " . $this->_dbi->quote($base)
120 . " AND LEFT(referer,$blen) <> LEFT(CONCAT(" . $this->_dbi->quote(SERVER_URL) . ",request_uri),$blen)";
121 return $this->_read_sql_query("(referer <>'' AND NOT(ISNULL(referer)))"
122 . ($external_only ? $ext_where : '')
123 . " ORDER BY time_stamp DESC"
124 . ($limit ? " LIMIT $limit" : ""));
126 $iter = new WikiDB_Array_generic_iter(0);
// $logs aliases the iterator's internal array, so edits to $logs below
// change what the iterator holds.
127 $logs =& $iter->_array;
128 while ($logentry = $this->read_file()) {
129 if (!empty($logentry->referer)
130 and (!$external_only or (substr($logentry->referer, 0, $blen) != $base))
132 $iter->_array[] = $logentry;
133 if ($limit and count($logs) > $limit)
// Entries were appended in file order; reverse so the latest comes first.
137 $logs = array_reverse($logs);
138 $logs = array_slice($logs, 0, min($limit, count($logs)));
144 * Return iterator of matching host items reverse sorted (latest first).
// Look up recent log entries for a host.
// SQL branch: queries rows whose request_host equals $host and whose
// time_stamp is within the last $since_minutes minutes, newest first.
// File branch: reads entries and collects them into an array iterator.
// NOTE(review): the SQL branch goes through read_sql(), which returns the
// result of a single next() call rather than the iterator itself - confirm
// this matches what callers expect.
// NOTE(review): $limit is used in the file branch but is neither a parameter
// nor assigned in the visible lines - looks undefined; verify.
// NOTE(review): the file branch tests $logentry->referer and never compares
// against $host or $since_minutes - looks copied from get_referer(); verify.
// NOTE(review): read_file() is invoked on a Request_AccessLogEntry, but no
// read_file() method of that class is visible in this extract - confirm.
146 function get_host($host, $since_minutes = 20)
148 if ($this->logtable) {
149 // mysql specific only:
150 return $this->read_sql("request_host=" . $this->_dbi->quote($host)
151 . " AND time_stamp > " . (time() - $since_minutes * 60)
152 . " ORDER BY time_stamp DESC");
154 $iter = new WikiDB_Array_generic_iter();
155 $logs =& $iter->_array;
156 $logentry = new Request_AccessLogEntry($this);
157 while ($logentry->read_file()) {
158 if (!empty($logentry->referer)) {
159 $iter->_array[] = $logentry;
160 if ($limit and count($logs) > $limit)
162 $logentry = new Request_AccessLogEntry($this);
165 $logs = array_reverse($logs);
166 $logs = array_slice($logs, 0, min($limit, count($logs)));
172 * Read sequentially backwards all previous entries from log file.
// Read the next line of the access log file and parse it into a new
// Request_AccessLogEntry using the NCSA "combined" layout (host, ident,
// user, [time], "request", status, size, "referer", "user agent").
// The file handle is opened lazily and kept in $this->reader between calls;
// it is closed once fgets() returns nothing more.
// NOTE(review): the function header and the return statements are missing
// from this extract; presumably this is the body of read_file() - confirm.
// NOTE(review): fgets() advances forward from the start of the file, so
// entries appear to be delivered oldest first.
// NOTE(review): the first statement replaces an already configured log file
// name with ACCESS_LOG; the condition looks inverted (perhaps meant to apply
// only when no log file is set) - verify.
178 if ($this->logfile) $this->logfile = ACCESS_LOG; // support Request_AccessLog::read
180 if (empty($this->reader)) // start at the beginning
181 $this->reader = fopen($this->logfile, "r");
182 if ($s = fgets($this->reader)) {
183 $entry = new Request_AccessLogEntry($this);
184 $re = '/^(\S+)\s(\S+)\s(\S+)\s\[(.+?)\] "([^"]+)" (\d+) (\d+) "([^"]*)" "([^"]*)"$/';
185 if (preg_match($re, $s, $m)) {
// $m[0] (the whole match) is skipped; the nine capture groups fill the entry.
186 list(, $entry->host, $entry->ident, $entry->user, $entry->time,
187 $entry->request, $entry->status, $entry->size,
188 $entry->referer, $entry->user_agent) = $m;
191 } else { // until the end
192 fclose($this->reader);
// Return the next row from the SQL access log.
// The query iterator is created on first use and cached in $this->sqliter.
// NOTE(review): while the iterator is cached, a different $where passed on a
// later call has no effect - confirm callers rely on a single query only.
197 function read_sql($where = '')
199 if (empty($this->sqliter))
200 $this->sqliter = $this->_read_sql_query($where);
201 return $this->sqliter->next();
// Build a SELECT over the access log that also exposes several columns
// under the attribute names used by Request_AccessLogEntry (request, time,
// user, host, user_agent), and return the backend's generic SQL iterator.
// $where, when non-empty, is appended verbatim after " WHERE "; it may also
// carry ORDER BY / LIMIT text, as the callers in this class do.
// NOTE(review): $where is not quoted or escaped here; callers must do so.
// NOTE(review): the FROM clause and the `global $request` declaration are on
// lines missing from this extract; $log_tbl is presumably used there.
204 function _read_sql_query($where = '')
207 $dbh =& $request->_dbi;
208 $log_tbl =& $this->logtable;
209 return $dbh->genericSqlIter("SELECT *,request_uri as request,request_time as time,remote_user as user,"
210 . "remote_host as host,agent as user_agent"
212 . ($where ? " WHERE $where" : ""));
// Iterate over the queued entries, but only when entries exist and the
// database handle is set and reports itself open.
// NOTE(review): the function header and the loop body are missing from this
// extract; presumably this is write_sql() calling write_sql() on each entry
// - confirm.
215 /* done in request->finish() before the db is closed */
219 $dbh =& $request->_dbi;
220 if (isset($this->entries) and $dbh and $dbh->isOpen())
221 foreach ($this->entries as $entry) {
226 /* done in the shutdown callback */
// Write every queued entry to the log file (when a log file is configured)
// and then drop the entry list.
// NOTE(review): closing braces are missing from this extract, so whether the
// unset() sits inside or after the `if` cannot be confirmed from here.
227 function write_file()
229 if (isset($this->entries) and $this->logfile)
230 foreach ($this->entries as $entry) {
231 $entry->write_file();
233 unset($this->entries);
236 /* in an ideal world... */
// Write to whichever sinks are configured (file and/or SQL table), then
// drop the entry list.
// NOTE(review): the function header is missing from this extract;
// presumably this is write() - confirm.
// NOTE(review): write_file() as visible in this class already unsets
// $this->entries, so the SQL step on the next line may find nothing left to
// write - verify.
239 if ($this->logfile) $this->write_file();
240 if ($this->logtable) $this->write_sql();
241 unset($this->entries);
245 class Request_AccessLogEntry
250 * The log entry will be automatically appended to the log file or
251 * SQL table when the current request terminates.
253 * If you want to modify a Request_AccessLogEntry before it gets
254 * written (e.g. via the setStatus and setSize methods) you should
255 * use an '&' on the constructor, so that you're working with the
256 * original (rather than a copy) object.
259 * $log_entry = & new Request_AccessLogEntry("/tmp/wiki_access_log");
260 * $log_entry->setStatus(401);
261 * $log_entry->push($request);
// Constructor. Binds the entry to its owning access log, copies the log
// file name, stamps the creation time and sets default status/size.
266 function Request_AccessLogEntry(&$accesslog)
268 $this->_accesslog = $accesslog;
269 $this->logfile = $accesslog->logfile;
270 $this->time = time();
271 $this->status = 200; // see setStatus()
272 $this->size = 0; // see setSize()
276 * @param $request object Request object for current request.
// Fill this entry from the current request: remote host and ident, the
// authenticated user name, the "METHOD URI PROTOCOL" request line, and the
// referer and user-agent headers (both cast to string).
// NOTE(review): in the visible lines $this->user is assigned only for an
// authenticated user; any fallback lives on lines missing from this extract.
278 function push(&$request)
280 $this->host = $request->get('REMOTE_HOST');
281 $this->ident = $request->get('REMOTE_IDENT');
284 $user = $request->getUser();
285 if ($user->isAuthenticated())
286 $this->user = $user->UserName();
289 $this->request = join(' ', array($request->get('REQUEST_METHOD'),
290 $request->get('REQUEST_URI'),
291 $request->get('SERVER_PROTOCOL')));
292 $this->referer = (string)$request->get('HTTP_REFERER');
293 $this->user_agent = (string)$request->get('HTTP_USER_AGENT');
297 * Set result status code.
299 * @param $status integer HTTP status code.
// Record the status code for this entry; the value is stored as given.
301 function setStatus($status)
303 $this->status = $status;
309 * @param $size integer
// Record the response size for this entry, coerced to an integer.
311 function setSize($size = 0)
313 $this->size = (int)$size;
// Record the request duration as a decimal string. The value is formatted
// with "%f" and any comma is replaced by a dot, so that a locale-specific
// decimal comma does not reach the database layer.
316 function setDuration($seconds)
318 // Pear DB does not correctly quote , in floats using ?. e.g. in european locales.
320 $this->duration = str_replace(",", ".", sprintf("%f", $seconds));
324 * Get time zone offset.
326 * This is a static member function.
328 * @param $time integer Unix timestamp (defaults to current time).
329 * @return string Zone offset, e.g. "-0800" for PST.
// Format the time zone offset for $time as sign + two-digit hours + two-digit
// minutes. date("Z") supplies the offset in seconds.
// NOTE(review): the lines that default $time and that derive $negoffset (and
// presumably make $offset non-negative) are missing from this extract.
331 function _zone_offset($time = false)
335 $offset = date("Z", $time);
341 $offhours = floor($offset / 3600);
// Minutes left over after removing the whole hours.
342 $offmins = $offset / 60 - $offhours * 60;
343 return sprintf("%s%02d%02d", $negoffset, $offhours, $offmins);
347 * Format time in NCSA format.
349 * This is a static member function.
351 * @param $time integer Unix timestamp (defaults to current time).
352 * @return string Formatted date & time.
// Format $time as "dd/Mon/yyyy:hh:mm:ss" followed by a space and the zone
// offset.
// NOTE(review): _zone_offset() is called without $time, so the offset is
// computed for that method's default time rather than for the timestamp
// being formatted; the two can differ across a DST change - verify.
// NOTE(review): the lines defaulting $time are missing from this extract.
354 function _ncsa_time($time = false)
358 return date("d/M/Y:H:i:s", $time) .
359 " " . $this->_zone_offset();
// Write this entry to each sink configured on the owning access log.
// NOTE(review): the function header is missing from this extract;
// presumably this is write() - confirm.
364 if ($this->_accesslog->logfile) $this->write_file();
365 if ($this->_accesslog->logtable) $this->write_sql();
369 * Write entry to log file.
// Append this entry to the log file as one NCSA "combined" line.
// Any read handle still open on the owning access log is closed and
// forgotten first; the file is then opened in append mode.
// NOTE(review): field values are inserted into the line as-is; a double
// quote inside request, referer or user agent would not match the pattern
// used when reading entries back - verify.
// NOTE(review): lines between fopen() and fputs() and after fputs() are
// missing from this extract (presumably locking and fclose) - confirm.
371 function write_file()
373 $entry = sprintf('%s %s %s [%s] "%s" %d %d "%s" "%s"',
374 $this->host, $this->ident, $this->user,
375 $this->_ncsa_time($this->time),
376 $this->request, $this->status, $this->size,
377 $this->referer, $this->user_agent);
378 if (!empty($this->_accesslog->reader)) {
379 fclose($this->_accesslog->reader);
380 unset($this->_accesslog->reader);
382 //Error log doesn't provide locking.
383 //error_log("$entry\n", 3, $this->logfile);
385 if (($fp = fopen($this->logfile, "a"))) {
387 fputs($fp, "$entry\n");
// Store this entry in the SQL access log through the database backend.
// Runs only when the database handle is set, open, and a log table name is
// configured. For POST requests the posted variables are serialized (with
// the known password fields masked) and cut to 254 characters; otherwise
// the query string is stored.
// NOTE(review): the function header, the `global $request` declaration and
// the `else` line are missing from this extract; presumably this is
// write_sql() - confirm.
392 /* This is better done by Apache mod_log_sql */
393 /* If ACCESS_LOG_SQL & 2 we write it on our own */
398 $dbh =& $request->_dbi;
399 if ($dbh and $dbh->isOpen() and $this->_accesslog->logtable) {
400 //$log_tbl =& $this->_accesslog->logtable;
401 if ($request->get('REQUEST_METHOD') == "POST") {
402 // strangely HTTP_POST_VARS doesn't contain all posted vars.
403 $args = $_POST; // copy not ref. clone not needed on hashes
// Mask password fields so they never reach the log table.
405 if (!empty($args['auth']['passwd'])) $args['auth']['passwd'] = '<not displayed>';
406 if (!empty($args['dbadmin']['passwd'])) $args['dbadmin']['passwd'] = '<not displayed>';
407 if (!empty($args['pref']['passwd'])) $args['pref']['passwd'] = '<not displayed>';
408 if (!empty($args['pref']['passwd2'])) $args['pref']['passwd2'] = '<not displayed>';
// Truncation may cut the serialized text mid-value, so the stored string is
// not guaranteed to unserialize.
409 $this->request_args = substr(serialize($args), 0, 254); // if VARCHAR(255) is used.
411 $this->request_args = $request->get('QUERY_STRING');
413 $this->request_method = $request->get('REQUEST_METHOD');
414 $this->request_uri = $request->get('REQUEST_URI');
415 // duration problem: sprintf "%f" might use comma e.g. "100,201" in european locales
416 $dbh->_backend->write_accesslog($this);
422 * Shutdown callback. Ensures that the file is written.
425 * @see Request_AccessLogEntry
// Shutdown callback (registered by name in the Request_AccessLog
// constructor). Writes every entry still queued on $request->_accesslog to
// the log file, then drops the entry list.
// NOTE(review): $request is used here but the `global` declaration is on a
// line missing from this extract - confirm.
427 function Request_AccessLogEntry_shutdown_function()
431 if (isset($request->_accesslog->entries) and $request->_accesslog->logfile)
432 foreach ($request->_accesslog->entries as $entry) {
433 $entry->write_file();
435 unset($request->_accesslog->entries);
438 // TODO: SQL access methods....
439 // (c) 2005 Charles Corrigan (the mysql parts)
440 // (c) 2006 Reini Urban (the postgresql parts)
441 // from AnalyseAccessLogSql.php
442 class Request_AccessLog_SQL
446 * Build the query string
448 * FIXME: some or all of these queries may be MySQL specific / non-portable
449 * FIXME: properly quote the string args
451 * The column names displayed are generated from the actual query column
452 * names, so make sure that each column in the query is given a user
453 * friendly name. Note that the column names are passed to _() and so may be
456 * If there are query specific where conditions, then the construction
457 * " if ($where_conditions<>'')
458 * $where_conditions = 'WHERE '.$where_conditions.' ';"
459 * should be changed to
460 * " if ($where_conditions<>'')
461 * $where_conditions = 'AND '.$where_conditions.' ';"
462 * and in the assignment to query have something like
463 * " $query= "SELECT "
465 * ."FROM $accesslog "
466 * ."WHERE referer IS NOT NULL "
// Build the SQL text for the report selected by $args['mode'].
// The shared WHERE fragment comes from _getWhereConditions(); the table name
// is the optional $DBParams['prefix'] followed by "accesslog". Several
// column expressions are chosen per database backend type, falling back to
// the plain column.
// $args is taken by reference and the 'external_referers' mode overwrites
// $args['local_referrers'].
// NOTE(review): this extract omits many original lines (the "SELECT"/"FROM"
// pieces, several `case` labels, `break`s, the assignment of $now and the
// final return), so the exact query text cannot be confirmed from here.
// NOTE(review): $sessname and the translated bucket labels are placed inside
// single-quoted SQL literals without escaping - verify they cannot contain
// a quote.
469 function _getQueryString(&$args)
471 // extract any parametrised conditions from the arguments,
472 // in particular, how much history to select
473 $where_conditions = $this->_getWhereConditions($args);
475 // get the correct name for the table
476 //FIXME is there a more correct way to do this?
477 global $DBParams, $request;
478 $accesslog = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '') . "accesslog";
481 $backend_type = $request->_dbi->_backend->backendType();
// Expression for the referer with its query string (from '?') removed.
482 switch ($backend_type) {
484 $Referring_URL = "left(referer,length(referer)-instr(reverse(referer),'?'))";
488 $Referring_URL = "substr(referer,0,position('?' in referer))";
491 $Referring_URL = "referer";
493 switch ($args['mode']) {
494 case 'referring_urls':
495 if ($where_conditions <> '')
496 $where_conditions = 'WHERE ' . $where_conditions . ' ';
498 . "$Referring_URL AS Referring_URL, "
499 . "count(*) AS Referral_Count "
502 . "GROUP BY Referring_URL";
504 case 'external_referers':
// Force the external-only filter and rebuild the conditions with it.
505 $args['local_referrers'] = 'false';
506 $where_conditions = $this->_getWhereConditions($args);
507 if ($where_conditions <> '')
508 $where_conditions = 'WHERE ' . $where_conditions . ' ';
510 . "$Referring_URL AS Referring_URL, "
511 . "count(*) AS Referral_Count "
514 . "GROUP BY Referring_URL";
516 case 'referring_domains':
517 if ($where_conditions <> '')
518 $where_conditions = 'WHERE ' . $where_conditions . ' ';
519 switch ($backend_type) {
521 $Referring_Domain = "left(referer, if(locate('/', referer, 8) > 0,locate('/', referer, 8) -1, length(referer)))";
525 $Referring_Domain = "substr(referer,0,8) || regexp_replace(substr(referer,8), '/.*', '')";
528 $Referring_Domain = "referer";
532 . "$Referring_Domain AS Referring_Domain, "
533 . "count(*) AS Referral_Count "
536 . "GROUP BY Referring_Domain";
539 if ($where_conditions <> '')
540 $where_conditions = 'WHERE ' . $where_conditions . ' ';
542 . "remote_host AS Remote_Host, "
543 . "count(*) AS Access_Count "
546 . "GROUP BY Remote_Host";
549 if ($where_conditions <> '')
550 $where_conditions = 'WHERE ' . $where_conditions . ' ';
552 . "remote_user AS User, "
553 . "count(*) AS Access_Count "
556 . "GROUP BY remote_user";
559 if ($where_conditions <> '')
560 $where_conditions = 'WHERE ' . $where_conditions . ' ';
562 . "remote_host AS Remote_Host, "
563 . "remote_user AS User, "
564 . "count(*) AS Access_Count "
567 . "GROUP BY remote_host, remote_user";
570 // This queries for all entries in the SQL access log table that
571 // have a dns name that I know to be a web search engine crawler and
572 // categorises the results into time buckets as per the list below
574 // 1 - 1 hour - 3600 = 60 * 60
575 // 2 - 1 day - 86400 = 60 * 60 * 24
576 // 3 - 1 week - 604800 = 60 * 60 * 24 * 7
577 // 4 - 1 month - 2629800 = 60 * 60 * 24 * 365.25 / 12
578 // 5 - 1 year - 31557600 = 60 * 60 * 24 * 365.25
581 . "CASE WHEN $now-time_stamp<60 THEN '" . _("0 - last minute") . "' ELSE "
582 . "CASE WHEN $now-time_stamp<3600 THEN '" . _("1 - 1 minute to 1 hour") . "' ELSE "
583 . "CASE WHEN $now-time_stamp<86400 THEN '" . _("2 - 1 hour to 1 day") . "' ELSE "
584 . "CASE WHEN $now-time_stamp<604800 THEN '" . _("3 - 1 day to 1 week") . "' ELSE "
585 . "CASE WHEN $now-time_stamp<2629800 THEN '" . _("4 - 1 week to 1 month") . "' ELSE "
586 . "CASE WHEN $now-time_stamp<31557600 THEN '" . _("5 - 1 month to 1 year") . "' ELSE "
587 . "'" . _("6 - more than 1 year") . "' END END END END END END AS Time_Scale, "
588 . "remote_host AS Remote_Host, "
589 . "count(*) AS Access_Count "
591 . "WHERE (remote_host LIKE '%googlebot.com' "
592 . "OR remote_host LIKE '%alexa.com' "
593 . "OR remote_host LIKE '%inktomisearch.com' "
594 . "OR remote_host LIKE '%msnbot.msn.com') "
// A fixed WHERE already exists here, so extra conditions are joined with AND.
595 . ($where_conditions ? 'AND ' . $where_conditions : '')
596 . "GROUP BY Time_Scale, remote_host";
598 case "search_bots_hits":
599 // This queries for all entries in the SQL access log table that
600 // have a dns name that I know to be a web search engine crawler and
601 // displays the URI that was hit.
602 // If PHPSESSID appears in the URI, just display the URI to the left of this
603 $sessname = session_name();
604 switch ($backend_type) {
606 $Request_URI = "IF(instr(request_uri, '$sessname')=0, request_uri,left(request_uri, instr(request_uri, '$sessname')-2))";
610 $Request_URI = "regexp_replace(request_uri, '$sessname.*', '')";
613 $Request_URI = 'request_uri';
618 . "CASE WHEN $now-time_stamp<60 THEN '" . _("0 - last minute") . "' ELSE "
619 . "CASE WHEN $now-time_stamp<3600 THEN '" . _("1 - 1 minute to 1 hour") . "' ELSE "
620 . "CASE WHEN $now-time_stamp<86400 THEN '" . _("2 - 1 hour to 1 day") . "' ELSE "
621 . "CASE WHEN $now-time_stamp<604800 THEN '" . _("3 - 1 day to 1 week") . "' ELSE "
622 . "CASE WHEN $now-time_stamp<2629800 THEN '" . _("4 - 1 week to 1 month") . "' ELSE "
623 . "CASE WHEN $now-time_stamp<31557600 THEN '" . _("5 - 1 month to 1 year") . "' ELSE "
624 . "'" . _("6 - more than 1 year") . "' END END END END END END AS Time_Scale, "
625 . "remote_host AS Remote_Host, "
626 . "$Request_URI AS Request_URI "
628 . "WHERE (remote_host LIKE '%googlebot.com' "
629 . "OR remote_host LIKE '%alexa.com' "
630 . "OR remote_host LIKE '%inktomisearch.com' "
631 . "OR remote_host LIKE '%msnbot.msn.com') "
632 . ($where_conditions ? 'AND ' . $where_conditions : '')
633 . "ORDER BY time_stamp";
638 /** Honeypot for xgettext. Those strings are translated dynamically.
// Lists mode names inside _() calls so that the translation extractor sees
// them; the values are looked up dynamically elsewhere (see _getCaption()).
// NOTE(review): most of the list is on lines missing from this extract.
640 function _locale_dummy()
645 _("external_referers"),
646 _("referring_domains"),
651 _("search_bots_hits"),
// Default argument set for the report: mode, local_referrers, period and
// count are visible below; all values are strings.
// NOTE(review): at least one entry (presumably 'caption', which
// _getCaption() reads) is on a line missing from this extract - confirm.
660 function getDefaultArguments()
663 'mode' => 'referring_domains',
664 // referring_domains, referring_urls, remote_hosts, users, host_users, search_bots, search_bots_hits
666 // blank means use the mode as the caption/title for the output
667 'local_referrers' => 'true', // only show external referring sites
668 'period' => '', // the type of period to report:
669 // may be weeks, days, hours, minutes, or blank for all
670 'count' => '0' // the number of periods to report
// Render the selected report as an HTML table: build the query, return an
// error paragraph for an unrecognised mode, run the query to get the body
// rows, then assemble the table with caption and header row.
// NOTE(review): $args and $dbi are used but not assigned in the visible
// lines; their setup is on lines missing from this extract - confirm.
675 function table_output()
677 $query = $this->_getQueryString($args);
680 return HTML::p(sprintf(_("Unrecognised parameter 'mode=%s'"),
683 // get the data back.
684 // Note that this must be done before the final generation of the table,
685 // otherwise the headers will not be ready
686 $tbody = $this->_getQueryResults($query, $dbi);
688 return HTML::table(array('border' => 1,
691 HTML::caption(HTML::h1(HTML::br(), $this->_getCaption($args))),
692 HTML::thead($this->_theadrow),
// Run $query and turn the result rows into an HTML table body. Each row also
// goes through _setHeaders() so the header row is filled from the column
// names. A placeholder body with "<empty>" is prepared for the no-result
// case; the iterator is freed at the end.
// NOTE(review): empty() is used to pick the '-' placeholder, so the values
// 0 and "0" are also shown as '-' - confirm that is intended for counts.
// NOTE(review): the condition selecting the empty case, the creation of $tr
// and the return are on lines missing from this extract.
696 function _getQueryResults($query, &$dbi)
698 $queryResult = $dbi->genericSqlIter($query);
700 $tbody = HTML::tbody(HTML::tr(HTML::td(_("<empty>"))));
702 $tbody = HTML::tbody();
703 while ($row = $queryResult->next()) {
704 $this->_setHeaders($row);
706 foreach ($row as $value) {
707 // output a '-' for empty values, otherwise the table looks strange
708 $tr->pushContent(HTML::td(empty($value) ? '-' : $value));
710 $tbody->pushContent($tr);
713 $queryResult->free();
// Fill the table header row once, using the (translated) keys of the first
// row seen; later calls do nothing because $this->_headerSet is then true.
717 function _setHeaders($row)
719 if (!$this->_headerSet) {
720 foreach ($row as $key => $value) {
721 $this->_theadrow->pushContent(HTML::th(_($key)));
723 $this->_headerSet = true;
// Build the SQL condition text (without the WHERE keyword) from $args:
//  - a time window "time_stamp > <now - period * count>" when 'period' is set
//  - an external-referer filter when 'local_referrers' is not 'true'
// Conditions are joined with " AND " and a trailing space is appended when
// the result is non-empty; an empty string means no conditions.
// NOTE(review): the assignments of $since for 'minutes' and 'hours', the
// `global $request` declaration and the `case` labels of the backend switch
// are on lines missing from this extract - confirm.
// NOTE(review): SERVER_URL is placed inside a single-quoted SQL literal
// without escaping - verify it cannot contain a quote.
727 function _getWhereConditions(&$args)
729 $where_conditions = '';
731 if ($args['period'] <> '') {
733 if ($args['period'] == 'minutes') {
735 } elseif ($args['period'] == 'hours') {
737 } elseif ($args['period'] == 'days') {
738 $since = 60 * 60 * 24;
739 } elseif ($args['period'] == 'weeks') {
740 $since = 60 * 60 * 24 * 7;
// Seconds per period times the number of periods requested.
742 $since = $since * $args['count'];
744 if ($where_conditions <> '')
745 $where_conditions = $where_conditions . ' AND ';
// Convert the window length into an absolute cut-off timestamp.
746 $since = time() - $since;
747 $where_conditions = $where_conditions . "time_stamp > $since";
751 if ($args['local_referrers'] <> 'true') {
753 if ($where_conditions <> '')
754 $where_conditions = $where_conditions . ' AND ';
755 $localhost = SERVER_URL;
756 $len = strlen($localhost);
757 $backend_type = $request->_dbi->_backend->backendType();
758 switch ($backend_type) {
760 $ref_localhost = "left(referer,$len)<>'$localhost'";
764 $ref_localhost = "substr(referer,0,$len)<>'$localhost'";
769 $where_conditions = $where_conditions . $ref_localhost;
772 // The assumed contract is that there is a space at the end of the
773 // conditions string, so that following SQL clauses (such as GROUP BY)
774 // will not cause a syntax error
775 if ($where_conditions <> '')
776 $where_conditions = $where_conditions . ' ';
778 return $where_conditions;
// Build the table caption from $args['caption'] or, presumably when that is
// blank, from the translated mode name; a " - <count> <period>" suffix is
// appended when both a period and a non-zero count are given.
// NOTE(review): the test that chooses between caption and mode, and the
// return statement, are on lines missing from this extract - confirm.
781 function _getCaption(&$args)
783 $caption = $args['caption'];
785 $caption = gettext($args['mode']);
786 if ($args['period'] <> '' && $args['count'])
787 $caption = $caption . " - " . $args['count'] . " " . gettext($args['period']);
797 // c-hanging-comment-ender-p: nil
798 // indent-tabs-mode: nil