4 * Copyright 2005 Charles Corrigan and $ThePhpWikiProgrammingTeam
6 * This file is (not yet) part of PhpWiki.
8 * PhpWiki is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * PhpWiki is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * A plugin that provides a framework and some useful queries to analyse the SQL
25 * access log. This information may be sensitive and so is limited to
26 * administrator access only.
28 * To add a new query, see _getQueryString()
30 class WikiPlugin_AnalyseAccessLogSql
34 * Build the query string
36 * FIXME: some or all of these queries may be MySQL specific / non-portable
37 * FIXME: properly quote the string args
39 * The column names displayed are generated from the actual query column
40 * names, so make sure that each column in the query is given a user
41 * friendly name. Note that the column names are passed to _() and so may be
44 * If there are query specific where conditions, then the construction
45 * " if ($where_conditions<>'')
46 * $where_conditions = 'WHERE '.$where_conditions.' ';"
47 * should be changed to
48 * " if ($where_conditions<>'')
49 * $where_conditions = 'AND '.$where_conditions.' ';"
50 * and in the assignment to query have something like
54 * ."WHERE referer IS NOT NULL "
57 function _getQueryString(&$args) {
58 // extract any parametrised conditions from the arguments,
59 // in particular, how much history to select
60 $where_conditions = $this->_getWhereConditions($args);
62 // get the correct name for the table
63 //FIXME is there a more correct way to do this?
64 global $DBParams, $request;
65 $accesslog = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '')."accesslog";
68 $backend_type = $request->_dbi->_backend->backendType();
69 switch ($backend_type) {
71 $Referring_URL = "left(referer,length(referer)-instr(reverse(referer),'?'))"; break;
74 $Referring_URL = "substr(referer,0,position('?' in referer))"; break;
76 $Referring_URL = "referer";
78 switch ($args['mode']) {
79 case 'referring_urls':
80 if ($where_conditions<>'')
81 $where_conditions = 'WHERE '.$where_conditions.' ';
83 . "$Referring_URL AS Referring_URL, "
84 . "count(*) AS Referral_Count "
87 . "GROUP BY Referring_URL";
89 case 'external_referers':
90 $args['local_referrers'] = 'false';
91 $where_conditions = $this->_getWhereConditions($args);
92 if ($where_conditions<>'')
93 $where_conditions = 'WHERE '.$where_conditions.' ';
95 . "$Referring_URL AS Referring_URL, "
96 . "count(*) AS Referral_Count "
99 . "GROUP BY Referring_URL";
101 case 'referring_domains':
102 if ($where_conditions<>'')
103 $where_conditions = 'WHERE '.$where_conditions.' ';
104 switch ($backend_type) {
106 $Referring_Domain = "left(referer, if(locate('/', referer, 8) > 0,locate('/', referer, 8) -1, length(referer)))"; break;
109 $Referring_Domain = "substr(referer,0,8) || regexp_replace(substr(referer,8), '/.*', '')"; break;
111 $Referring_Domain = "referer"; break;
114 . "$Referring_Domain AS Referring_Domain, "
115 . "count(*) AS Referral_Count "
118 . "GROUP BY Referring_Domain";
121 if ($where_conditions<>'')
122 $where_conditions = 'WHERE '.$where_conditions.' ';
124 ."remote_host AS Remote_Host, "
125 ."count(*) AS Access_Count "
128 ."GROUP BY Remote_Host";
131 if ($where_conditions<>'')
132 $where_conditions = 'WHERE '.$where_conditions.' ';
134 ."remote_user AS User, "
135 ."count(*) AS Access_Count "
138 ."GROUP BY remote_user";
141 if ($where_conditions<>'')
142 $where_conditions = 'WHERE '.$where_conditions.' ';
144 ."remote_host AS Remote_Host, "
145 ."remote_user AS User, "
146 ."count(*) AS Access_Count "
149 ."GROUP BY remote_host, remote_user";
152 // This queries for all entries in the SQL access log table that
153 // have a dns name that I know to be a web search engine crawler and
154 // categorises the results into time buckets as per the list below
156 // 1 - 1 hour - 3600 = 60 * 60
157 // 2 - 1 day - 86400 = 60 * 60 * 24
158 // 3 - 1 week - 604800 = 60 * 60 * 24 * 7
159 // 4 - 1 month - 2629800 = 60 * 60 * 24 * 365.25 / 12
160 // 5 - 1 year - 31557600 = 60 * 60 * 24 * 365.25
163 ."CASE WHEN $now-time_stamp<60 THEN '"._("0 - last minute")."' ELSE "
164 ."CASE WHEN $now-time_stamp<3600 THEN '"._("1 - 1 minute to 1 hour")."' ELSE "
165 ."CASE WHEN $now-time_stamp<86400 THEN '"._("2 - 1 hour to 1 day")."' ELSE "
166 ."CASE WHEN $now-time_stamp<604800 THEN '"._("3 - 1 day to 1 week")."' ELSE "
167 ."CASE WHEN $now-time_stamp<2629800 THEN '"._("4 - 1 week to 1 month")."' ELSE "
168 ."CASE WHEN $now-time_stamp<31557600 THEN '"._("5 - 1 month to 1 year")."' ELSE "
169 ."'"._("6 - more than 1 year")."' END END END END END END AS Time_Scale, "
170 ."remote_host AS Remote_Host, "
171 ."count(*) AS Access_Count "
173 ."WHERE (remote_host LIKE '%googlebot.com' "
174 ."OR remote_host LIKE '%alexa.com' "
175 ."OR remote_host LIKE '%inktomisearch.com' "
176 ."OR remote_host LIKE '%msnbot.msn.com') "
177 .($where_conditions ? 'AND '.$where_conditions : '')
178 ."GROUP BY Time_Scale, remote_host";
180 case "search_bots_hits":
181 // This queries for all entries in the SQL access log table that
182 // have a dns name that I know to be a web search engine crawler and
183 // displays the URI that was hit.
184 // If PHPSESSID appears in the URI, just display the URI to the left of this
185 $sessname = session_name();
186 switch ($backend_type) {
188 $Request_URI = "IF(instr(request_uri, '$sessname')=0, request_uri,left(request_uri, instr(request_uri, '$sessname')-2))";
192 $Request_URI = "regexp_replace(request_uri, '$sessname.*', '')"; break;
194 $Request_URI = 'request_uri'; break;
198 ."CASE WHEN $now-time_stamp<60 THEN '"._("0 - last minute")."' ELSE "
199 ."CASE WHEN $now-time_stamp<3600 THEN '"._("1 - 1 minute to 1 hour")."' ELSE "
200 ."CASE WHEN $now-time_stamp<86400 THEN '"._("2 - 1 hour to 1 day")."' ELSE "
201 ."CASE WHEN $now-time_stamp<604800 THEN '"._("3 - 1 day to 1 week")."' ELSE "
202 ."CASE WHEN $now-time_stamp<2629800 THEN '"._("4 - 1 week to 1 month")."' ELSE "
203 ."CASE WHEN $now-time_stamp<31557600 THEN '"._("5 - 1 month to 1 year")."' ELSE "
204 ."'"._("6 - more than 1 year")."' END END END END END END AS Time_Scale, "
205 ."remote_host AS Remote_Host, "
206 ."$Request_URI AS Request_URI "
208 ."WHERE (remote_host LIKE '%googlebot.com' "
209 ."OR remote_host LIKE '%alexa.com' "
210 ."OR remote_host LIKE '%inktomisearch.com' "
211 ."OR remote_host LIKE '%msnbot.msn.com') "
212 .($where_conditions ? 'AND '.$where_conditions : '')
213 ."ORDER BY time_stamp";
218 /** Honeypot for xgettext. Those strings are translated dynamically.
220 function _locale_dummy() {
224 _("external_referers"),
225 _("referring_domains"),
230 _("search_bots_hits"),
239 function getDefaultArguments() {
241 'mode' => 'referring_domains',
242 // referring_domains, referring_urls, remote_hosts, users, host_users, search_bots, search_bots_hits
244 // blank means use the mode as the caption/title for the output
245 'local_referrers' => 'true', // only show external referring sites
246 'period' => '', // the type of period to report:
247 // may be weeks, days, hours, minutes, or blank for all
248 'count' => '0' // the number of periods to report
252 function getName () {
253 return _("AnalyseAccessLogSql");
256 function getDescription () {
257 return _("Show summary information from the access log table.");
260 function run($dbi, $argstr, &$request, $basepage) {
261 // flag that the output may not be cached - i.e. it is dynamic
262 $request->setArg('nocache', 1);
264 if (!$request->_user->isAdmin())
265 return HTML::p(_("The requested information is available only to Administrators."));
267 if (!ACCESS_LOG_SQL) // need read access
268 return HTML::p(_("The SQL_ACCESS_LOG is not enabled."));
270 // set aside a place for the table headers, see _setHeaders()
271 $this->_theadrow = HTML::tr();
272 $this->_headerSet = false;
274 $args = $this->getArgs($argstr, $request);
276 $query = $this->_getQueryString($args);
279 return HTML::p(sprintf( _("Unrecognised parameter 'mode=%s'"),
282 // get the data back.
283 // Note that this must be done before the final generation ofthe table,
284 // otherwise the headers will not be ready
285 $tbody = $this->_getQueryResults($query, $dbi);
287 return HTML::table(array('border' => 1,
290 HTML::caption(HTML::h1(HTML::br(),$this->_getCaption($args))),
291 HTML::thead($this->_theadrow),
295 function _getQueryResults($query, &$dbi) {
296 $queryResult = $dbi->genericSqlIter($query);
298 $tbody = HTML::tbody(HTML::tr(HTML::td(_("<empty>"))));
300 $tbody = HTML::tbody();
301 while ($row = $queryResult->next()) {
302 $this->_setHeaders($row);
304 foreach ($row as $value) {
305 // output a '-' for empty values, otherwise the table looks strange
306 $tr->pushContent(HTML::td( empty($value) ? '-' : $value ));
308 $tbody->pushContent($tr);
311 $queryResult->free();
315 function _setHeaders($row) {
316 if (!$this->_headerSet) {
317 foreach ($row as $key => $value) {
318 $this->_theadrow->pushContent(HTML::th(_($key)));
320 $this->_headerSet = true;
324 function _getWhereConditions(&$args) {
325 $where_conditions = '';
327 if ($args['period']<>'') {
329 if ($args['period']=='minutes') {
331 } elseif ($args['period']=='hours') {
333 } elseif ($args['period']=='days') {
334 $since = 60 * 60 * 24;
335 } elseif ($args['period']=='weeks') {
336 $since = 60 * 60 * 24 * 7;
338 $since = $since * $args['count'];
340 if ($where_conditions<>'')
341 $where_conditions = $where_conditions.' AND ';
342 $since = time() - $since;
343 $where_conditions = $where_conditions."time_stamp > $since";
347 if ($args['local_referrers']<>'true') {
349 if ($where_conditions<>'')
350 $where_conditions = $where_conditions.' AND ';
351 $localhost = SERVER_URL;
352 $len = strlen($localhost);
353 $backend_type = $request->_dbi->_backend->backendType();
354 switch ($backend_type) {
356 $ref_localhost = "left(referer,$len)<>'$localhost'"; break;
359 $ref_localhost = "substr(referer,0,$len)<>'$localhost'"; break;
363 $where_conditions = $where_conditions.$ref_localhost;
366 // The assumed contract is that there is a space at the end of the
367 // conditions string, so that following SQL clauses (such as GROUP BY)
368 // will not cause a syntax error
369 if ($where_conditions<>'')
370 $where_conditions = $where_conditions.' ';
372 return $where_conditions;
375 function _getCaption(&$args) {
376 $caption = $args['caption'];
378 $caption = gettext($args['mode']);
379 if ($args['period']<>'' && $args['count'])
380 $caption = $caption." - ".$args['count']." ". gettext($args['period']);
390 // c-hanging-comment-ender-p: nil
391 // indent-tabs-mode: nil