2 rcs_id('$Id: AnalyseAccessLogSql.php,v 1.1 2005-02-12 17:26:24 rurban Exp $');
4 Copyright 2005 Charles Corrigan and $ThePhpWikiProgrammingTeam
6 This file is (not yet) part of PhpWiki.
8 PhpWiki is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 PhpWiki is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with PhpWiki; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * A plugin that provides a framework and some useful queries to analyse the SQL
24 * access log. This information may be sensitive and so is limited to
25 * administrator access only.
27 * To add a new query, see _getQueryString()
29 class WikiPlugin_AnalyseAccessLogSql
33 * Build the query string
35 * FIXME: some or all of these queries may be MySQL specific / non-portable
36 * FIXME: properly quote the string args
38 * The column names displayed are generated from the actual query column
39 * names, so make sure that each column in the query is given a user
40 * friendly name. Note that the column names are passed to _() and so may be
43 * If there are query specific where conditions, then the construction
44 * " if ($where_conditions<>'')
45 * $where_conditions = 'WHERE '.$where_conditions.' ';"
46 * should be changed to
47 * " if ($where_conditions<>'')
48 * $where_conditions = 'AND '.$where_conditions.' ';"
49 * and in the assignment to query have something like
53 * ."WHERE referer IS NOT NULL "
56 function _getQueryString(&$args) {
57 // extract any parametrised conditions from the arguments,
58 // in particular, how much history to select
59 $where_conditions = $this->_getWhereConditions($args);
61 // get the correct name for the table
62 //FIXME is there a more correct way to do this?
64 $accesslog = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '')."accesslog";
67 if ($args['mode']=='referring_urls') {
68 if ($where_conditions<>'')
69 $where_conditions = 'WHERE '.$where_conditions.' ';
71 ."left(referer,length(referer)-instr(reverse(referer),'?')) AS Referring_URL, "
72 ."count(*) AS Referral_Count "
75 ."GROUP BY Referring_URL";
77 } elseif ($args['mode']=='external_referers') {
78 $args['local_referrers'] = 'false';
79 $where_conditions = $this->_getWhereConditions($args);
80 if ($where_conditions<>'')
81 $where_conditions = 'WHERE '.$where_conditions.' ';
83 ."left(referer,length(referer)-instr(reverse(referer),'?')) AS Referring_URL, "
84 ."count(*) AS Referral_Count "
87 ."GROUP BY Referring_URL";
89 } elseif ($args['mode']=='referring_domains') {
90 if ($where_conditions<>'')
91 $where_conditions = 'WHERE '.$where_conditions.' ';
93 ."left(referer, if(locate('/', referer, 8) > 0,locate('/', referer, 8) -1, length(referer))) AS Referring_Domain, "
94 ."count(*) AS Referral_Count "
97 ."GROUP BY Referring_Domain";
99 } elseif ($args['mode']=='remote_hosts') {
100 if ($where_conditions<>'')
101 $where_conditions = 'WHERE '.$where_conditions.' ';
103 ."remote_host AS Remote_Host, "
104 ."count(*) AS Access_Count "
107 ."GROUP BY Remote_Host";
109 } elseif ($args['mode']=='users') {
110 if ($where_conditions<>'')
111 $where_conditions = 'WHERE '.$where_conditions.' ';
113 ."remote_user AS User, "
114 ."count(*) AS Access_Count "
119 } elseif ($args['mode']=='host_users') {
120 if ($where_conditions<>'')
121 $where_conditions = 'WHERE '.$where_conditions.' ';
123 ."remote_host AS Remote_Host, "
124 ."remote_user AS User, "
125 ."count(*) AS Access_Count "
128 ."GROUP BY Remote_Host, User";
130 } elseif ($args['mode']=="search_bots") {
131 // This queries for all entries in the SQL access log table that
132 // have a dns name that I know to be a web search engine crawler and
133 // categorises the results into time buckets as per the list below
135 // 1 - 1 hour - 3600 = 60 * 60
136 // 2 - 1 day - 86400 = 60 * 60 * 24
137 // 3 - 1 week - 604800 = 60 * 60 * 24 * 7
138 // 4 - 1 month - 2629800 = 60 * 60 * 24 * 365.25 / 12
139 // 5 - 1 year - 31557600 = 60 * 60 * 24 * 365.25
143 ."IF($now-time_stamp<60, '"._("0 - last minute")."',"
144 ."IF($now-time_stamp<3600, '"._("1 - 1 minute to 1 hour")."',"
145 ."IF($now-time_stamp<86400, '"._("2 - 1 hour to 1 day")."',"
146 ."IF($now-time_stamp<604800, '"._("3 - 1 day to 1 week")."',"
147 ."IF($now-time_stamp<2629800, '"._("4 - 1 week to 1 month")."',"
148 ."IF($now-time_stamp<31557600, '"._("5 - 1 month to 1 year")."',"
149 ."'"._("6 - more than 1 year")."')))))) AS 'Time_Scale', "
150 ."remote_host AS 'Remote_Host', "
151 ."count(*) AS 'Access_Count' "
153 ."WHERE (remote_host LIKE '%googlebot.com' "
154 ."OR remote_host LIKE '%alexa.com' "
155 ."OR remote_host LIKE '%inktomisearch.com' "
156 ."OR remote_host LIKE '%msnbot.msn.com') "
157 .($where_conditions ? 'AND '.$where_conditions : '')
158 ."GROUP BY 'Time_Scale', 'Remote_Host'";
160 } elseif ($args['mode']=="search_bots_hits") {
161 // This queries for all entries in the SQL access log table that
162 // have a dns name that I know to be a web search engine crawler and
163 // displays the URI that was hit.
164 // If PHPSESSID appears in the URI, just display the URI to the left of this
168 ."IF($now-time_stamp<60, '"._("0 - last minute")."',"
169 ."IF($now-time_stamp<3600, '"._("1 - 1 minute to 1 hour")."',"
170 ."IF($now-time_stamp<86400, '"._("2 - 1 hour to 1 day")."',"
171 ."IF($now-time_stamp<604800, '"._("3 - 1 day to 1 week")."',"
172 ."IF($now-time_stamp<2629800, '"._("4 - 1 week to 1 month")."',"
173 ."IF($now-time_stamp<31557600, '"._("5 - 1 month to 1 year")."',"
174 ."'"._("6 - more than 1 year")."')))))) AS 'Time_Scale', "
175 ."remote_host AS 'Remote_Host', "
176 ."IF(instr(request_uri, 'PHPSESS')=0, request_uri,left(request_uri, instr(request_uri, 'PHPSESS')-2)) AS 'Request_URI' "
178 ."WHERE (remote_host LIKE '%googlebot.com' "
179 ."OR remote_host LIKE '%alexa.com' "
180 ."OR remote_host LIKE '%inktomisearch.com' "
181 ."OR remote_host LIKE '%msnbot.msn.com') "
182 .($where_conditions ? 'AND '.$where_conditions : '')
183 ."ORDER BY time_stamp";
188 /** Honeypot for xgettext. Those strings are translated dynamically.
190 function _locale_dummy() {
194 _("external_referers"),
195 _("referring_domains"),
200 _("search_bots_hits"),
209 function getDefaultArguments() {
211 'mode' => 'referring_domains',
212 // referring_domains, referring_urls, remote_hosts, users, host_users
214 // blank means use the mode as the caption/title for the output
215 'local_referrers' => 'true', // only show external referring sites
216 'period' => '', // the type of period to report:
217 // may be weeks, days, hours, minutes, or blank for all
218 'count' => '0' // the number of periods to report
222 function getName () {
223 return _("AnalyseAccessLogSql");
226 function getDescription () {
227 return _("Show summary information from the access log table.");
230 function getVersion() {
231 return preg_replace("/[Revision: $]/", '',
232 "\$Revision: 1.1 $");
235 function run($dbi, $argstr, &$request, $basepage) {
236 // flag that the output may not be cached - i.e. it is dynamic
237 $request->setArg('nocache', 1);
239 if (!$request->_user->isAdmin())
240 return HTML::p(_("The requested information is available only to Administrators."));
242 if (!ACCESS_LOG_SQL) // need read access
243 return HTML::p(_("The SQL_ACCESS_LOG is not enabled."));
245 // set aside a place for the table headers, see _setHeaders()
246 $this->_theadrow = HTML::tr();
247 $this->_headerSet = false;
249 $args = $this->getArgs($argstr, $request);
251 $query = $this->_getQueryString($args);
254 return HTML::p(sprintf( _("Unrecognised parameter 'mode=%s'"),
257 // get the data back.
258 // Note that this must be done before the final generation ofthe table,
259 // otherwise the headers will not be ready
260 $tbody = $this->_getQueryResults($query, $dbi);
262 return HTML::table(array('border' => 1,
265 HTML::caption(HTML::h1(HTML::br(),$this->_getCaption($args))),
266 HTML::thead($this->_theadrow),
270 function _getQueryResults($query, &$dbi) {
271 $queryResult = $dbi->genericSqlIter($query);
273 $tbody = HTML::tbody(HTML::tr(HTML::td(_("<empty>"))));
275 $tbody = HTML::tbody();
276 while ($row = $queryResult->next()) {
277 $this->_setHeaders($row);
279 foreach ($row as $value) {
280 // output a '-' for empty values, otherwise the table looks strange
281 $tr->pushContent(HTML::td( empty($value) ? '-' : $value ));
283 $tbody->pushContent($tr);
286 $queryResult->free();
290 function _setHeaders($row) {
291 if (!$this->_headerSet) {
292 foreach ($row as $key => $value) {
293 $this->_theadrow->pushContent(HTML::th(_($key)));
295 $this->_headerSet = true;
299 function _getWhereConditions(&$args) {
300 $where_conditions = '';
302 if ($args['period']<>'') {
304 if ($args['period']=='minutes') {
306 } elseif ($args['period']=='hours') {
308 } elseif ($args['period']=='days') {
309 $since = 60 * 60 * 24;
310 } elseif ($args['period']=='weeks') {
311 $since = 60 * 60 * 24 * 7;
313 $since = $since * $args['count'];
315 if ($where_conditions<>'')
316 $where_conditions = $where_conditions.' AND ';
317 $since = time() - $since;
318 $where_conditions = $where_conditions."time_stamp > $since";
322 if ($args['local_referrers']<>'true') {
323 if ($where_conditions<>'')
324 $where_conditions = $where_conditions.' AND ';
325 $localhost = SERVER_URL;
326 $len = strlen($localhost);
327 $where_conditions = $where_conditions."left(referer,$len)<>'$localhost'";
330 // The assumed contract is that there is a space at the end of the
331 // conditions string, so that following SQL clauses (such as GROUP BY)
332 // will not cause a syntax error
333 if ($where_conditions<>'')
334 $where_conditions = $where_conditions.' ';
336 return $where_conditions;
339 function _getCaption(&$args) {
340 $caption = $args['caption'];
342 $caption = gettext($args['mode']);
343 if ($args['period']<>'' && $args['count'])
344 $caption = $caption." - ".$args['count']." ". gettext($args['period']);
350 // $Log: not supported by cvs2svn $
351 // Revision 1.1 2005/02/02 20:41:02 rurban
352 // Accesslog sql queries
360 // c-hanging-comment-ender-p: nil
361 // indent-tabs-mode: nil