3 * Copyright 2005 Charles Corrigan and $ThePhpWikiProgrammingTeam
5 * This file is (not yet) part of PhpWiki.
7 * PhpWiki is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * PhpWiki is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with PhpWiki; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 * A plugin that provides a framework and some useful queries to analyse the SQL
24 * access log. This information may be sensitive and so is limited to
25 * administrator access only.
27 * To add a new query, see getQueryString()
29 class WikiPlugin_AnalyseAccessLogSql
33 * Build the query string
35 * FIXME: some or all of these queries may be MySQL specific / non-portable
36 * FIXME: properly quote the string args
38 * The column names displayed are generated from the actual query column
39 * names, so make sure that each column in the query is given a user
40 * friendly name. Note that the column names are passed to _() and so may be
43 * If there are query specific where conditions, then the construction
44 * " if ($where_conditions<>'')
45 * $where_conditions = 'WHERE '.$where_conditions.' ';"
46 * should be changed to
47 * " if ($where_conditions<>'')
48 * $where_conditions = 'AND '.$where_conditions.' ';"
49 * and in the assignment to query have something like
53 * ."WHERE referer IS NOT NULL "
56 private function getQueryString(&$args)
58 // extract any parametrised conditions from the arguments,
59 // in particular, how much history to select
60 $where_conditions = $this->getWhereConditions($args);
62 // get the correct name for the table
63 //FIXME is there a more correct way to do this?
64 global $DBParams, $request;
65 $accesslog = (!empty($DBParams['prefix']) ? $DBParams['prefix'] : '') . "accesslog";
68 $backend_type = $request->_dbi->_backend->backendType();
69 switch ($backend_type) {
71 $Referring_URL = "left(referer,length(referer)-instr(reverse(referer),'?'))";
75 $Referring_URL = "substr(referer,0,position('?' in referer))";
78 $Referring_URL = "referer";
80 switch ($args['mode']) {
81 case 'referring_urls':
82 if ($where_conditions <> '')
83 $where_conditions = 'WHERE ' . $where_conditions . ' ';
85 . "$Referring_URL AS Referring_URL, "
86 . "count(*) AS Referral_Count "
89 . "GROUP BY Referring_URL";
91 case 'external_referers':
92 $args['local_referrers'] = 'false';
93 $where_conditions = $this->getWhereConditions($args);
94 if ($where_conditions <> '')
95 $where_conditions = 'WHERE ' . $where_conditions . ' ';
97 . "$Referring_URL AS Referring_URL, "
98 . "count(*) AS Referral_Count "
101 . "GROUP BY Referring_URL";
103 case 'referring_domains':
104 if ($where_conditions <> '')
105 $where_conditions = 'WHERE ' . $where_conditions . ' ';
106 switch ($backend_type) {
108 $Referring_Domain = "left(referer, if(locate('/', referer, 8) > 0,locate('/', referer, 8) -1, length(referer)))";
112 $Referring_Domain = "substr(referer,0,8) || regexp_replace(substr(referer,8), '/.*', '')";
115 $Referring_Domain = "referer";
119 . "$Referring_Domain AS Referring_Domain, "
120 . "count(*) AS Referral_Count "
123 . "GROUP BY Referring_Domain";
126 if ($where_conditions <> '')
127 $where_conditions = 'WHERE ' . $where_conditions . ' ';
129 . "remote_host AS Remote_Host, "
130 . "count(*) AS Access_Count "
133 . "GROUP BY Remote_Host";
136 if ($where_conditions <> '')
137 $where_conditions = 'WHERE ' . $where_conditions . ' ';
139 . "remote_user AS User, "
140 . "count(*) AS Access_Count "
143 . "GROUP BY remote_user";
146 if ($where_conditions <> '')
147 $where_conditions = 'WHERE ' . $where_conditions . ' ';
149 . "remote_host AS Remote_Host, "
150 . "remote_user AS User, "
151 . "count(*) AS Access_Count "
154 . "GROUP BY remote_host, remote_user";
157 // This queries for all entries in the SQL access log table that
158 // have a dns name that I know to be a web search engine crawler and
159 // categorises the results into time buckets as per the list below
161 // 1 - 1 hour - 3600 = 60 * 60
162 // 2 - 1 day - 86400 = 60 * 60 * 24
163 // 3 - 1 week - 604800 = 60 * 60 * 24 * 7
164 // 4 - 1 month - 2629800 = 60 * 60 * 24 * 365.25 / 12
165 // 5 - 1 year - 31557600 = 60 * 60 * 24 * 365.25
168 . "CASE WHEN $now-time_stamp<60 THEN '" . _("0 - last minute") . "' ELSE "
169 . "CASE WHEN $now-time_stamp<3600 THEN '" . _("1 - 1 minute to 1 hour") . "' ELSE "
170 . "CASE WHEN $now-time_stamp<86400 THEN '" . _("2 - 1 hour to 1 day") . "' ELSE "
171 . "CASE WHEN $now-time_stamp<604800 THEN '" . _("3 - 1 day to 1 week") . "' ELSE "
172 . "CASE WHEN $now-time_stamp<2629800 THEN '" . _("4 - 1 week to 1 month") . "' ELSE "
173 . "CASE WHEN $now-time_stamp<31557600 THEN '" . _("5 - 1 month to 1 year") . "' ELSE "
174 . "'" . _("6 - more than 1 year") . "' END END END END END END AS Time_Scale, "
175 . "remote_host AS Remote_Host, "
176 . "count(*) AS Access_Count "
178 . "WHERE (remote_host LIKE '%googlebot.com' "
179 . "OR remote_host LIKE '%alexa.com' "
180 . "OR remote_host LIKE '%inktomisearch.com' "
181 . "OR remote_host LIKE '%msnbot.msn.com') "
182 . ($where_conditions ? 'AND ' . $where_conditions : '')
183 . "GROUP BY Time_Scale, remote_host";
185 case "search_bots_hits":
186 // This queries for all entries in the SQL access log table that
187 // have a dns name that I know to be a web search engine crawler and
188 // displays the URI that was hit.
189 // If PHPSESSID appears in the URI, just display the URI to the left of this
190 $sessname = session_name();
191 switch ($backend_type) {
193 $Request_URI = "IF(instr(request_uri, '$sessname')=0, request_uri,left(request_uri, instr(request_uri, '$sessname')-2))";
197 $Request_URI = "regexp_replace(request_uri, '$sessname.*', '')";
200 $Request_URI = 'request_uri';
205 . "CASE WHEN $now-time_stamp<60 THEN '" . _("0 - last minute") . "' ELSE "
206 . "CASE WHEN $now-time_stamp<3600 THEN '" . _("1 - 1 minute to 1 hour") . "' ELSE "
207 . "CASE WHEN $now-time_stamp<86400 THEN '" . _("2 - 1 hour to 1 day") . "' ELSE "
208 . "CASE WHEN $now-time_stamp<604800 THEN '" . _("3 - 1 day to 1 week") . "' ELSE "
209 . "CASE WHEN $now-time_stamp<2629800 THEN '" . _("4 - 1 week to 1 month") . "' ELSE "
210 . "CASE WHEN $now-time_stamp<31557600 THEN '" . _("5 - 1 month to 1 year") . "' ELSE "
211 . "'" . _("6 - more than 1 year") . "' END END END END END END AS Time_Scale, "
212 . "remote_host AS Remote_Host, "
213 . "$Request_URI AS Request_URI "
215 . "WHERE (remote_host LIKE '%googlebot.com' "
216 . "OR remote_host LIKE '%alexa.com' "
217 . "OR remote_host LIKE '%inktomisearch.com' "
218 . "OR remote_host LIKE '%msnbot.msn.com') "
219 . ($where_conditions ? 'AND ' . $where_conditions : '')
220 . "ORDER BY time_stamp";
225 /** Honeypot for xgettext. Those strings are translated dynamically.
227 private function locale_dummy()
232 _("external_referers"),
233 _("referring_domains"),
238 _("search_bots_hits"),
247 function getDefaultArguments()
250 'mode' => 'referring_domains',
251 // referring_domains, referring_urls, remote_hosts, users, host_users, search_bots, search_bots_hits
253 // blank means use the mode as the caption/title for the output
254 'local_referrers' => 'true', // only show external referring sites
255 'period' => '', // the type of period to report:
256 // may be weeks, days, hours, minutes, or blank for all
257 'count' => '0' // the number of periods to report
261 function getDescription()
263 return _("Show summary information from the access log table.");
266 function run($dbi, $argstr, &$request, $basepage)
268 // flag that the output may not be cached - i.e. it is dynamic
269 $request->setArg('nocache', 1);
271 if (!$request->_user->isAdmin())
272 return HTML::p(_("The requested information is available only to Administrators."));
274 if (!ACCESS_LOG_SQL) // need read access
275 return HTML::p(_("The SQL_ACCESS_LOG is not enabled."));
277 // set aside a place for the table headers, see setHeaders()
278 $this->_theadrow = HTML::tr();
279 $this->_headerSet = false;
281 $args = $this->getArgs($argstr, $request);
283 $query = $this->getQueryString($args);
286 return HTML::p(sprintf(_("Unrecognised parameter 'mode=%s'"),
289 // get the data back.
290 // Note that this must be done before the final generation ofthe table,
291 // otherwise the headers will not be ready
292 $tbody = $this->getQueryResults($query, $dbi);
294 return HTML::table(array('border' => 1),
295 HTML::caption($this->getCaption($args)),
296 HTML::thead($this->_theadrow),
300 private function getQueryResults($query, &$dbi)
302 $queryResult = $dbi->genericSqlIter($query);
304 $tbody = HTML::tbody(HTML::tr(HTML::td(_("<empty>"))));
306 $tbody = HTML::tbody();
307 while ($row = $queryResult->next()) {
308 $this->setHeaders($row);
310 foreach ($row as $value) {
311 // output a '-' for empty values, otherwise the table looks strange
312 $tr->pushContent(HTML::td(empty($value) ? '-' : $value));
314 $tbody->pushContent($tr);
317 $queryResult->free();
321 private function setHeaders($row)
323 if (!$this->_headerSet) {
324 foreach ($row as $key => $value) {
325 $this->_theadrow->pushContent(HTML::th(_($key)));
327 $this->_headerSet = true;
331 private function getWhereConditions(&$args)
333 $where_conditions = '';
335 if ($args['period'] <> '') {
337 if ($args['period'] == 'minutes') {
339 } elseif ($args['period'] == 'hours') {
341 } elseif ($args['period'] == 'days') {
342 $since = 60 * 60 * 24;
343 } elseif ($args['period'] == 'weeks') {
344 $since = 60 * 60 * 24 * 7;
346 $since = $since * $args['count'];
348 if ($where_conditions <> '')
349 $where_conditions = $where_conditions . ' AND ';
350 $since = time() - $since;
351 $where_conditions = $where_conditions . "time_stamp > $since";
355 if ($args['local_referrers'] <> 'true') {
357 if ($where_conditions <> '')
358 $where_conditions = $where_conditions . ' AND ';
359 $localhost = SERVER_URL;
360 $len = strlen($localhost);
361 $backend_type = $request->_dbi->_backend->backendType();
362 switch ($backend_type) {
364 $ref_localhost = "left(referer,$len)<>'$localhost'";
368 $ref_localhost = "substr(referer,0,$len)<>'$localhost'";
373 $where_conditions = $where_conditions . $ref_localhost;
376 // The assumed contract is that there is a space at the end of the
377 // conditions string, so that following SQL clauses (such as GROUP BY)
378 // will not cause a syntax error
379 if ($where_conditions <> '')
380 $where_conditions = $where_conditions . ' ';
382 return $where_conditions;
385 private function getCaption(&$args)
387 $caption = $args['caption'];
389 $caption = gettext($args['mode']);
390 if ($args['period'] <> '' && $args['count'])
391 $caption = $caption . " - " . $args['count'] . " " . gettext($args['period']);
401 // c-hanging-comment-ender-p: nil
402 // indent-tabs-mode: nil