5 * @author: Chris Petersen, Reini Urban
8 Copyright (c) 2002 Intercept Vector
9 Copyright (c) 2004 Reini Urban
11 This library is free software; you can redistribute it and/or
12 modify it under the terms of the GNU Lesser General Public
13 License as published by the Free Software Foundation; either
14 version 2.1 of the License, or (at your option) any later version.
16 This library is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License for more details.
21 You should have received a copy of the GNU Lesser General Public
22 License along with this library; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 If you have any questions or comments, please email:
28 admin@interceptvector.com
30 http://www.interceptvector.com
34 * Objectified, simplified, documented and added the two other queries
39 * GoogleSearchResults, list of GoogleSearch Result Elements
41 * Each time you issue a search request to the Google service, a
42 * response is returned back to you. This section describes the
43 * meanings of the values returned to you.
45 * <documentFiltering> - A Boolean value indicating whether filtering
46 * was performed on the search results. This will be "true" only if
47 * (a) you requested filtering and (b) filtering actually occurred.
49 * <searchComments> - A text string intended for display to an end
50 * user. One of the most common messages found here is a note that
51 * "stop words" were removed from the search automatically. (This
52 * happens for very common words such as "and" and "as.")
54 * <estimatedTotalResultsCount> - The estimated total number of
55 * results that exist for the query. Note: The estimated number may
56 * be either higher or lower than the actual number of results that
59 * <estimateIsExact> - A Boolean value indicating that the estimate is
60 * actually the exact value.
62 * <resultElements> - An array of <resultElement> items. This
63 * corresponds to the actual list of search results.
65 * <searchQuery> - This is the value of <q> for the search request.
67 * <startIndex> - Indicates the index (1-based) of the first search
68 * result in <resultElements>.
70 * <endIndex> - Indicates the index (1-based) of the last search
71 * result in <resultElements>.
73 * <searchTips> - A text string intended for display to the end
74 * user. It provides instructive suggestions on how to use Google.
76 * <directoryCategories> - An array of <directoryCategory> items. This
77 * corresponds to the ODP directory matches for this search.
79 * <searchTime> - Text, floating-point number indicating the total
80 * server time to return the search results, measured in seconds.
class GoogleSearchResults {
    // Comma-separated names of the response fields copied onto this object.
    var $_fields = "documentFiltering,searchComments,estimatedTotalResultsCount,estimateIsExact,searchQuery,startIndex,endIndex,searchTips,directoryCategories,searchTime,resultElements";
    // The names from $_fields as an array; filled in by the constructor.
    var $fields;
    // Response fields. Each one is null when the response did not contain it.
    // Declared explicitly so they are not created as dynamic properties.
    var $documentFiltering, $searchComments, $estimatedTotalResultsCount,
        $estimateIsExact, $searchQuery, $startIndex, $endIndex, $searchTips,
        $directoryCategories, $searchTime;
    // $resultElements holds the raw result elements from the response;
    // $results holds the same elements wrapped in GoogleSearchResult objects.
    var $resultElements, $results;

    /**
     * Copy the known response fields onto this object and wrap every
     * result element in a GoogleSearchResult.
     *
     * @param array $result search response keyed by field name; keys that
     *                      are missing are stored as null
     */
    function __construct($result) {
        $this->fields = explode(',', $this->_fields);
        foreach ($this->fields as $f) {
            // isset() keeps a partial or failed response from raising
            // undefined-index notices.
            $this->{$f} = isset($result[$f]) ? $result[$f] : null;
        }
        $this->results = array();
        //$this->resultElements = $result['resultElements'];
        // A response without matches may carry no resultElements array at
        // all; only iterate when there really is a list.
        if (is_array($this->resultElements)) {
            foreach ($this->resultElements as $r) {
                $this->results[] = new GoogleSearchResult($r);
            }
        }
    }

    /**
     * Old-style (class-named) constructor, kept so existing code that
     * relies on it keeps working. PHP 8 no longer treats this method as a
     * constructor, so the real work lives in __construct().
     *
     * @param array $result see __construct()
     */
    function GoogleSearchResults ($result) {
        $this->__construct($result);
    }
}
102 * Google Search Result Element:
104 * <summary> - If the search result has a listing in the ODP
105 * directory, the ODP summary appears here as a text string.
107 * <URL> - The URL of the search result, returned as text, with an
110 * <snippet> - A snippet which shows the query in context on the URL
111 * where it appears. This is formatted HTML and usually includes <B>
112 * tags within it. Note that the query term does not always appear
113 * in the snippet. Note: Query terms will be highlighted in bold
114 * in the results, and line breaks will be included for proper text
117 * <title> - The title of the search result, returned as HTML.
119 * <cachedSize> - Text (Integer + "k"). Indicates that a cached
120 * version of the <URL> is available; size is indicated in
123 * <relatedInformationPresent> - Boolean indicating that the
124 * "related:" query term is supported for this URL.
126 * <hostName> - When filtering occurs, a maximum of two results from
127 * any given host is returned. When this occurs, the second
128 * resultElement that comes from that host contains the host name in
131 * <directoryCategory> - array with "fullViewableName" and
132 * "specialEncoding" keys.
134 * <directoryTitle> - If the URL for this resultElement is contained
135 * in the ODP directory, the title that appears in the directory
136 * appears here as a text string. Note that the directoryTitle may
137 * be different from the URL's <title>.
class GoogleSearchResult {
    // Comma-separated names of the result-element fields copied onto this object.
    var $_fields = "summary,URL,snippet,title,cachedSize,relatedInformationPresent,hostName,directoryCategory,directoryTitle";
    // The names from $_fields as an array; filled in by the constructor.
    var $fields;
    // Result-element fields. Each one is null when the element did not
    // contain it. Declared explicitly so they are not dynamic properties.
    var $summary, $URL, $snippet, $title, $cachedSize,
        $relatedInformationPresent, $hostName, $directoryCategory,
        $directoryTitle;

    /**
     * Copy the known fields of one result element onto this object.
     *
     * @param array $result result element keyed by field name; keys that
     *                      are missing are stored as null
     */
    function __construct($result) {
        $this->fields = explode(',', $this->_fields);
        foreach ($this->fields as $f) {
            // isset() avoids undefined-index notices for optional fields.
            $this->{$f} = isset($result[$f]) ? $result[$f] : null;
        }
    }

    /**
     * Old-style (class-named) constructor, kept so existing code that
     * relies on it keeps working. PHP 8 no longer treats this method as a
     * constructor, so the real work lives in __construct().
     *
     * @param array $result see __construct()
     */
    function GoogleSearchResult ($result) {
        $this->__construct($result);
    }
}
/**
 * Store the license key and set up the SOAP client and its proxy.
 *
 * @param int          $maxResults  results per query; clamped to 1..10
 * @param string|false $license_key license key to use; when false the
 *                                  GOOGLE_LICENSE_KEY constant is used
 * @param mixed        $proxy       not referenced in this constructor
 */
function __construct($maxResults=10,$license_key=false,$proxy=false) {
    if ($license_key) {
        $this->license_key = $license_key;
    } elseif (!defined('GOOGLE_LICENSE_KEY')) {
        // NOTE(review): when invoked through `new`, this return value is
        // discarded and the object is left without license_key and proxy.
        return HTML::div(array('class' => 'errors'),
                         fmt("You must first obtain a license key at %s to be able to use the Google API.",
                             WikiLink("http://www.google.com/apis/")),
                         fmt("It's free however."));
    } else {
        $this->license_key = GOOGLE_LICENSE_KEY;
    }
    require_once("lib/nusoap/nusoap.php");

    $this->soapclient = new soapclient(DATA_PATH."/"."GoogleSearch.wsdl", "wsdl");
    $this->proxy = $this->soapclient->getProxy();
    // Keep the stored page size within 1..10.
    if ($maxResults > 10) $maxResults = 10;
    if ($maxResults < 1) $maxResults = 1;
    $this->maxResults = $maxResults;
}

/**
 * Old-style (class-named) constructor, kept so existing code that relies
 * on it keeps working. PHP 8 no longer treats a class-named method as a
 * constructor, so the real work lives in __construct().
 * NOTE(review): assumes the enclosing class is named Google - confirm.
 */
function Google($maxResults=10,$license_key=false,$proxy=false) {
    return $this->__construct($maxResults, $license_key, $proxy);
}
174 * See http://www.google.com/help/features.html for examples of
175 * advanced features. Anything that works at the Google web site
176 * will work as a query string in this method.
178 * You can use the start and maxResults parameters to page through
179 * multiple pages of results. Note that 'maxResults' is currently
180 * limited by Google to 10. See the API reference for more
181 * advanced examples and a full list of country codes and topics
182 * for use in the restrict parameter, along with legal values for
183 * the language, inputencoding, and outputencoding parameters.
185 * <license key> Provided by Google, this is required for you to access the
186 * Google service. Google uses the key for authentication and
188 * <q> (See the API docs for details on query syntax.)
189 * <start> Zero-based index of the first desired result.
190 * <maxResults> Number of results desired per query. The maximum
191 * value per query is 10. Note: If you do a query that doesn't
192 * have many matches, the actual number of results you get may be
193 * smaller than what you request.
194 * <filter> Activates or deactivates automatic results filtering,
195 * which hides very similar results and results that all come from
196 * the same Web host. Filtering tends to improve the end user
197 * experience on Google, but for your application you may prefer
198 * to turn it off. (See the API docs for more
200 * <restrict> Restricts the search to a subset of the Google Web
201 * index, such as a country like "Ukraine" or a topic like
202 * "Linux." (See the API docs for more details.)
203 * <safeSearch> A Boolean value which enables filtering of adult
204 * content in the search results. See SafeSearch for more details.
205 * <lr> Language Restrict - Restricts the search to documents
206 * within one or more languages.
207 * <ie> Input Encoding - this parameter has been deprecated and is
208 * ignored. All requests to the APIs should be made with UTF-8
209 * encoding. (See the API docs for details.)
210 * <oe> Output Encoding - this parameter has been deprecated and is
211 * ignored. All requests to the APIs should be made with UTF-8
function doGoogleSearch($query, $startIndex=1, $maxResults=10, $filter = "false",
                        $restrict='', $safeSearch='false', $lr='',
                        $inputencoding='UTF-8', $outputencoding='UTF-8') {
    // Runs a search through the SOAP proxy and wraps the response in a
    // GoogleSearchResults object. Returns false when no license key was
    // stored by the constructor.
    if (empty($this->license_key))
        return false;
    // The call has to go to the proxy object: calling $this->doGoogleSearch()
    // would re-enter this very method and recurse without end.
    // Per the original author's note, the proxy's doGoogleSearch() gets
    // created automatically (some eval'ed code from the soap request).
    $result = $this->proxy->doGoogleSearch($this->license_key, // "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
                                           $query,
                                           $startIndex,
                                           $maxResults,
                                           $filter,
                                           $restrict,
                                           $safeSearch,
                                           $lr,
                                           $inputencoding, // ignored by server, everything is UTF-8 now
                                           $outputencoding);
    return new GoogleSearchResults($result);
}
232 * Retrieve a page from the Google cache.
234 * Cache requests submit a URL to the Google Web APIs service and
235 * receive in return the contents of the URL when Google's
236 * crawlers last visited the page (if available).
238 * Please note that Google is not affiliated with the authors of
239 * cached pages nor responsible for their content.
241 * The return type for cached pages is base64 encoded text.
243 * @param string url - full URL to the page to retrieve
244 * @return string full text of the cached page
function doGetCachedPage($url) {
    // Fetches the cached copy of $url through the SOAP proxy and returns it
    // base64-decoded. Returns false when no license key was stored by the
    // constructor, and nothing (null) when the proxy returned an empty result.
    if (empty($this->license_key))
        return false;
    // Use the key stored by the constructor, so a key passed as a
    // constructor argument is honoured as well as GOOGLE_LICENSE_KEY.
    // Per the original author's note, the proxy method gets created
    // automatically (some eval'ed code from the soap request).
    $result = $this->proxy->doGetCachedPage($this->license_key,
                                            $url);
    if (!empty($result)) return base64_decode($result);
}
254 * Get spelling suggestions from Google
256 * @param string phrase word or phrase to spell-check
257 * @return string text of any suggested replacement, or None
function doSpellingSuggestion($phrase) {
    // Passes $phrase to the SOAP proxy's doSpellingSuggestion() and returns
    // whatever the proxy returns. Returns false when no license key was
    // stored by the constructor.
    if (empty($this->license_key))
        return false;
    // Use the key stored by the constructor, so a key passed as a
    // constructor argument is honoured as well as GOOGLE_LICENSE_KEY.
    // Per the original author's note, the proxy method gets created
    // automatically (some eval'ed code from the soap request).
    return $this->proxy->doSpellingSuggestion($this->license_key,
                                              $phrase);
}
270 // c-hanging-comment-ender-p: nil
271 // indent-tabs-mode: nil