4 * SQL Parser from: http://code.google.com/p/php-sql-parser/
9 var $reserved = array();
10 var $functions = array();
11 function __construct($sql = false) {
12 #LOAD THE LIST OF RESERVED WORDS
13 $this->load_reserved_words();
14 if($sql) $this->parse($sql);
17 function parse($sql) {
20 #lex the SQL statement
21 $in = $this->split_sql($sql);
23 #sometimes the parser needs to skip ahead until a particular
27 #this is the output tree which is being parsed
30 #This is the last type of union used (UNION or UNION ALL)
31 #indicates a) presence of at least one union in this query
32 # b) the type of union if this is the first or last query
35 #Sometimes a "query" consists of more than one query (like a UNION query)
36 #this array holds all the queries
39 #This is the highest level lexical analysis. This is the part of the
40 #code which finds UNION and UNION ALL query parts
41 foreach($in as $key => $token) {
46 if(strtoupper($token) == $skip_until) {
55 if(strtoupper($token) == "UNION") {
57 for($i=$key+1;$i<count($in);++$i) {
58 if(trim($in[$i]) == '') continue;
59 if(strtoupper($in[$i]) == 'ALL') {
68 $queries[$union][] = $out;
79 $queries[$union][] = $out;
86 /*MySQL supports a special form of UNION:
91 This block handles this query syntax. Only one such subquery
92 is supported in each UNION block. (select)(select)union(select) is not legal.
93 The extra queries will be silently ignored.
95 $union_types = array('UNION','UNION ALL');
96 foreach($union_types as $union_type) {
97 if(!empty($queries[$union_type])) {
98 foreach($queries[$union_type] as $i => $tok_list) {
99 foreach($tok_list as $z => $tok) {
102 if(preg_match('/^\\(\\s*select\\s*/i', $tok)) {
103 $queries[$union_type][$i] = $this->parse(substr($tok,1,-1));
106 $queries[$union_type][$i] = $this->process_sql($queries[$union_type][$i]);
115 /* If there was no UNION or UNION ALL in the query, then the query is
116 stored at $queries[0].
118 if(!empty($queries[0])) {
119 $queries[0] = $this->process_sql($queries[0]);
123 if(count($queries) == 1 && !$union) {
124 $queries = $queries[0];
127 $this->parsed = $queries;
128 return $this->parsed;
131 #This function counts open and close parenthesis and
132 #returns their location. This might be faster as a regex
133 private function count_paren($token,$chars=array('(',')')) {
134 $len = strlen($token);
137 for($i=0;$i<$len;++$i){
138 if($token[$i] == $chars[0]) {
140 } elseif($token[$i] == $chars[1]) {
145 return array('open' => $open, 'close' => $close, 'balanced' =>( count($close) - count($open)));
148 #This function counts open and close parenthesis and
149 #returns their location. This might be faster as a regex
150 private function count_backtick($token) {
151 $len = strlen($token);
153 for($i=0;$i<$len;++$i){
154 if($token[$i] == '`') ++$cnt;
160 #this function splits up a SQL statement into easy to "parse"
161 #tokens for the SQL processor
162 private function split_sql($sql) {
164 if(!is_string($sql)) {
168 $sql = str_replace(array('\\\'','\\"',"\r\n","\n","()"),array("''",'""'," "," "," "), $sql);
170 /(`(?:[^`]|``)`|[@A-Za-z0-9_.`-]+(?:\(\s*\)){0,1})
171 |(\+|-|\*|\/|!=|>=|<=|<>|>|<|&&|\|\||=|\^)
172 |(\(.*?\)) # Match FUNCTION(...) OR BAREWORDS
180 $tokens = preg_split($regex, $sql,-1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
181 $token_count = count($tokens);
183 /* The above regex has one problem, because the parenthetical match is not greedy.
184 Thus, when matching grouped expresions such as ( (a and b) or c) the
185 tokenizer will produce "( (a and b)", " ", "or", " " , "c,")"
187 This block detects the number of open/close parens in the given token. If the parens are balanced
188 (balanced == 0) then we don't need to do anything.
190 otherwise, we need to balance the expression.
193 for($i=0;$i<$token_count;++$i) {
195 if(empty($tokens[$i])) continue;
197 $token = $tokens[$i];
198 $trim = trim($token);
201 && substr($trim,-1) == ')') {
202 $trim=trim(substr($trim,0,
209 if($token && $token[0] == '(') {
210 $info = $this->count_paren($token);
211 if($info['balanced'] == 0) {
215 #we need to find this many closing parens
216 $needed = abs($info['balanced']);
218 while($needed > 0 && $n <$token_count-1) {
220 #echo "LOOKING FORWARD TO $n [ " . $tokens[$n] . "]\n";
221 $token2 = $tokens[$n];
222 $info2 = $this->count_paren($token2);
223 $closes = count($info2['close']);
224 if($closes != $needed) {
225 $tokens[$i] .= $tokens[$n];
228 $info2 = $this->count_paren($tokens[$i]);
229 $needed = abs($info2['balanced']);
230 # echo "CLOSES LESS THAN NEEDED (still need $needed)\n";
232 /*get the string pos of the last close paren we need*/
233 $pos = $info2['close'][count($info2['close'])-1];
238 $str1 = substr($tokens[$n],0,$pos) . ')';
239 $str2 = substr($tokens[$n],$pos+1);
241 #echo "CLOSES FOUND AT $n, offset:$pos [$str1] [$str2]\n";
242 if(strlen($str2) > 0) {
248 $tokens[$i] .= $str1;
249 $info2 = $this->count_paren($tokens[$i]);
250 $needed = abs($info2['balanced']);
257 #the same problem appears with backticks :(
259 /* reset the array if we deleted any tokens above */
260 if ($reset) $tokens = array_values($tokens);
262 $token_count=count($tokens);
263 for($i=0;$i<$token_count;++$i) {
264 if(empty($tokens[$i])) continue;
268 if($needed && $token && strpos($token,'`') !== false) {
269 $info = $this->count_backtick($token);
270 if($info %2 == 0) { #even number of backticks means we are balanced
276 while($needed && $n <$token_count-1) {
278 #echo "BACKTICK COUNT[$i]: $info old: {$tokens[$i]}, new: ($token)\n";
280 $token .= $tokens[$n];
282 $needed = $this->count_backtick($token) % 2;
285 if($reset) $tokens[$i] = $token;
288 /* reset the array if we deleted any tokens above */
289 $tokens = array_values($tokens);
295 /* This function breaks up the SQL statement into logical sections.
296 Some sections are then further handled by specialized functions.
298 private function process_sql(&$tokens,$start_at = 0, $stop_at = false) {
300 $start = microtime(true);
301 $token_category = "";
304 $token_count = count($tokens);
307 $stop_at = $token_count;
312 for($token_number = $start_at;$token_number<$stop_at;++$token_number) {
313 $token = trim($tokens[$token_number]);
314 if($token && $token[0] == '(' && $token_category == "") {
315 $token_category = 'SELECT';
318 /* If it isn't obvious, when $skip_next is set, then we ignore the next real
319 token, that is we ignore whitespace.
322 #whitespace does not count as a next token
327 #to skip the token we replace it with whitespace
332 $upper = strtoupper($token);
335 /* Tokens that get their own sections. These keywords have subclauses. */
375 if($token == 'DEALLOCATE') {
378 /* this FROM is different from FROM in other DML (not join related) */
379 if($token_category == 'PREPARE' && $upper == 'FROM') {
383 $token_category = $upper;
384 #$join_type = 'JOIN';
385 if($upper == 'FROM' && $token_category == 'FROM') {
393 /* These tokens get their own section, but have no subclauses.
394 These tokens identify the statement but have no specific subclauses of their own. */
424 $token_category = $upper; /* set the category in case these get subclauses
425 in a future version of MySQL */
426 $out[$upper][0] = $upper;
430 /* This is either LOCK TABLES or SELECT ... LOCK IN SHARE MODE*/
432 if($token_category == "") {
433 $token_category = $upper;
434 $out[$upper][0] = $upper;
436 $token = 'LOCK IN SHARE MODE';
438 $out['OPTIONS'][] = $token;
444 /* USING in FROM clause is different from USING w/ prepared statement*/
445 if($token_category == 'EXECUTE') {
446 $token_category=$upper;
449 if($token_category == 'FROM' && !empty($out['DELETE'])) {
450 $token_category=$upper;
455 /* DROP TABLE is different from ALTER TABLE DROP ... */
457 if($token_category != 'ALTER') {
458 $token_category = $upper;
459 $out[$upper][0] = $upper;
466 $out['OPTIONS'][] = 'FOR UPDATE';
472 if($token_category == "" ) {
473 $token_category = $upper;
477 if($token_category == 'DUPLICATE') {
485 $out[$upper][0] = $upper;
489 /* These tokens are ignored. */
501 if($token_category == 'DUPLICATE') {
506 /* These tokens set particular options for the statement. They never stand alone.*/
510 case 'HIGH_PRIORITY':
515 case 'STRAIGHT_JOIN':
516 case 'SQL_SMALL_RESULT':
517 case 'SQL_BIG_RESULT':
519 case 'SQL_BUFFER_RESULT':
522 case 'SQL_CALC_FOUND_ROWS':
523 $out['OPTIONS'][] = $upper;
528 if($token_category == 'GROUP') {
530 $out['OPTIONS'][] = 'WITH ROLLUP';
548 if($prev_category == $token_category) {
549 $out[$token_category][] = $token;
552 $prev_category = $token_category;
555 if(!$out) return false;
558 #process the SELECT clause
559 if(!empty($out['SELECT'])) $out['SELECT'] = $this->process_select($out['SELECT']);
561 if(!empty($out['FROM'])) $out['FROM'] = $this->process_from($out['FROM']);
562 if(!empty($out['USING'])) $out['USING'] = $this->process_from($out['USING']);
563 if(!empty($out['UPDATE'])) $out['UPDATE'] = $this->process_from($out['UPDATE']);
565 if(!empty($out['GROUP'])) $out['GROUP'] = $this->process_group($out['GROUP'], $out['SELECT']);
566 if(!empty($out['ORDER'])) $out['ORDER'] = $this->process_group($out['ORDER'], $out['SELECT']);
568 if(!empty($out['LIMIT'])) $out['LIMIT'] = $this->process_limit($out['LIMIT']);
570 if(!empty($out['WHERE'])) $out['WHERE'] = $this->process_expr_list($out['WHERE']);
571 if(!empty($out['HAVING'])) $out['HAVING'] = $this->process_expr_list($out['HAVING']);
572 if(!empty($out['SET'])) $out['SET'] = $this->process_set_list($out['SET']);
573 if(!empty($out['DUPLICATE'])) {
574 $out['ON DUPLICATE KEY UPDATE'] = $this->process_set_list($out['DUPLICATE']);
575 unset($out['DUPLICATE']);
577 if(!empty($out['INSERT'])) $out = $this->process_insert($out);
578 if(!empty($out['REPLACE'])) $out = $this->process_insert($out,'REPLACE');
579 if(!empty($out['DELETE'])) $out = $this->process_delete($out);
585 /* A SET list is simply a list of key = value expressions separated by comma (,).
586 This function produces a list of the key/value expressions.
588 private function process_set_list($tokens) {
591 foreach($tokens as $token) {
594 if($token === false || empty($token)) continue;
599 if($token == '=') continue;
602 $expr[] = array('column' => trim($column), 'expr' => trim($expression));
603 $expression = $column = "";
607 $expression .= $token;
610 $expr[] = array('column' => trim($column), 'expr' => trim($expression));
616 /* This function processes the LIMIT section.
617 start,end are set. If only end is provided in the query
618 then start is set to 0.
620 private function process_limit($tokens) {
624 if($pos = array_search(',',$tokens)) {
625 for($i=0;$i<$pos;++$i) {
626 if($tokens[$i] != '') {
627 $start = $tokens[$i];
637 for($i=$pos;$i<count($tokens);++$i) {
638 if($tokens[$i] != '') {
644 return array('start' => $start, 'end' => $end);
647 /* This function processes the SELECT section. It splits the clauses at the commas.
648 Each clause is then processed by process_select_expr() and the results are added to
651 Finally, at the end, the epxression list is returned.
653 private function process_select(&$tokens) {
656 foreach($tokens as $token) {
658 $expr[] = $this->process_select_expr(trim($expression));
661 if($token === "" || $token===false) $token=" ";
662 $expression .= $token ;
665 if($expression) $expr[] = $this->process_select_expr(trim($expression));
669 /* This fuction processes each SELECT clause. We determine what (if any) alias
670 is provided, and we set the type of expression.
672 private function process_select_expr($expression) {
675 $base_expression = $expression;
676 $upper = trim(strtoupper($expression));
677 #if necessary, unpack the expression
678 if($upper[0] == '(') {
679 #$expression = substr($expression,1,-1);
680 $base_expression = $expression;
683 $tokens = $this->split_sql($expression);
684 $token_count = count($tokens);
686 /* Determine if there is an explicit alias after the AS clause.
687 If AS is found, then the next non-whitespace token is captured as the alias.
688 The tokens after (and including) the AS are removed.
695 for($i=0;$i<$token_count;++$i) {
696 $token = strtoupper($tokens[$i]);
698 $stripped[] = $tokens[$i];
709 $alias .= $tokens[$i];
714 $base_expr .= $tokens[$i];
717 $stripped = $this->process_expr_list($stripped);
718 $last = array_pop($stripped);
719 if(!$alias && $last['expr_type'] == 'colref') {
720 $prev = array_pop($stripped);
721 if($prev['expr_type'] == 'operator' ||
722 $prev['expr_type'] == 'const' ||
723 $prev['expr_type'] == 'function' ||
724 $prev['expr_type'] == 'expression' ||
725 #$prev['expr_type'] == 'aggregate_function' ||
726 $prev['expr_type'] == 'subquery' ||
727 $prev['expr_type'] == 'colref') {
728 $alias = $last['base_expr'];
730 #remove the last token
733 $base_expr = join("", $tokens);
740 $base_expr=join("", $tokens);
744 /* Properly escape the alias if it is not escaped */
745 if ($alias[0] != '`') {
746 $alias = '`' . str_replace('`','``',$alias) . '`';
751 if(substr(trim($base_expr),0,1) == '(') {
752 $base_expr = substr($expression,1,-1);
753 if(preg_match('/^sel/i', $base_expr)) {
755 $processed = $this->parse($base_expr);
759 $processed = $this->process_expr_list($tokens);
762 if(count($processed) == 1) {
763 $type = $processed[0]['expr_type'];
767 return array('expr_type'=>$type,'alias' => $alias, 'base_expr' => $base_expr, 'sub_tree' => $processed);
772 private function process_from(&$tokens) {
793 foreach($tokens as $token) {
795 $upper = strtoupper(trim($token));
797 if($skip_next && $token) {
807 if(preg_match("/^\\s*\\(\\s*select/i",$token)) {
809 $table = "DEPENDENT-SUBQUERY";
810 $sub_tree = $this->parse(trim($token,'() '));
825 $expression .= $token == '' ? " " : $token;
827 $ref_expr .= $token == '' ? " " : $token;
837 while($alias == "") {
838 $alias = trim($tokens[$i+$n]);
846 if($token_category == 'CREATE') {
847 $token_category = $upper;
864 # $expression .= $token;
879 case 'STRAIGHT_JOIN':
880 $join_type=$saved_join_type;
882 $modifier = $upper . " ";
893 $saved_join_type = ($modifier ? $modifier : 'JOIN');
895 $new_join_type = ($modifier ? $modifier : 'JOIN');
896 $join_type = $saved_join_type;
897 $saved_join_type = $new_join_type;
898 unset($new_join_type);
903 if(!trim($alias)) $alias = $table;
906 $sub_tree = $this->parse(trim($subquery,'()'));
907 $base_expr=$subquery;
910 if(substr(trim($table),0,1) == '(') {
911 $base_expr=trim($table,'() ');
913 $sub_tree = $this->process_from($this->split_sql($base_expr));
918 if($join_type == "") $join_type='JOIN';
919 $expr[] = array('table'=>$table, 'alias'=>$alias,'join_type'=>$join_type,'ref_type'=> $ref_type,'ref_clause'=>trim($ref_expr,'() '), 'base_expr' => $base_expr, 'sub_tree' => $sub_tree);
921 #$join_type=$saved_join_type;
925 $table = $alias = $expression = $base_expr = $ref_type = $ref_expr = "";
933 if($token === false || empty($token) || $token === "") continue;
935 if($token_count == 0 ) {
939 } else if($token_count == 1) {
947 if(substr(trim($table),0,1) == '(') {
948 $base_expr=trim($table,'() ');
950 $sub_tree = $this->process_from($this->split_sql($base_expr));
953 if(!trim($alias)) $alias = $table;
955 if($join_type == "") $saved_join_type='JOIN';
957 $expr[] = array('table'=>$table, 'alias'=>$alias,'join_type'=>$saved_join_type,'ref_type'=> $ref_type,'ref_clause'=> trim($ref_expr,'() '), 'base_expr' => $base_expr, 'sub_tree' => $sub_tree);
963 private function process_group(&$tokens, &$select) {
968 $type = "expression";
969 if(!$tokens) return false;
971 foreach($tokens as $token) {
972 switch(strtoupper($token)) {
974 $expression = trim($expression);
975 if($expression[0] != '`' || substr($expression,-1) != '`') {
976 $escaped = str_replace('`','``',$expression);
978 $escaped = $expression;
980 $escaped = '`' . $escaped . '`';
982 if(is_numeric(trim($expression))) {
986 #search to see if the expression matches an alias
987 foreach($select as $clause) {
988 if($clause['alias'] == $escaped) {
993 if(!$type) $type = "expression";
996 $out[]=array('type'=>$type,'base_expr'=>$expression,'direction'=>$direction);
1007 $direction = "DESC";
1011 $expression .= $token == '' ? ' ' : $token;
1017 $expression = trim($expression);
1018 if($expression[0] != '`' || substr($expression,-1) != '`') {
1019 $escaped = str_replace('`','``',$expression);
1021 $escaped = $expression;
1023 $escaped = '`' . $escaped . '`';
1025 if(is_numeric(trim($expression))) {
1029 #search to see if the expression matches an alias
1030 if(!$type && $select) {
1031 foreach($select as $clause) {
1032 if(!is_array($clause)) continue;
1033 if($clause['alias'] == $escaped) {
1041 if(!$type) $type = "expression";
1044 $out[]=array('type'=>$type,'base_expr'=>$expression,'direction'=>$direction);
1046 foreach($out as &$term) {
1047 if(!empty($term['base_expr'])) {
1048 $term['sub_tree'] = array($this->process_select_expr($term['base_expr']));
1055 /* Some sections are just lists of expressions, like the WHERE and HAVING clauses. This function
1056 processes these sections. Recursive.
1058 private function process_expr_list($tokens) {
1065 $in_lists = array();
1066 foreach($tokens as $key => $token) {
1068 if(strlen(trim($token))==0) continue;
1075 $upper = strtoupper(trim($token));
1076 if(trim($token)) $token=trim($token);
1078 /* is it a subquery?*/
1079 if(preg_match("/^\\s*\\(\\s*SELECT/i", $token)) {
1081 #tokenize and parse the subquery.
1082 #we remove the enclosing parenthesis for the tokenizer
1083 $processed = $this->parse(trim($token,' ()'));
1086 /* is it an inlist */
1087 } elseif( $upper[0] == '(' && substr($upper,-1) == ')' ) {
1088 if($prev_token == 'IN') {
1090 $processed = $this->split_sql(substr($token,1,-1));
1092 foreach($processed as $v) {
1093 if($v == ',') continue;
1101 elseif($prev_token == 'AGAINST') {
1102 $type = "match-arguments";
1103 $list = $this->split_sql(substr($token,1,-1));
1104 if(count($list) > 1){
1105 $match_mode = implode('',array_slice($list,1));
1106 $processed = array($list[0], $match_mode);
1109 $processed = $list[0];
1113 /* it is either an operator, a colref or a constant */
1171 if(is_numeric($token)) {
1179 #$processed = $token;
1183 /* is a reserved word? */
1184 if(($type != 'operator' && $type != 'in-list' && $type != 'sub_expr') && in_array($upper, $this->reserved)) {
1186 if(!in_array($upper,$this->functions)) {
1201 case 'GROUP_CONCAT':
1205 $type = 'aggregate_function';
1206 if(!empty($tokens[$key+1])) $sub_expr = $tokens[$key+1];
1212 if(!empty($tokens[$key+1])) $sub_expr = $tokens[$key+1]; else $sub_expr="()";
1222 if($upper[0] == '(') {
1223 $local_expr = substr(trim($token),1,-1);
1225 $local_expr = $token;
1227 $processed = $this->process_expr_list($this->split_sql($local_expr));
1228 $type = 'expression';
1230 // if(count($processed) == 1) {
1231 // $type = $processed[0]['expr_type'];
1232 // $base_expr = $processed[0]['base_expr'];
1233 // $processed = $processed[0]['sub_tree'];
1238 $sub_expr=trim($sub_expr);
1241 $expr[] = array( 'expr_type' => $type, 'base_expr' => $token, 'sub_tree' => $processed);
1242 $prev_token = $upper;
1247 $processed['sub_tree'] = $this->process_expr_list($this->split_sql(substr($sub_expr,1,-1)));
1250 if(!is_array($processed)) {
1255 $expr[] = array( 'expr_type' => $type, 'base_expr' => $token, 'sub_tree' => $processed);
1261 for($i=0;$i<count($expr);++$i){
1262 if($expr[$i]['expr_type'] == 'function' ||
1263 $expr[$i]['expr_type'] == 'aggregate_function') {
1264 if(!empty($expr[$i+1])) {
1265 $expr[$i]['sub_tree']=$expr[$i+1]['sub_tree'];
1268 ++$i; // BAD FORM TO MODIFY THE LOOP COUNTER
1276 if($mod) $expr=array_values($expr);
1282 private function process_update($tokens) {
1286 private function process_delete($tokens) {
1288 $del = $tokens['DELETE'];
1290 foreach($tokens['DELETE'] as $expression) {
1291 if ($expression != 'DELETE' && trim($expression,' .*') != "" && $expression != ',') {
1292 $tables[] = trim($expression,'.* ');
1296 if(empty($tables)) {
1297 foreach($tokens['FROM'] as $table) {
1298 $tables[] = $table['table'];
1302 $tokens['DELETE'] = array('TABLES' => $tables);
1307 private function process_insert($tokens, $token_category = 'INSERT') {
1311 $into = $tokens['INTO'];
1312 foreach($into as $token) {
1313 if(!trim($token)) continue;
1324 $cols = explode(",", trim($cols,'() '));
1326 unset($tokens['INTO']);
1327 $tokens[$token_category] = array('table'=>$table, 'cols'=>$cols);
1333 private function load_reserved_words() {
1335 $this->functions = array(
1515 'uncompressed_length',
1534 /* includes functions */
1535 $this->reserved = array(
1714 'uncompressed_length',
1768 'current_timestamp',
1857 'minute_microsecond',
1862 'no_write_to_binlog',
1892 'second_microsecond',
1908 'sql_calc_found_rows',
1911 'sql_tsi_frac_second',
1963 for($i=0;$i<count($this->reserved);++$i) {
1964 $this->reserved[$i]=strtoupper($this->reserved[$i]);
1965 if(!empty($this->functions[$i])) $this->functions[$i] = strtoupper($this->functions[$i]);