7 Class: parseCSV v0.4.3 beta
8 http://code.google.com/p/parsecsv-for-php/
11 Fully conforms to the specifications outlined on Wikipedia:
12 - http://en.wikipedia.org/wiki/Comma-separated_values
14 Based on the concept of Ming Hong Ng's CsvFileParser class:
15 - http://minghong.blogspot.com/2006/07/csv-parser-for-php.html
19 Copyright (c) 2007 Jim Myhrberg (jim@zydev.info).
21 Permission is hereby granted, free of charge, to any person obtaining a copy
22 of this software and associated documentation files (the "Software"), to deal
23 in the Software without restriction, including without limitation the rights
24 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25 copies of the Software, and to permit persons to whom the Software is
26 furnished to do so, subject to the following conditions:
28 The above copyright notice and this permission notice shall be included in
29 all copies or substantial portions of the Software.
31 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
34 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
44 $csv = new parseCSV('data.csv');
47 # tab delimited, and encoding conversion
48 $csv = new parseCSV();
49 $csv->encoding('UTF-16', 'UTF-8');
50 $csv->delimiter = "\t";
51 $csv->parse('data.tsv');
54 # auto-detect delimiter character
55 $csv = new parseCSV();
56 $csv->auto('data.csv');
59 # modify data in a csv file
60 $csv = new parseCSV();
62 $csv->parse('data.csv');
63 # "4" is the value of the "id" column of the CSV row
64 $csv->data[4] = array('firstname' => 'John', 'lastname' => 'Doe', 'email' => 'john@doe.com');
67 # add row/entry to end of CSV file
68 # - only recommended when you know the exact structure of the file
69 $csv = new parseCSV();
70 $csv->save('data.csv', array(array('1986', 'Home', 'Nowhere', '')), true);
72 # convert 2D array to csv data and send headers
73 # to browser to treat output as a file and download it
74 $csv = new parseCSV();
75 $csv->output('movies.csv', $array);
84 * - set these options with $object->var_name = 'value';
87 # use first line/entry as field names
90 # override field names
91 var $fields = array();
93 # sort entries by this field
95 var $sort_reverse = false;
97 # sort behavior passed to ksort/krsort functions
98 # regular = SORT_REGULAR
99 # numeric = SORT_NUMERIC
100 # string = SORT_STRING
101 var $sort_type = null;
103 # delimiter (comma) and enclosure (double quote)
104 var $delimiter = ',';
105 var $enclosure = '"';
107 # basic SQL-like conditions for row matching
108 var $conditions = null;
110 # number of rows to ignore from beginning of data
113 # limits the number of returned rows to specified amount
116 # number of rows to analyze when attempting to auto-detect delimiter
117 var $auto_depth = 15;
119 # characters to ignore when attempting to auto-detect delimiter
120 var $auto_non_chars = "a-zA-Z0-9\n\r";
122 # preferred delimiter characters, only used when all filtering methods
123 # return multiple possible delimiters (happens very rarely)
124 var $auto_preferred = ",;\t.:|";
126 # character encoding options
127 var $convert_encoding = false;
128 var $input_encoding = 'ISO-8859-1';
129 var $output_encoding = 'ISO-8859-1';
131 # used by unparse(), save(), and output() functions
132 var $linefeed = "\r\n";
134 # only used by output() function
135 var $output_delimiter = ',';
136 var $output_filename = 'data.csv';
138 # keep raw file data in memory after successful parsing (useful for debugging)
139 var $keep_file_data = false;
148 # loaded file contents
151 # error while parsing input data
152 # 0 = No errors found. Everything should be fine :)
153 # 1 = Hopefully correctable syntax error was found.
154 # 2 = Enclosure character (double quote by default)
155 # was found in non-enclosed field. This means
156 # the file is either corrupt, or does not follow
157 # standard CSV formatting. Please validate
158 # the parsed data yourself.
161 # detailed error info
162 var $error_info = array();
164 # array of field values in data parsed
165 var $titles = array();
167 # two dimensional array of CSV data
173 * @param input CSV file or string
/**
 * Legacy (PHP 4 style) constructor
 * - kept so code that calls parseCSV() explicitly keeps working
 * @param   input       CSV file or string, parsed immediately when not empty
 * @param   offset      number of rows to ignore from beginning of data
 * @param   limit       limits the number of returned rows to specified amount
 * @param   conditions  basic SQL-like conditions for row matching
 * @return  nothing
 */
function parseCSV ($input = null, $offset = null, $limit = null, $conditions = null) {
    if ( $offset !== null ) $this->offset = $offset;
    if ( $limit !== null ) $this->limit = $limit;
    // $conditions is null by default and a condition string otherwise;
    // count() on either warns on PHP 7.2+ and throws a TypeError on PHP 8,
    // so test for "something was passed" with empty() instead.
    if ( !empty($conditions) ) $this->conditions = $conditions;
    if ( !empty($input) ) $this->parse($input);
}

/**
 * Constructor
 * - PHP 8 no longer treats a method named after the class as the
 *   constructor, so forward to the legacy entry point. Declared after
 *   parseCSV() on purpose, so older PHP 5 releases do not report a
 *   redefined constructor.
 * @param   input       CSV file or string, parsed immediately when not empty
 * @param   offset      number of rows to ignore from beginning of data
 * @param   limit       limits the number of returned rows to specified amount
 * @param   conditions  basic SQL-like conditions for row matching
 * @return  nothing
 */
function __construct ($input = null, $offset = null, $limit = null, $conditions = null) {
    $this->parseCSV($input, $offset, $limit, $conditions);
}
184 // ==============================================
185 // ----- [ Main Functions ] ---------------------
186 // ==============================================
189 * Parse CSV file or string
190 * @param input CSV file or string
/**
 * Parse CSV file or string
 * @param   input       CSV file or string, falls back to $this->file when null
 * @param   offset      number of rows to ignore from beginning of data
 * @param   limit       limits the number of returned rows to specified amount
 * @param   conditions  basic SQL-like conditions for row matching
 * @return  false when parsing produced false, otherwise true
 */
function parse ($input = null, $offset = null, $limit = null, $conditions = null) {
    if ( $input === null ) $input = $this->file;
    if ( !empty($input) ) {
        if ( $offset !== null ) $this->offset = $offset;
        if ( $limit !== null ) $this->limit = $limit;
        // $conditions is null or a condition string; count() on either warns
        // on PHP 7.2+ and throws a TypeError on PHP 8, so use empty() instead.
        if ( !empty($conditions) ) $this->conditions = $conditions;
        if ( is_readable($input) ) {
            // input names a readable file
            $this->data = $this->parse_file($input);
        } else {
            // anything else is treated as raw CSV text
            $this->file_data = &$input;
            $this->data = $this->parse_string();
        }
        if ( $this->data === false ) return false;
    }
    return true;
}
211 * Save changes, or new file and/or data
212 * @param file file to save to
213 * @param data 2D array with data
214 * @param append append current data to end of target CSV if exists
215 * @param fields field names
216 * @return true or false
/**
 * Save changes, or new file and/or data
 * @param   file    file to save to, falls back to $this->file when empty
 * @param   data    2D array with data
 * @param   append  append current data to end of target CSV if exists
 * @param   fields  field names
 * @return  true or false
 */
function save ($file = null, $data = array(), $append = false, $fields = array()) {
    if ( empty($file) ) $file = $this->file;
    // Open in binary mode: unparse() already terminates every row with
    // $this->linefeed, and the 't' flag would let Windows translate the "\n"
    // inside it a second time ("\r\n" becomes "\r\r\n").
    $mode = ( $append ) ? 'ab' : 'wb' ;
    // targets ending in .php get a die() guard line from unparse()
    $is_php = ( preg_match('/\.php$/i', $file) ) ? true : false ;
    return $this->_wfile($file, $this->unparse($data, $fields, $append, $is_php), $mode);
}
226 * Generate CSV based string for output
227 * @param filename if specified, headers and data will be output directly to browser as a downloadable file
228 * @param data 2D array with data
229 * @param fields field names
230 * @param delimiter delimiter used to separate data
231 * @return CSV data using delimiter of choice, or default
233 function output ($filename = null, $data = array(), $fields = array(), $delimiter = null) {
234 if ( empty($filename) ) $filename = $this->output_filename;
235 if ( $delimiter === null ) $delimiter = $this->output_delimiter;
236 $data = $this->unparse($data, $fields, null, null, $delimiter);
237 if ( $filename !== null ) {
238 header('Content-type: application/csv');
239 header('Content-Disposition: attachment; filename="'.$filename.'"');
246 * Convert character encoding
247 * @param input input character encoding, uses default if left blank
248 * @param output output character encoding, uses default if left blank
/**
 * Turn on character encoding conversion
 * - either argument may be left as null to keep the current setting
 * @param   input   character encoding of the source data
 * @param   output  character encoding to convert to
 * @return  nothing
 */
function encoding ($input = null, $output = null) {
    if ( isset($input) ) {
        $this->input_encoding = $input;
    }
    if ( isset($output) ) {
        $this->output_encoding = $output;
    }
    $this->convert_encoding = true;
}
258 * Auto-Detect Delimiter: Find delimiter by analyzing a specific number of
259 * rows to determine most probable delimiter character
260 * @param file local CSV file
261 * @param parse true/false parse file directly
262 * @param search_depth number of rows to analyze
263 * @param preferred preferred delimiter characters
264 * @param enclosure enclosure character, default is double quote (").
265 * @return delimiter character
267 function auto ($file = null, $parse = true, $search_depth = null, $preferred = null, $enclosure = null) {
269 if ( $file === null ) $file = $this->file;
270 if ( empty($search_depth) ) $search_depth = $this->auto_depth;
271 if ( $enclosure === null ) $enclosure = $this->enclosure;
272 else $this->enclosure = $enclosure;
274 if ( $preferred === null ) $preferred = $this->auto_preferred;
276 if ( empty($this->file_data) ) {
277 if ( $this->_check_data($file) ) {
278 $data = &$this->file_data;
281 $data = &$this->file_data;
285 $strlen = strlen($data);
290 // walk specific depth finding possible delimiter characters
291 for ( $i=0; $i < $strlen; $i++ ) {
293 $nch = ( isset($data{$i+1}) ) ? $data{$i+1} : false ;
294 $pch = ( isset($data{$i-1}) ) ? $data{$i-1} : false ;
296 // open and closing quotes
297 if ( $ch == $enclosure ) {
298 if ( !$enclosed || $nch != $enclosure ) {
299 $enclosed = ( $enclosed ) ? false : true ;
300 } elseif ( $enclosed ) {
305 } elseif ( ($ch == "\n" && $pch != "\r" || $ch == "\r") && !$enclosed ) {
306 if ( $n >= $search_depth ) {
314 } elseif (!$enclosed) {
315 if ( !preg_match('/['.preg_quote($this->auto_non_chars, '/').']/i', $ch) ) {
316 if ( !isset($chars[$ch][$n]) ) {
326 $depth = ( $to_end ) ? $n-1 : $n ;
328 foreach( $chars as $char => $value ) {
329 if ( $match = $this->_check_count($char, $value, $depth, $preferred) ) {
330 $filtered[$match] = $char;
334 // capture most probable delimiter
336 $this->delimiter = reset($filtered);
339 if ( $parse ) $this->data = $this->parse_string();
341 return $this->delimiter;
346 // ==============================================
347 // ----- [ Core Functions ] ---------------------
348 // ==============================================
351 * Read file to string and call parse_string()
352 * @param file local CSV file
353 * @return 2D array with CSV data, or false on failure
/**
 * Load a file (unless data is already loaded) and hand it to parse_string()
 * @param   file  local CSV file, falls back to $this->file when null
 * @return  2D array with CSV data, or false when no data could be loaded
 */
function parse_file ($file = null) {
    if ( $file === null ) {
        $file = $this->file;
    }

    // only hit the disk when nothing has been loaded yet
    if ( empty($this->file_data) ) {
        $this->load_data($file);
    }

    if ( empty($this->file_data) ) {
        return false;
    }
    return $this->parse_string();
}
362 * Parse CSV strings to arrays
363 * @param data CSV string
364 * @return 2D array with CSV data, or false on failure
366 function parse_string ($data = null) {
367 if ( empty($data) ) {
368 if ( $this->_check_data() ) {
369 $data = &$this->file_data;
373 $white_spaces = str_replace($this->delimiter, '', " \t\x0B\0");
379 $head = ( !empty($this->fields) ) ? $this->fields : array() ;
382 $was_enclosed = false;
383 $strlen = strlen($data);
385 // walk through each character
386 for ( $i=0; $i < $strlen; $i++ ) {
388 $nch = ( isset($data{$i+1}) ) ? $data{$i+1} : false ;
389 $pch = ( isset($data{$i-1}) ) ? $data{$i-1} : false ;
391 // open/close quotes, and inline quotes
392 if ( $ch == $this->enclosure ) {
394 if ( ltrim($current, $white_spaces) == '' ) {
396 $was_enclosed = true;
399 $error_row = count($rows) + 1;
400 $error_col = $col + 1;
401 if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) {
402 $this->error_info[$error_row.'-'.$error_col] = array(
404 'info' => 'Syntax error found on row '.$error_row.'. Non-enclosed fields can not contain double-quotes.',
406 'field' => $error_col,
407 'field_name' => (!empty($head[$col])) ? $head[$col] : null,
412 } elseif ($nch == $this->enclosure) {
415 } elseif ( $nch != $this->delimiter && $nch != "\r" && $nch != "\n" ) {
416 for ( $x=($i+1); isset($data{$x}) && ltrim($data{$x}, $white_spaces) == ''; $x++ ) {}
417 if ( $data{$x} == $this->delimiter ) {
421 if ( $this->error < 1 ) {
424 $error_row = count($rows) + 1;
425 $error_col = $col + 1;
426 if ( !isset($this->error_info[$error_row.'-'.$error_col]) ) {
427 $this->error_info[$error_row.'-'.$error_col] = array(
430 'Syntax error found on row '.(count($rows) + 1).'. '.
431 'A single double-quote was found within an enclosed string. '.
432 'Enclosed double-quotes must be escaped with a second double-quote.',
433 'row' => count($rows) + 1,
435 'field_name' => (!empty($head[$col])) ? $head[$col] : null,
446 } elseif ( ($ch == $this->delimiter || $ch == "\n" || $ch == "\r") && !$enclosed ) {
447 $key = ( !empty($head[$col]) ) ? $head[$col] : $col ;
448 $row[$key] = ( $was_enclosed ) ? $current : trim($current) ;
450 $was_enclosed = false;
454 if ( $ch == "\n" || $ch == "\r" ) {
455 if ( $this->_validate_offset($row_count) && $this->_validate_row_conditions($row, $this->conditions) ) {
456 if ( $this->heading && empty($head) ) {
458 } elseif ( empty($this->fields) || (!empty($this->fields) && (($this->heading && $row_count > 0) || !$this->heading)) ) {
459 if ( !empty($this->sort_by) && !empty($row[$this->sort_by]) ) {
460 if ( isset($rows[$row[$this->sort_by]]) ) {
461 $rows[$row[$this->sort_by].'_0'] = &$rows[$row[$this->sort_by]];
462 unset($rows[$row[$this->sort_by]]);
463 for ( $sn=1; isset($rows[$row[$this->sort_by].'_'.$sn]); $sn++ ) {}
464 $rows[$row[$this->sort_by].'_'.$sn] = $row;
465 } else $rows[$row[$this->sort_by]] = $row;
466 } else $rows[] = $row;
472 if ( $this->sort_by === null && $this->limit !== null && count($rows) == $this->limit ) {
475 if ( $ch == "\r" && $nch == "\n" ) $i++;
478 // append character to current field
483 $this->titles = $head;
484 if ( !empty($this->sort_by) ) {
485 $sort_type = SORT_REGULAR;
486 if ( $this->sort_type == 'numeric' ) {
487 $sort_type = SORT_NUMERIC;
488 } elseif ( $this->sort_type == 'string' ) {
489 $sort_type = SORT_STRING;
491 ( $this->sort_reverse ) ? krsort($rows, $sort_type) : ksort($rows, $sort_type) ;
492 if ( $this->offset !== null || $this->limit !== null ) {
493 $rows = array_slice($rows, ($this->offset === null ? 0 : $this->offset) , $this->limit, true);
496 if ( !$this->keep_file_data ) {
497 $this->file_data = null;
503 * Create CSV data from array
504 * @param data 2D array with data
505 * @param fields field names
506 * @param append if true, field names will not be output
507 * @param is_php if a php die() call should be put on the first
508 * line of the file, this is later ignored when read.
509 * @param delimiter field delimiter to use
510 * @return CSV data (text string)
512 function unparse ( $data = array(), $fields = array(), $append = false , $is_php = false, $delimiter = null) {
513 if ( !is_array($data) || empty($data) ) $data = &$this->data;
514 if ( !is_array($fields) || empty($fields) ) $fields = &$this->titles;
515 if ( $delimiter === null ) $delimiter = $this->delimiter;
517 $string = ( $is_php ) ? "<?php header('Status: 403'); die(' '); ?>".$this->linefeed : '' ;
521 if ( $this->heading && !$append && !empty($fields) ) {
522 foreach( $fields as $key => $value ) {
523 $entry[] = $this->_enclose_value($value);
525 $string .= implode($delimiter, $entry).$this->linefeed;
530 foreach( $data as $key => $row ) {
531 foreach( $row as $field => $value ) {
532 $entry[] = $this->_enclose_value($value);
534 $string .= implode($delimiter, $entry).$this->linefeed;
542 * Load local file or string
543 * @param input local CSV file
544 * @return true or false
546 function load_data ($input = null) {
549 if ( $input === null ) {
551 } elseif ( file_exists($input) ) {
556 if ( !empty($data) || $data = $this->_rfile($file) ) {
557 if ( $this->file != $file ) $this->file = $file;
558 if ( preg_match('/\.php$/i', $file) && preg_match('/<\?.*?\?>(.*)/ims', $data, $strip) ) {
559 $data = ltrim($strip[1]);
561 if ( $this->convert_encoding ) $data = iconv($this->input_encoding, $this->output_encoding, $data);
562 if ( substr($data, -1) != "\n" ) $data .= "\n";
563 $this->file_data = &$data;
570 // ==============================================
571 // ----- [ Internal Functions ] -----------------
572 // ==============================================
575 * Validate a row against specified conditions
576 * @param row array with values from a row
577 * @param conditions specified conditions that the row must match
578 * @return true or false
580 function _validate_row_conditions ($row = array(), $conditions = null) {
581 if ( !empty($row) ) {
582 if ( !empty($conditions) ) {
583 $conditions = (strpos($conditions, ' OR ') !== false) ? explode(' OR ', $conditions) : array($conditions) ;
585 foreach( $conditions as $key => $value ) {
586 if ( strpos($value, ' AND ') !== false ) {
587 $value = explode(' AND ', $value);
589 foreach( $value as $k => $v ) {
590 $and .= $this->_validate_row_condition($row, $v);
592 $or .= (strpos($and, '0') !== false) ? '0' : '1' ;
594 $or .= $this->_validate_row_condition($row, $value);
597 return (strpos($or, '1') !== false) ? true : false ;
605 * Validate a row against a single condition
606 * @param row array with values from a row
607 * @param condition specified condition that the row must match
608 * @return true or false
610 function _validate_row_condition ($row, $condition) {
615 '>', 'is greater than',
616 '<=', 'is less than or equals',
617 '>=', 'is greater than or equals',
621 $operators_regex = array();
622 foreach( $operators as $value ) {
623 $operators_regex[] = preg_quote($value, '/');
625 $operators_regex = implode('|', $operators_regex);
626 if ( preg_match('/^(.+) ('.$operators_regex.') (.+)$/i', trim($condition), $capture) ) {
627 $field = $capture[1];
629 $value = $capture[3];
630 if ( preg_match('/^([\'\"]{1})(.*)([\'\"]{1})$/i', $value, $capture) ) {
631 if ( $capture[1] == $capture[3] ) {
632 $value = $capture[2];
633 $value = str_replace("\\n", "\n", $value);
634 $value = str_replace("\\r", "\r", $value);
635 $value = str_replace("\\t", "\t", $value);
636 $value = stripslashes($value);
639 if ( array_key_exists($field, $row) ) {
640 if ( ($op == '=' || $op == 'equals' || $op == 'is') && $row[$field] == $value ) {
642 } elseif ( ($op == '!=' || $op == 'is not') && $row[$field] != $value ) {
644 } elseif ( ($op == '<' || $op == 'is less than' ) && $row[$field] < $value ) {
646 } elseif ( ($op == '>' || $op == 'is greater than') && $row[$field] > $value ) {
648 } elseif ( ($op == '<=' || $op == 'is less than or equals' ) && $row[$field] <= $value ) {
650 } elseif ( ($op == '>=' || $op == 'is greater than or equals') && $row[$field] >= $value ) {
652 } elseif ( $op == 'contains' && preg_match('/'.preg_quote($value, '/').'/i', $row[$field]) ) {
654 } elseif ( $op == 'does not contain' && !preg_match('/'.preg_quote($value, '/').'/i', $row[$field]) ) {
665 * Validates if the row is within the offset or not if sorting is disabled
666 * @param current_row the current row number being processed
667 * @return true or false
/**
 * Tell whether a row lies at or past the configured offset
 * - the offset is only enforced here while sorting is disabled
 * @param   current_row  the current row number being processed
 * @return  false while the row is still before the offset, otherwise true
 */
function _validate_offset ($current_row) {
    $sorting_disabled = ( $this->sort_by === null );
    $has_offset = ( $this->offset !== null );

    if ( $sorting_disabled && $has_offset && $current_row < $this->offset ) {
        return false;
    }
    return true;
}
675 * Enclose values if needed
676 * - only used by unparse()
677 * @param value string to process
678 * @return Processed value
/**
 * Enclose values if needed
 * - only used by unparse()
 * - a value is enclosed when it contains the delimiter, the enclosure
 *   character or a line break, or when it starts or ends with a space
 * @param   value  string to process
 * @return  Processed value (returned untouched when no enclosing is needed)
 */
function _enclose_value ($value = null) {
    if ( $value !== null && $value != '' ) {
        // inspect a string copy so numeric values can be checked safely
        // and are handed back unchanged when no enclosing is required
        $text = (string) $value;
        $delimiter = preg_quote($this->delimiter, '/');
        $enclosure = preg_quote($this->enclosure, '/');
        $has_special = preg_match("/".$delimiter."|".$enclosure."|\n|\r/i", $text);
        // substr() replaces the $value{0} offset syntax, which is deprecated
        // in PHP 7.4 and a parse error in PHP 8
        $has_edge_space = ( substr($text, 0, 1) === ' ' || substr($text, -1) === ' ' );
        if ( $has_special || $has_edge_space ) {
            // escape embedded enclosure characters by doubling them
            $value = str_replace($this->enclosure, $this->enclosure.$this->enclosure, $text);
            $value = $this->enclosure.$value.$this->enclosure;
        }
    }
    return $value;
}
694 * @param file local filename
695 * @return true or false
/**
 * Make sure file data is available, loading it on demand
 * @param   file  local filename, falls back to $this->file when null
 * @return  true when data is already loaded, otherwise the result of load_data()
 */
function _check_data ($file = null) {
    if ( !empty($this->file_data) ) {
        return true;
    }

    if ( $file === null ) {
        $file = $this->file;
    }
    return $this->load_data($file);
}
707 * Check if passed info might be delimiter
708 * - only used by auto()
709 * @return special string used for delimiter selection, or false
711 function _check_count ($char, $array, $depth, $preferred) {
712 if ( $depth == count($array) ) {
716 foreach( $array as $key => $value ) {
717 if ( $first == null ) {
719 } elseif ( $value == $first && $equal !== false) {
721 } elseif ( $value == $first+1 && $equal !== false ) {
729 $match = ( $almost ) ? 2 : 1 ;
730 $pref = strpos($preferred, $char);
731 $pref = ( $pref !== false ) ? str_pad($pref, 3, '0', STR_PAD_LEFT) : '999' ;
732 return $pref.$match.'.'.(99999 - str_pad($first, 5, '0', STR_PAD_LEFT));
739 * @param file local filename
740 * @return Data from file, or false on failure
742 function _rfile ($file = null) {
743 if ( is_readable($file) ) {
744 if ( !($fh = fopen($file, 'r')) ) return false;
745 $data = fread($fh, filesize($file));
753 * Write to local file
754 * @param file local filename
755 * @param string data to write to file
756 * @param mode fopen() mode
757 * @param lock flock() mode
758 * @return true or false
760 function _wfile ($file, $string = '', $mode = 'wb', $lock = 2) {
761 if ( $fp = fopen($file, $mode) ) {
763 $re = fwrite($fp, $string);
765 if ( $re != false && $re2 != false ) return true;