wpdb::strip_invalid_text( array $data )
Strips any invalid characters based on value/charset pairs.
Parameters
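- $data (array) (Required) Array of value arrays. Each value array has the keys 'value' and 'charset'. An optional 'ascii' key can be set to false to avoid redundant ASCII checks. A 'length' key holding an array with 'length' and 'type' entries truncates the value to that length, counted in bytes when 'type' is 'byte' and in characters otherwise.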
Return
(array|WP_Error) The $data parameter, with invalid characters removed from each value. This works as a passthrough: any additional keys such as 'field' are retained in each value array. If we cannot remove invalid characters, a WP_Error object is returned.
Source
File: wp-includes/wp-db.php
/**
 * Strips any invalid characters based on value/charset pairs.
 *
 * Each value is handled by the cheapest route available: values with no charset
 * and non-string values are left untouched, latin1/ASCII values are only truncated,
 * utf8/utf8mb3/utf8mb4 values are filtered with a regular expression when
 * mb_strlen() exists, and everything else is sent to the database in a single
 * SELECT that converts (and, if needed, truncates) the remaining values.
 *
 * @since 4.2.0
 *
 * @param array $data Array of value arrays. Each value array has the keys 'value' and 'charset'.
 *                    An optional 'ascii' key can be set to false to avoid redundant ASCII checks.
 *                    An optional 'length' key holding an array with 'length' and 'type' entries
 *                    truncates the value; a 'type' of 'byte' truncates by bytes.
 * @return array|WP_Error The $data parameter, with invalid characters removed from each value.
 *                        This works as a passthrough: any additional keys such as 'field' are
 *                        retained in each value array. If the database conversion query returns
 *                        no row, a WP_Error with code 'wpdb_strip_invalid_text_failure' is returned.
 */
protected function strip_invalid_text( $data ) {
	$db_check_string = false;

	foreach ( $data as &$value ) {
		$charset = $value['charset'];

		// 'length' is optional: read it through isset() so that callers supplying only
		// 'value' and 'charset' do not trigger an undefined index notice.
		$length_spec = isset( $value['length'] ) ? $value['length'] : false;

		if ( is_array( $length_spec ) ) {
			$length                  = $length_spec['length'];
			$truncate_by_byte_length = 'byte' === $length_spec['type'];
		} else {
			$length = false;
			// Since we have no length, we'll never truncate. Initialize the variable to false.
			// True would take us through an unnecessary (for this case) codepath below.
			$truncate_by_byte_length = false;
		}

		// There's no charset to work with.
		if ( false === $charset ) {
			continue;
		}

		// Column isn't a string.
		if ( ! is_string( $value['value'] ) ) {
			continue;
		}

		$needs_validation = true;
		if (
			// latin1 can store any byte sequence.
			'latin1' === $charset
		||
			// ASCII is always OK.
			( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) )
		) {
			$truncate_by_byte_length = true;
			$needs_validation        = false;
		}

		if ( $truncate_by_byte_length ) {
			mbstring_binary_safe_encoding();
			if ( false !== $length && strlen( $value['value'] ) > $length ) {
				$value['value'] = substr( $value['value'], 0, $length );
			}
			reset_mbstring_encoding();

			// latin1/ASCII values are finished once truncated; byte-limited values in
			// other charsets still have to be validated below.
			if ( ! $needs_validation ) {
				continue;
			}
		}

		// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
		if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) {
			// The pattern uses the /x modifier, so the line breaks are significant:
			// each "#" comment runs to the end of its own line only.
			$regex = '/
				(
					(?: [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
					|   [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
					|   \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
					|   [\xE1-\xEC][\x80-\xBF]{2}
					|   \xED[\x80-\x9F][\x80-\xBF]
					|   [\xEE-\xEF][\x80-\xBF]{2}';

			// Four-byte sequences are only accepted for utf8mb4.
			if ( 'utf8mb4' === $charset ) {
				$regex .= '
					|    \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
					|    [\xF1-\xF3][\x80-\xBF]{3}
					|    \xF4[\x80-\x8F][\x80-\xBF]{2}
				';
			}

			$regex .= '){1,40}                          # ...one or more times
				)
				| .                                  # anything else
				/x';

			// Valid runs are captured as $1 and kept; any other single byte matches
			// the "." branch, where $1 is empty, and is therefore dropped.
			$value['value'] = preg_replace( $regex, '$1', $value['value'] );

			if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) {
				$value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' );
			}
			continue;
		}

		// We couldn't use any local conversions, send it to the DB.
		$value['db']     = true;
		$db_check_string = true;
	}
	unset( $value ); // Remove by reference.

	if ( $db_check_string ) {
		$queries = array();
		foreach ( $data as $col => $value ) {
			if ( ! empty( $value['db'] ) ) {
				// We're going to need to truncate by characters or bytes, depending on the length value we have.
				if ( isset( $value['length']['type'] ) && 'byte' === $value['length']['type'] ) {
					// Using binary causes LEFT() to truncate by bytes.
					$charset = 'binary';
				} else {
					$charset = $value['charset'];
				}

				if ( $this->charset ) {
					$connection_charset = $this->charset;
				} else {
					if ( $this->use_mysqli ) {
						$connection_charset = mysqli_character_set_name( $this->dbh );
					} else {
						$connection_charset = mysql_client_encoding();
					}
				}

				if ( isset( $value['length'] ) && is_array( $value['length'] ) ) {
					// '%.0f' renders the length as a plain integer string before it is placed in the SQL.
					$length          = sprintf( '%.0f', $value['length']['length'] );
					$queries[ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING $charset ), $length ) USING $connection_charset )", $value['value'] );
				} elseif ( 'binary' !== $charset ) {
					// If we don't have a length, there's no need to convert binary - it will always return the same result.
					$queries[ $col ] = $this->prepare( "CONVERT( CONVERT( %s USING $charset ) USING $connection_charset )", $value['value'] );
				}

				// The 'db' marker is internal bookkeeping and must not leak into the returned data.
				unset( $data[ $col ]['db'] );
			}
		}

		$sql = array();
		foreach ( $queries as $column => $query ) {
			if ( ! $query ) {
				continue;
			}

			$sql[] = $query . " AS x_$column";
		}

		// Every flagged value may have been skipped above (binary charset with no length).
		// Those values pass through unchanged, so there is nothing to select; running
		// 'SELECT ' with an empty column list would only produce a spurious failure.
		if ( ! empty( $sql ) ) {
			$this->check_current_query = false;
			$row                       = $this->get_row( 'SELECT ' . implode( ', ', $sql ), ARRAY_A );
			if ( ! $row ) {
				return new WP_Error( 'wpdb_strip_invalid_text_failure' );
			}

			// Copy each converted value back onto the column it was computed for.
			foreach ( array_keys( $data ) as $column ) {
				if ( isset( $row[ "x_$column" ] ) ) {
					$data[ $column ]['value'] = $row[ "x_$column" ];
				}
			}
		}
	}

	return $data;
}
Changelog
Version | Description |
---|---|
4.2.0 | Introduced. |
© 2003–2021 WordPress Foundation
Licensed under the GNU GPLv2+ License.
https://developer.wordpress.org/reference/classes/wpdb/strip_invalid_text