php内置函数分析之trim()

官方手册中:

类似函数还有两个:ltrim() 和 rtrim()。分别处理字符串的左侧、右侧。

trim()的具体实现位于:ext/standard/string.c

/* {{{ proto string trim(string str [, string character_mask])
   Strips whitespace from the beginning and end of a string.
   Forwards its arguments unchanged to php_do_trim() with mode 3, i.e. both
   mode bits set (1 = trim left, 2 = trim right), so both ends are trimmed. */
PHP_FUNCTION(trim)
{
    php_do_trim(INTERNAL_FUNCTION_PARAM_PASSTHRU, 3);
}
/* }}} */
/* {{{ php_do_trim
 * Shared entry point behind trim(), rtrim() and ltrim().
 *
 * Parses one required string argument and one optional character-mask
 * argument, then stores the result of php_trim() in return_value.
 * When the mask argument is omitted, php_trim() receives a NULL pointer and
 * a zero length.
 * mode is passed through to php_trim() unchanged.
 */
static void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
    zend_string *str;
    zend_string *what = NULL;
    char *mask_chars = NULL;
    size_t mask_len = 0;

    ZEND_PARSE_PARAMETERS_START(1, 2)
        Z_PARAM_STR(str)
        Z_PARAM_OPTIONAL
        Z_PARAM_STR(what)
    ZEND_PARSE_PARAMETERS_END();

    /* Only read the mask string when the optional argument was supplied. */
    if (what != NULL) {
        mask_chars = ZSTR_VAL(what);
        mask_len = ZSTR_LEN(what);
    }

    ZVAL_STR(return_value, php_trim(str, mask_chars, mask_len, mode));
}
/* }}} */

具体实现:

  1 /* {{{ php_trim() - strip characters from the left and/or right end of str
  2  * mode 1 : trim left   (tested as a bit: mode & 1)
  3  * mode 2 : trim right  (tested as a bit: mode & 2)
  4  * mode 3 : trim left and right
  5  * what indicates which chars are to be trimmed (what_len bytes). NULL->default (' \t\n\r\v\0')
  6  */
  7 PHPAPI zend_string *php_trim(zend_string *str, char *what, size_t what_len, int mode)
  8 {
  9     const char *c = ZSTR_VAL(str); /* start of the region that is still kept */
 10     size_t len = ZSTR_LEN(str); /* length of the region that is still kept */
 11     register size_t i;
 12     size_t trimmed = 0; /* number of characters removed from the left */
 13     char mask[256]; /* per-byte lookup table; only used when what is given and what_len != 1 */
 14 
 15     if (what) { /* caller supplied an explicit character list */
 16         if (what_len == 1) { /* single character: compare directly, no lookup table needed */
 17             char p = *what;
 18             if (mode & 1) { /* left side: count leading characters equal to p */
 19                 for (i = 0; i < len; i++) {
 20                     if (c[i] == p) {
 21                         trimmed++;
 22                     } else {
 23                         break;
 24                     }
 25                 }
 26                 len -= trimmed;
 27                 c += trimmed; /* skip past the stripped prefix */
 28             }
 29             if (mode & 2) { /* right side: shrink len while the last character equals p */
 30                 if (len > 0) {
 31                     i = len - 1;
 32                     do {
 33                         if (c[i] == p) {
 34                             len--;
 35                         } else {
 36                             break;
 37                         }
 38                     } while (i-- != 0); /* i is unsigned: compare before decrementing so the loop stops after index 0 */
 39                 }
 40             }
 41         } else {
 42             php_charmask((unsigned char*)what, what_len, mask); /* fills mask, expanding "a..z"-style ranges; its result code is not checked here */
 43 
 44             if (mode & 1) { /* left side: count leading characters that are in the mask */
 45                 for (i = 0; i < len; i++) {
 46                     if (mask[(unsigned char)c[i]]) { /* the cast keeps the table index within 0..255 */
 47                         trimmed++;
 48                     } else {
 49                         break;
 50                     }
 51                 }
 52                 len -= trimmed;
 53                 c += trimmed;
 54             }
 55             if (mode & 2) { /* right side: same backward scan as above, using the mask */
 56                 if (len > 0) {
 57                     i = len - 1;
 58                     do {
 59                         if (mask[(unsigned char)c[i]]) {
 60                             len--;
 61                         } else {
 62                             break;
 63                         }
 64                     } while (i-- != 0);
 65                 }
 66             }
 67         }
 68     } else { /* no character list: use the six default characters */
 69         if (mode & 1) {
 70             for (i = 0; i < len; i++) {
 71                 if ((unsigned char)c[i] <= ' ' && /* quick reject: every default character is <= ' ' */
 72                     (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || c[i] == '\t' || c[i] == '\v' || c[i] == '\0')) {
 73                     trimmed++;
 74                 } else {
 75                     break;
 76                 }
 77             }
 78             len -= trimmed;
 79             c += trimmed;
 80         }
 81         if (mode & 2) {
 82             if (len > 0) {
 83                 i = len - 1;
 84                 do {
 85                     if ((unsigned char)c[i] <= ' ' &&
 86                         (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || c[i] == '\t' || c[i] == '\v' || c[i] == '\0')) {
 87                         len--;
 88                     } else {
 89                         break;
 90                     }
 91                 } while (i-- != 0);
 92             }
 93         }
 94     }
 95 
 96     if (ZSTR_LEN(str) == len) { /* length unchanged: nothing was trimmed */
 97         return zend_string_copy(str); /* return the input string itself (presumably shared via refcount, no byte copy - confirm in zend_string.h) */
 98     } else {
 99         return zend_string_init(c, len, 0); /* new string built from the remaining len bytes starting at c */
100     }
101 }
102 /* }}} */

ltrim()、rtrim() 或 trim()的参数二(what)存在时进入15行处的分支,参数二不存在时进入68行处的分支。

没有参数二时,会去除空格、"\t"、"\n"、"\r"、"\0"、"\v"

即:(unsigned char)c[i] <= ' ' && (c[i] == ' ' || c[i] == '\n' || c[i] == '\r' || c[i] == '\t' || c[i] == '\v' || c[i] == '\0')

存在参数二时,会去除参数二中指定的字符。

其中函数 php_charmask() 用于处理 trim("abc123456fjsklsf", "a..z") 中的字符范围 a..z

 1 static inline int php_charmask(unsigned char *input, size_t len, char *mask)
 2 { /* Builds a 256-entry membership table in mask from the len bytes at input; "x..y" denotes an inclusive range. Returns SUCCESS, or FAILURE if an invalid ".." was found. */
 3     unsigned char *end; /* one past the last input byte */
 4     unsigned char c; /* byte currently being examined */
 5     int result = SUCCESS;
 6 
 7     memset(mask, 0, 256); /* start with an empty set */
 8     for (end = input+len; input < end; input++) {
 9         c=*input;
10         if ((input+3 < end) && input[1] == '.' && input[2] == '.' /* "c..X" with X still inside the buffer */
11                 && input[3] >= c) { /* and the range is not decreasing */
12             memset(mask+c, 1, input[3] - c + 1); /* mark every byte from c through X inclusive */
13             input+=3; /* together with the loop's input++ this consumes all four bytes */
14         } else if ((input+1 < end) && input[0] == '.' && input[1] == '.') { /* a ".." that did not form a valid range */
15             /* Error, try to be as helpful as possible:
16                (a range ending/starting with '.' won't be captured here) */
17             if (end-len >= input) { /* there was no 'left' char: end-len is the original start of input */
18                 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the left of '..'");
19                 result = FAILURE;
20                 continue; /* only this byte is skipped; the scan resumes at the second '.' */
21             }
22             if (input+2 >= end) { /* there is no 'right' char */
23                 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, no character to the right of '..'");
24                 result = FAILURE;
25                 continue;
26             }
27             if (input[-1] > input[2]) { /* wrong order */
28                 php_error_docref(NULL, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing");
29                 result = FAILURE;
30                 continue;
31             }
32             /* FIXME: better error (a..b..c is the only left possibility?) */
33             php_error_docref(NULL, E_WARNING, "Invalid '..'-range");
34             result = FAILURE;
35             continue;
36         } else {
37             mask[c]=1; /* ordinary character: add it to the set */
38         }
39     }
40     return result; /* FAILURE if any warning was raised above, otherwise SUCCESS */
41 }

第二个参数是字符范围的时候,源码中并没有严格限制成 a..z 这样的格式。

总的流程就是,待处理的字符串左侧(或右侧)逐字符去匹配是否在将要去除的字符中(第二个参数设置的字符 或者 默认的那6个字符),

如果字符匹配到则进行下一个比较,否则中断匹配查找。返回余下字符串。

猜你喜欢

转载自www.cnblogs.com/natian-ws/p/9085227.html