引言

cctype是C++对ctype.h头文件的封装,这个文件里面定义了一系列字符识别和转换函数,我们一起来看看它们的作用和具体实现。

cctype头文件

源码位置

​www.aospxref.com/android-12.…​

36  #include <sys/cdefs.h>
37
38 #define __BIONIC_CTYPE_INLINE static __inline
39 #include <bits/ctype_inlines.h>

跳转到​​www.aospxref.com/android-12.…​

字符识别函数

isalnum---识别字符是否是字母或数字

66  /** Returns true if `ch` is in `[A-Za-z0-9]`. */
67 __BIONIC_CTYPE_INLINE int isalnum(int __ch) {
68 // `isalnum(c)` is `isalpha(c) || isdigit(c)`, but there's no obvious way
69 // to simplify that, and the table lookup is just slightly faster...
70 // Note that this is unsafe for inputs less than -1 (EOF) or greater than
71 // 0xff. This is true of other C libraries too.
72 return (_ctype_[__ch + 1] & (_CTYPE_U|_CTYPE_L|_CTYPE_N));
73 }

这里使用的是查表的方式进行判断,我们知道A-Z是0x41-0x5A,a-z是0x61-0x7A,0-9是0x30-0x39。

38  /** Internal implementation detail. Do not use. */
39 #define _CTYPE_U 0x01
40 /** Internal implementation detail. Do not use. */
41 #define _CTYPE_L 0x02
42 /** Internal implementation detail. Do not use. */
43 #define _CTYPE_D 0x04
44 /** Internal implementation detail. Do not use. */
45 #define _CTYPE_S 0x08
46 /** Internal implementation detail. Do not use. */
47 #define _CTYPE_P 0x10
48 /** Internal implementation detail. Do not use. */
49 #define _CTYPE_C 0x20
50 /** Internal implementation detail. Do not use. */
51 #define _CTYPE_X 0x40
52 /** Internal implementation detail. Do not use. */
53 #define _CTYPE_B 0x80
54 /** Internal implementation detail. Do not use. */
55 #define _CTYPE_R (_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
56 /** Internal implementation detail. Do not use. */
57 #define _CTYPE_A (_CTYPE_L|_CTYPE_U)
58 /** Internal implementation detail. Do not use. */
59 #define _CTYPE_N _CTYPE_D

_CTYPE_U = 0x01,_CTYPE_L = 0x02,_CTYPE_N = 0x04,分别是第0位、第1位、第2位为1的掩码;查表得到的值只要这三位中有任意一位被置位,isalnum就返回非零。我们再来看看_ctype_表

​www.aospxref.com/android-12.…​

39  const char _C_ctype_[1 + CTYPE_NUM_CHARS] = {
40 0,
41 _C, _C, _C, _C, _C, _C, _C, _C,
42 _C, _C|_S, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C,
43 _C, _C, _C, _C, _C, _C, _C, _C,
44 _C, _C, _C, _C, _C, _C, _C, _C,
45 _S|(char)_B, _P, _P, _P, _P, _P, _P, _P,
46 _P, _P, _P, _P, _P, _P, _P, _P,//下一行第一个index为49,开始数字,_N标记
47 _N, _N, _N, _N, _N, _N, _N, _N,
48 _N, _N, _P, _P, _P, _P, _P, _P,//下一行第二个index为66,开始大写字母,_U标记
49 _P, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U,
50 _U, _U, _U, _U, _U, _U, _U, _U,
51 _U, _U, _U, _U, _U, _U, _U, _U,
52 _U, _U, _U, _P, _P, _P, _P, _P,//下一行第二个index为98,开始小写字母,_L标记
53 _P, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L,
54 _L, _L, _L, _L, _L, _L, _L, _L,
55 _L, _L, _L, _L, _L, _L, _L, _L,
56 _L, _L, _L, _P, _P, _P, _P, _C,
57
58 0, 0, 0, 0, 0, 0, 0, 0, /* 80 */
59 0, 0, 0, 0, 0, 0, 0, 0, /* 88 */
60 0, 0, 0, 0, 0, 0, 0, 0, /* 90 */
61 0, 0, 0, 0, 0, 0, 0, 0, /* 98 */
62 0, 0, 0, 0, 0, 0, 0, 0, /* A0 */
63 0, 0, 0, 0, 0, 0, 0, 0, /* A8 */
64 0, 0, 0, 0, 0, 0, 0, 0, /* B0 */
65 0, 0, 0, 0, 0, 0, 0, 0, /* B8 */
66 0, 0, 0, 0, 0, 0, 0, 0, /* C0 */
67 0, 0, 0, 0, 0, 0, 0, 0, /* C8 */
68 0, 0, 0, 0, 0, 0, 0, 0, /* D0 */
69 0, 0, 0, 0, 0, 0, 0, 0, /* D8 */
70 0, 0, 0, 0, 0, 0, 0, 0, /* E0 */
71 0, 0, 0, 0, 0, 0, 0, 0, /* E8 */
72 0, 0, 0, 0, 0, 0, 0, 0, /* F0 */
73 0, 0, 0, 0, 0, 0, 0, 0 /* F8 */
74 };
75
76 const char *_ctype_ = _C_ctype_;

表中在字母、数字对应的位置上做了标记。由于表的第0项是留给EOF(-1)的,所以查表时下标需要加一,即_ctype_[__ch + 1]。

isalpha---识别字符是否是字母

这里的逻辑很简单,判断输入值是否在A-Z或a-z之间即可

75  /** Returns true if `ch` is in `[A-Za-z]`. */
76 __BIONIC_CTYPE_INLINE int isalpha(int __ch) {
77 return (__ch >= 'A' && __ch <= 'Z') || (__ch >= 'a' && __ch <= 'z');
78 }

isblank---识别字符是否是空白

判断输入值是否是' '或者制表符'\t'即可

80  /** Returns true if `ch` is a space or tab. */
81 __BIONIC_CTYPE_INLINE int isblank(int __ch) {
82 return __ch == ' ' || __ch == '\t';
83 }

iscntrl---识别字符是否是控制字符

根据ASCII码表,0x00-0x1f以及0x7f是控制字符,0x20是空格符' ',所以值小于空格符,或者等于0x7f的都是控制字符。

85  /** Returns true if `ch` is a control character (any character before space, plus DEL). */
86 __BIONIC_CTYPE_INLINE int iscntrl(int __ch) {
87 return (__BIONIC_CAST(static_cast, unsigned, __ch) < ' ') || __ch == 0x7f;
88 }
57  #if defined(__cplusplus)
58 #define __BIONIC_CAST(_k,_t,_v) (_k<_t>(_v))
59 #else
60 #define __BIONIC_CAST(_k,_t,_v) ((_t) (_v))
61 #endif

__BIONIC_CAST在C++下展开为static_cast<unsigned>(__ch),在C下展开为(unsigned)(__ch)。先转换为无符号数再比较,负数输入会变成很大的正数,因此不会被误判为小于' '的控制字符。

isdigit---识别字符是否是数字

判断输入值是否在数字之间即可

90  /** Returns true if `ch` is in `[0-9]`. */
91 __BIONIC_CTYPE_INLINE int isdigit(int __ch) {
92 return (__ch >= '0' && __ch <= '9');
93 }

isgraph---识别是否是有图形的

从0x21的'!'到最后的0x7e'~'都是有图形的

95  /** Returns true if `ch` is `[A-Za-z0-9]` or punctuation. */
96 __BIONIC_CTYPE_INLINE int isgraph(int __ch) {
97 return (__ch >= '!' && __ch <= '~');
98 }

islower---识别是否是小写字母字符

判断输入值在'a'-'z'之间即可

100  /** Returns true if `ch` is in `[a-z]`. */
101 __BIONIC_CTYPE_INLINE int islower(int __ch) {
102 return (__ch >= 'a' && __ch <= 'z');
103 }

isprint---识别是否是可打印的

从0x20的' '到最后的0x7e'~'都是可打印的

105  /** Returns true if `ch` is `[A-Za-z0-9]` or punctuation or space. */
106 __BIONIC_CTYPE_INLINE int isprint(int __ch) {
107 return (__ch >= ' ' && __ch <= '~');
108 }

ispunct---识别是否是标点符号

这里的判断方式与isalnum相似,通过查表获得

110  /** Returns true if `ch` is punctuation. */
111 __BIONIC_CTYPE_INLINE int ispunct(int __ch) {
112 // `ispunct(c)` is `isgraph(c) && !isalnum(c)`, but there's no obvious way
113 // to simplify that, and the table lookup is just slightly faster...
114 // Note that this is unsafe for inputs less than -1 (EOF) or greater than
115 // 0xff. This is true of other C libraries too.
116 return (_ctype_[__ch + 1] & _CTYPE_P);
117 }

isspace---识别是否是空白字符

只有以下字符为空白字符

  • ​' '​​(0x20)space (SPC)
  • ​'\t'​​(0x09)horizontal tab (TAB)
  • ​'\n'​​(0x0a)newline (LF)
  • ​'\v'​​(0x0b)vertical tab (VT)
  • ​'\f'​​(0x0c)form feed (FF)
  • ​'\r'​​(0x0d)carriage return (CR)
119  /** Returns true if `ch` is in `[ \f\n\r\t\v]`. */
120 __BIONIC_CTYPE_INLINE int isspace(int __ch) {
121 return __ch == ' ' || (__ch >= '\t' && __ch <= '\r');
122 }

isupper---识别是否是大写字母

判断输入值在'A'-'Z'之间即可

124  /** Returns true if `ch` is in `[A-Z]`. */
125 __BIONIC_CTYPE_INLINE int isupper(int __ch) {
126 return (__ch >= 'A' && __ch <= 'Z');
127 }

isxdigit---识别是否是十六进制数字

判断输入值是否满足[0-9A-Fa-f]即可

129  /** Returns true if `ch` is in `[0-9A-Fa-f]`. */
130 __BIONIC_CTYPE_INLINE int isxdigit(int __ch) {
131 return (__ch >= '0' && __ch <= '9') || (__ch >= 'a' && __ch <= 'f') || (__ch >= 'A' && __ch <= 'F');
132 }

增加的isascii函数---实际通过判断输入值是否小于128来判断是否是ASCII码

166  /** Returns true if `ch` is less than 0x80. */
167 __BIONIC_CTYPE_INLINE int isascii(int __ch) {
168 return __BIONIC_CAST(static_cast, unsigned, __ch) < 0x80;
169 }

字符转换函数

tolower---将大写字符转换为小写

核心是将输入的大写字符与0x20做或运算。由于大写字母的第5位(0x20)本来为0,或运算实际上就是加上32,A-Z是0x41-0x5A,a-z是0x61-0x7A,验证是可以的。

134  /**
135 * Returns the corresponding lower-case character if `ch` is upper-case, or undefined otherwise.
136 *
137 * Prefer tolower() instead.
138 */
139 __BIONIC_CTYPE_INLINE int _tolower(int __ch) {
140 return __ch | 0x20;
141 }
142
143 /** Returns the corresponding lower-case character if `ch` is upper-case, or `ch` otherwise. */
144 __BIONIC_CTYPE_INLINE int tolower(int __ch) {
145 if (__ch >= 'A' && __ch <= 'Z') return _tolower(__ch);
146 return __ch;
147 }

toupper---将小写字母转换为大写

核心是用小写字母与0x20做异或运算。由于小写字母的第5位(0x20)为1,异或会把这一位清零,相当于减去32。具体原理可以参看​​位运算的巧妙使用 -- 字母大小写转换​

149  /**
150 * Returns the corresponding upper-case character if `ch` is lower-case, or undefined otherwise.
151 *
152 * Prefer toupper() instead.
153 */
154 __BIONIC_CTYPE_INLINE int _toupper(int __ch) {
155 // Using EOR rather than AND makes no difference on arm, but saves an
156 // instruction on arm64.
157 return __ch ^ 0x20;
158 }
159
160 /** Returns the corresponding upper-case character if `ch` is lower-case, or `ch` otherwise. */
161 __BIONIC_CTYPE_INLINE int toupper(int __ch) {
162 if (__ch >= 'a' && __ch <= 'z') return _toupper(__ch);
163 return __ch;
164 }

增加的toascii函数---通过输入值与0x7f做与运算,只保留低7位,从而转换为有效的ASCII码

171  /** Returns `ch & 0x7f`. */
172 __BIONIC_CTYPE_INLINE int toascii(int __ch) {
173 return __ch & 0x7f;
174 }

以上就是ctype.h文件的全部内容