一、case 小于等于3项的情况
先看第一个程序段:
switch (nscore)
{
case 1:
ntmpNum = 1;
break;
case 3:
ntmpNum = 3;
break;
case 4:
ntmpNum = 4;
break;
default:
ntmpNum = 10;
}
printf("%d", ntmpNum); // 要调用一下ntmpNum,否则上面的switch会被优化掉
OD
载入,看一下:
00401013 >|. E8 0F010000 call 00401127 ; scanf
00401018 |. 8B4424 08 mov eax, dword ptr [esp+8]
0040101C |? 83C4 08 add esp, 8 ; 上面scanf是C类调用
0040101F |? 48 dec eax ; 通过EAX的减法来判断属于哪个分支
00401020 |? 74 1D je short 0040103F
00401022 |. 83E8 02 sub eax, 2
00401025 \. 74 11 je short 00401038
00401027 48 dec eax
00401028 74 07 je short 00401031
0040102A B8 0A000000 mov eax, 0A
0040102F EB 13 jmp short 00401044 ; break
00401031 |. B8 04000000 mov eax, 4
00401036 |. EB 0C jmp short 00401044
00401038 |? B8 03000000 mov eax, 3
0040103D |? EB 05 jmp short 00401044
0040103F |? B8 01000000 mov eax, 1
00401044 |. 50 push eax
00401045 |? 68 38904000 push 00409038 ; ASCII "%d"
这种情况跟
if
差不多,不多分析。
二、case项多于3项且有规律的情况
看第二段代码:
scanf("%d", &nscore);
switch (nscore)
{
case 3:
ntmpNum = 1;
break;
case 1:
ntmpNum = 3;
break;
case 5:
ntmpNum = 4;
break;
case 9:
ntmpNum = 4;
break;
case 7:
ntmpNum = 4;
break;
case 11:
ntmpNum = 4;
break;
default:
ntmpNum = 10;
}
printf("%d", ntmpNum); // 要调用一下ntmpNum,否则上面的switch会被优化掉
这段代码,我们将有规律的case打乱顺序,然后看编译器是怎么处理的。
OD中查看反汇编形式:
0040100E |. 68 38904000 push 00409038 ; ASCII "%d"
00401013 |. E8 3F010000 call 00401157 ; scanf
00401018 |. 8B4C24 08 mov ecx, dword ptr [esp+8]; 得到输入的内容
0040101C |. 83C4 08 add esp, 8
0040101F |. 8D41 FF lea eax, dword ptr [ecx-1]; 输入的内容-1; Switch (cases 1..B)
00401022 |. 83F8 0A cmp eax, 0A
00401025 |. 77 1C ja short 00401043
00401027 |. FF2485 641040>jmp dword ptr [eax*4+401064]; 查表,跳转到对应的CASE中
0040102E |> B8 01000000 mov eax, 1 ; Case 3 of switch 0040101F
00401033 |. EB 13 jmp short 00401048
00401035 |> B8 03000000 mov eax, 3 ; Case 1 of switch 0040101F
0040103A |. EB 0C jmp short 00401048
0040103C |> B8 04000000 mov eax, 4 ; Cases 5,7,9,B of switch 0040101F
00401041 |. EB 05 jmp short 00401048
00401043 |> B8 0A000000 mov eax, 0A ; Default case of switch 0040101F
00401048 |> 50 push eax
00401049 |. 68 38904000 push 00409038 ; ASCII "%d"
0040104E |. E8 D3000000 call 00401126 ; printf
跟随下这个表,我们发现,这个表就在调用它的函数后,如下:
跳转表:
00401064 00401035 switch.00401035
00401068 00401043 switch.00401043 插入的是default分支的首地址
0040106C 0040102E switch.0040102E
00401070 00401043 switch.00401043 插入的是default分支的首地址
00401074 0040103C switch.0040103C
00401078 00401043 switch.00401043 插入的是default分支的首地址
0040107C 0040103C switch.0040103C
00401080 00401043 switch.00401043 插入的是default分支的首地址
00401084 0040103C switch.0040103C
00401088 00401043 switch.00401043 插入的是default分支的首地址
0040108C 0040103C switch.0040103C
认真对比一下这个表,发现,它先是对case后的常量排序,然后再将对应的处理代码的首地址写成一个表,通过jmp dword ptr [eax*4+401064] 查表直接进入到对应的case中。对于缺省的case(我们是间隔2递增的case)在表中填充的是default分支的首地址。
三、多于三项部分有规律的情况
上个我们发现,它会给缺省的case表项中填补default分支的首地址,那我们将这个间隔调大,观察一下编译器会怎么处理,代码段如下:
scanf("%d", &nscore);
switch (nscore)
{
case 1:
ntmpNum = 1;
break;
case 2:
ntmpNum = 2;
break;
case 3:
ntmpNum = 3;
break;
//这里丢失20多个case
case 26:
ntmpNum = 26;
break;
case 27:
ntmpNum = 27;
break;
case 28:
ntmpNum = 28;
break;
default:
ntmpNum = 10;
}
printf("%d", ntmpNum); // 要调用一下ntmpNum,否则上面的switch会被优化掉
反汇编观察一下:
0040100E 68 38904000 push 00409038 ; ASCII "%d"
00401013 E8 6F010000 call 00401187 ; scanf
00401018 8B4C24 08 mov ecx, dword ptr [esp+8] ; 得到输入的内容
0040101C 83C4 08 add esp, 8
0040101F 8D41 FF lea eax, dword ptr [ecx-1] ; 输入的内容-1
00401022 83F8 1B cmp eax, 1B
00401025 77 39 ja short 00401060
00401027 33D2 xor edx, edx
00401029 8A90 9C104000 mov dl, byte ptr [eax+40109C] ; 检索case的下标索引值表
;它参与运算从地址表中找到对应的case地址
0040102F FF2495 80104000 jmp dword ptr [edx*4+401080]
; 通过值表填充的CASE索引值,查地址表
00401036 B8 01000000 mov eax, 1
0040103B EB 28 jmp short 00401065 ; break
0040103D B8 02000000 mov eax, 2
00401042 EB 21 jmp short 00401065
00401044 B8 03000000 mov eax, 3
00401049 EB 1A jmp short 00401065
0040104B B8 1A000000 mov eax, 1A
00401050 EB 13 jmp short 00401065
00401052 B8 1B000000 mov eax, 1B
00401057 EB 0C jmp short 00401065
00401059 B8 1C000000 mov eax, 1C
0040105E EB 05 jmp short 00401065
00401060 B8 0A000000 mov eax, 0A
00401065 50 push eax
00401066 68 38904000 push 00409038 ; ASCII "%d"
下标索引表:
0040109C 00 DB 00 case 1的索引值
0040109D 01 DB 01 case 2的索引值
0040109E 02 DB 02 case 3的索引值
0040109F 06 DB 06 下面全部填充default的索引值
004010A0 06 DB 06
...
004010B4 06 DB 06
004010B5 03 DB 03 case 4的索引值
004010B6 04 DB 04 case 5的索引值
004010B7 05 DB 05 case 6的索引值
跳转地址表:
00401080 00401036 switch.00401036
00401084 0040103D switch.0040103D
00401088 00401044 switch.00401044
0040108C 0040104B switch.0040104B
00401090 00401052 switch.00401052
00401094 00401059 switch.00401059
00401098 00401060 switch.00401060
这样查两个表,缺省的
case
项在索引表中插入
default
的索引值,这样每个
case
项就节省了
3
个字节的空间。
mov dl, byte ptr [eax+40109C] // 40109C是索引表首地址
jmp dword ptr [edx*4+401080] // 401080是跳转地址表的首地址。
这样,就可以定位到对应的case项了。
我们继续增大这个case之间的差距,让它超过255,代码段如下:
00401018 8B4424 08 mov eax, dword ptr [esp+8] ; 得到输入的内容
0040101C 83C4 08 add esp, 8
0040101F 3D 46010000 cmp eax, 146 ; 判断是不是大case中最小的
00401024 7F 27 jg short 0040104D ; 如果大于,就进入大case中比较
00401026 74 1E je short 00401046 ; 如果相等就直接进入0x146的case代码段
00401028 48 dec eax ; 否则就到小的case段中比较。
00401029 74 14 je short 0040103F
0040102B 48 dec eax
0040102C 74 0A je short 00401038
0040102E 48 dec eax
0040102F 75 26 jnz short 00401057 ; default了。
00401031 B8 03000000 mov eax, 3
00401036 EB 32 jmp short 0040106A
00401038 B8 02000000 mov eax, 2
0040103D EB 2B jmp short 0040106A
0040103F B8 01000000 mov eax, 1
00401044 EB 24 jmp short 0040106A
00401046 B8 1A000000 mov eax, 1A
0040104B EB 1D jmp short 0040106A
0040104D 2D 47010000 sub eax, 147
; 减去一个case项值,得到一个差值,这样就可以判断大case了。
00401052 74 11 je short 00401065
00401054 48 dec eax
00401055 74 07 je short 0040105E
00401057 B8 0A000000 mov eax, 0A
0040105C EB 0C jmp short 0040106A
0040105E B8 1C000000 mov eax, 1C
00401063 EB 05 jmp short 0040106A
00401065 B8 1B000000 mov eax, 1B
0040106A 50 push eax
0040106B 68 38904000 push 00409038 ; ASCII "%d"
00401070 E8 B1000000 call 00401126
看到了么?这里就分成了两段,每段当做if来处理的,我想应该是我们每段的case数量太少,我们让上面的case 数量大于3个试试,看看会不会是只要大于三项的有规律case就查表,少于等于3项的就当成if来处理。
代码段如下:
scanf("%d", &nscore);
switch (nscore)
{
case 1:
ntmpNum = 1;
break;
case 2:
ntmpNum = 2;
break;
case 3:
ntmpNum = 3;
break;
case 4:
ntmpNum = 4;
break;
case 5:
ntmpNum = 5;
break;
//这里丢失几个case
case 326:
ntmpNum = 326;
break;
case 327:
ntmpNum = 327;
break;
case 328:
ntmpNum = 328;
break;
default:
ntmpNum = 10;
}
printf("%d", ntmpNum); // 要调用一下ntmpNum,否则上面的switch会被优化掉
反汇编看下效果:
0040100E 68 38904000 push 00409038 ; ASCII "%d"
00401013 E8 5F010000 call 00401177 ; scanf
00401018 8B4424 08 mov eax, dword ptr [esp+8] ; 得到输入的内容
0040101C 83C4 08 add esp, 8
0040101F 3D 46010000 cmp eax, 146
00401024 7F 39 jg short 0040105F
00401026 74 30 je short 00401058
00401028 48 dec eax
00401029 83F8 04 cmp eax, 4
0040102C 77 3B ja short 00401069
0040102E FF2485 98104000 jmp dword ptr [eax*4+401098]
00401035 B8 01000000 mov eax, 1
0040103A EB 40 jmp short 0040107C
0040103C B8 02000000 mov eax, 2
00401041 EB 39 jmp short 0040107C
00401043 B8 03000000 mov eax, 3
00401048 EB 32 jmp short 0040107C
0040104A B8 04000000 mov eax, 4
0040104F EB 2B jmp short 0040107C
00401051 B8 05000000 mov eax, 5
00401056 EB 24 jmp short 0040107C
00401058 B8 46010000 mov eax, 146
0040105D EB 1D jmp short 0040107C
0040105F 2D 47010000 sub eax, 147
00401064 74 11 je short 00401077
00401066 48 dec eax
00401067 74 07 je short 00401070
00401069 B8 0A000000 mov eax, 0A
0040106E EB 0C jmp short 0040107C
00401070 B8 48010000 mov eax, 148
00401075 EB 05 jmp short 0040107C
00401077 B8 47010000 mov eax, 147
0040107C 50 push eax
0040107D 68 38904000 push 00409038 ; ASCII "%d"
00401082 E8 BF000000 call 00401146
跳转表如下:
00401098 00401035 switch.00401035
0040109C 0040103C switch.0040103C
004010A0 00401043 switch.00401043
004010A4 0040104A switch.0040104A
004010A8 00401051 switch.00401051
哈哈,不多说了,我们看下无规律的情况。
四、对于毫无规律的情况。
通过上个例子的分析,我们大概可以猜出来,编译器会择优选择查表,查双表来对部分离得比较近的case项作处理,最后才考虑毫无规律的情况,为了提高我们这次测试的成功率,我们让每个相邻的case项差值都超过255,为了避免switch当做if来处理,我们多写几个case,具体代码段如下:
scanf("%d", &nscore);
switch (nscore)
{
case 1:
ntmpNum = 1;
break;
case 300:
ntmpNum = 300;
break;
case 570:
ntmpNum = 570;
break;
case 830:
ntmpNum = 830;
break;
case 1094:
ntmpNum = 1094;
break;
case 1314:
ntmpNum = 32;
break;
case 1614:
ntmpNum = 1614;
break;
case 1894:
ntmpNum = 1894;
break;
case 2199:
ntmpNum = 2199;
break;
case 2578:
ntmpNum = 2578;
break;
case 2800:
ntmpNum = 2800;
break;
case 3178:
ntmpNum = 3178;
break;
case 3568:
ntmpNum = 3568;
break;
case 3856:
ntmpNum = 3856;
break;
case 4212:
ntmpNum = 4212;
break;
case 4679:
ntmpNum = 4679;
break;
case 5050:
ntmpNum = 5050;
break;
case 5486:
ntmpNum = 5486;
break;
case 5797:
ntmpNum = 5797;
break;
case 6089:
ntmpNum = 6089;
break;
case 6713:
ntmpNum = 6713;
break;
case 8425:
ntmpNum = 8425;
break;
case 8973:
ntmpNum = 8973;
break;
case 9545:
ntmpNum = 9545;
break;
case 9987:
ntmpNum = 9987;
break;
case 11254:
ntmpNum = 11254;
break;
case 12489:
ntmpNum = 12489;
break;
case 15798:
ntmpNum = 15798;
break;
case 26874:
ntmpNum = 26874;
break;
case 34721:
ntmpNum = 34721;
break;
case 39681:
ntmpNum = 39681;
break;
default:
ntmpNum = 10;
}
printf("%d", ntmpNum); // 要调用一下ntmpNum,否则上面的switch会被优化掉
反汇编结果:
0040100E |. 68 38A04000 push 0040A038 ASCII "%d"
00401013 |. E8 EF020000 call 00401307
00401018 |. 8B4424 08 mov eax, dword ptr [esp+8] ; 得到输入的内容
0040101C |. 83C4 08 add esp, 8
0040101F |. 3D 47120000 cmp eax, 1247 ; 0x1247是case后常量中间的一个元素
; Switch (cases 1..9B01)
00401024 |. 0F8F 16010000 jg 00401140 ; 如果大于就比较0x2549
0040102A |. 0F84 06010000 je 00401136 ; 等于就跳到对应的case中
00401030 |. 3D 66070000 cmp eax, 766 ; 如果小于就再跟766比较
00401035 |. 0F8F 84000000 jg 004010BF ; 同上,遍历二叉树的方法……
0040103B |. 74 78 je short 004010B5
0040103D |. 3D 3E030000 cmp eax, 33E
00401042 |. 7F 3F jg short 00401083
00401044 |. 74 33 je short 00401079
00401046 |. 48 dec eax
; 用输入的数据依次减去某个CASE后的常量值,来判断是否为0……
00401047 |. 74 26 je short 0040106F
; 所以,看到这寄存器连续做减法运算应该就是这种switch的特征了吧……
00401049 |. 2D 2B010000 sub eax, 12B
0040104E |. 74 15 je short 00401065
00401050 |. 2D 0E010000 sub eax, 10E
00401055 |. 0F85 A9010000 jnz 00401204
0040105B |. B8 3A020000 mov eax, 23A ; Case 23A of switch 0040101F
00401060 |. E9 B9010000 jmp 0040121E
00401065 |> B8 2C010000 mov eax, 12C ; Case 12C of switch 0040101F
0040106A |. E9 AF010000 jmp 0040121E
0040106F |> B8 01000000 mov eax, 1 ; Case 1 of switch 0040101F
00401074 |. E9 A5010000 jmp 0040121E
00401079 |> B8 3E030000 mov eax, 33E ; Case 33E of switch 0040101F
0040107E |. E9 9B010000 jmp 0040121E
00401083 |> 3D 46040000 cmp eax, 446
00401088 |. 74 21 je short 004010AB
0040108A |. 3D 22050000 cmp eax, 522
0040108F |. 74 10 je short 004010A1
00401091 |. 3D 4E060000 cmp eax, 64E
00401096 |. 0F85 68010000 jnz 00401204
0040109C |. E9 7D010000 jmp 0040121E ; Case 64E of switch 0040101F
004010A1 |> B8 20000000 mov eax, 20 ; Case 522 of switch 0040101F
004010A6 |. E9 73010000 jmp 0040121E
004010AB |> B8 46040000 mov eax, 446 ; Case 446 of switch 0040101F
004010B0 |. E9 69010000 jmp 0040121E
004010B5 |> B8 66070000 mov eax, 766 ; Case 766 of switch 0040101F
004010BA |. E9 5F010000 jmp 0040121E
004010BF |> 3D 6A0C0000 cmp eax, 0C6A
004010C4 |. 7F 3E jg short 00401104
004010C6 |. 74 32 je short 004010FA
004010C8 |. 3D 97080000 cmp eax, 897
004010CD |. 74 21 je short 004010F0
004010CF |. 3D 120A0000 cmp eax, 0A12
004010D4 |. 74 10 je short 004010E6
004010D6 |. 3D F00A0000 cmp eax, 0AF0
004010DB |. 0F85 23010000 jnz 00401204
004010E1 |. E9 38010000 jmp 0040121E ; Case AF0 of switch 0040101F
004010E6 |> B8 120A0000 mov eax, 0A12 ; Case A12 of switch 0040101F
004010EB |. E9 2E010000 jmp 0040121E
004010F0 |> B8 97080000 mov eax, 897 ; Case 897 of switch 0040101F
004010F5 |. E9 24010000 jmp 0040121E
004010FA |> B8 6A0C0000 mov eax, 0C6A ; Case C6A of switch 0040101F
004010FF |. E9 1A010000 jmp 0040121E
00401104 |> 3D F00D0000 cmp eax, 0DF0
00401109 |. 74 21 je short 0040112C
0040110B |. 3D 100F0000 cmp eax, 0F10
00401110 |. 74 10 je short 00401122
00401112 |. 3D 74100000 cmp eax, 1074
00401117 |. 0F85 E7000000 jnz 00401204
0040111D |. E9 FC000000 jmp 0040121E ; Case 1074 of switch 0040101F
00401122 |> B8 100F0000 mov eax, 0F10 ; Case F10 of switch 0040101F
00401127 |. E9 F2000000 jmp 0040121E
0040112C |> B8 F00D0000 mov eax, 0DF0 ; Case DF0 of switch 0040101F
00401131 |. E9 E8000000 jmp 0040121E
00401136 |> B8 47120000 mov eax, 1247 ; Case 1247 of switch 0040101F
0040113B |. E9 DE000000 jmp 0040121E
00401140 |> 3D 49250000 cmp eax, 2549
00401145 |. 7F 73 jg short 004011BA
00401147 |. 74 6A je short 004011B3
00401149 |. 3D C9170000 cmp eax, 17C9
0040114E |. 7F 3E jg short 0040118E
00401150 |. 74 32 je short 00401184
00401152 |. 3D BA130000 cmp eax, 13BA
00401157 |. 74 21 je short 0040117A
00401159 |. 3D 6E150000 cmp eax, 156E
0040115E |. 74 10 je short 00401170
00401160 |. 3D A5160000 cmp eax, 16A5
00401165 |. 0F85 99000000 jnz 00401204
0040116B |. E9 AE000000 jmp 0040121E ; Case 16A5 of switch 0040101F
00401170 |> B8 6E150000 mov eax, 156E ; Case 156E of switch 0040101F
00401175 |. E9 A4000000 jmp 0040121E
0040117A |> B8 BA130000 mov eax, 13BA ; Case 13BA of switch 0040101F
0040117F |. E9 9A000000 jmp 0040121E
00401184 |> B8 C9170000 mov eax, 17C9 ; Case 17C9 of switch 0040101F
00401189 |. E9 90000000 jmp 0040121E
0040118E |> 3D 391A0000 cmp eax, 1A39
00401193 |. 74 17 je short 004011AC
00401195 |. 3D E9200000 cmp eax, 20E9
0040119A |. 74 09 je short 004011A5
0040119C |. 3D 0D230000 cmp eax, 230D
004011A1 |. 75 61 jnz short 00401204
004011A3 |. EB 79 jmp short 0040121E ; Case 230D of switch 0040101F
004011A5 |> B8 E9200000 mov eax, 20E9 ; Case 20E9 of switch 0040101F
004011AA |. EB 72 jmp short 0040121E
004011AC |> B8 391A0000 mov eax, 1A39 ; Case 1A39 of switch 0040101F
004011B1 |. EB 6B jmp short 0040121E
004011B3 |> B8 49250000 mov eax, 2549 ; Case 2549 of switch 0040101F
004011B8 |. EB 64 jmp short 0040121E
004011BA |> 3D B63D0000 cmp eax, 3DB6
004011BF |. 7F 2E jg short 004011EF
004011C1 |. 74 25 je short 004011E8
004011C3 |. 3D 03270000 cmp eax, 2703
004011C8 |. 74 17 je short 004011E1
004011CA |. 3D F62B0000 cmp eax, 2BF6
004011CF |. 74 09 je short 004011DA
004011D1 |. 3D C9300000 cmp eax, 30C9
004011D6 |. 75 2C jnz short 00401204
004011D8 |. EB 44 jmp short 0040121E ; Case 30C9 of switch 0040101F
004011DA |> B8 F62B0000 mov eax, 2BF6 ; Case 2BF6 of switch 0040101F
004011DF |. EB 3D jmp short 0040121E
004011E1 |> B8 03270000 mov eax, 2703 ; Case 2703 of switch 0040101F
004011E6 |. EB 36 jmp short 0040121E
004011E8 |> B8 B63D0000 mov eax, 3DB6 ; Case 3DB6 of switch 0040101F
004011ED |. EB 2F jmp short 0040121E
004011EF |> 3D FA680000 cmp eax, 68FA
004011F4 |. 74 23 je short 00401219
004011F6 |. 3D A1870000 cmp eax, 87A1
004011FB |. 74 15 je short 00401212
004011FD |. 3D 019B0000 cmp eax, 9B01
00401202 |. 74 07 je short 0040120B
00401204 |> B8 0A000000 mov eax, 0A ; Default case of switch 0040101F
00401209 |. EB 13 jmp short 0040121E
0040120B |> B8 019B0000 mov eax, 9B01 ; Case 9B01 of switch 0040101F
00401210 |. EB 0C jmp short 0040121E
00401212 |> B8 A1870000 mov eax, 87A1 ; Case 87A1 of switch 0040101F
00401217 |. EB 05 jmp short 0040121E
00401219 |> B8 FA680000 mov eax, 68FA ; Case 68FA of switch 0040101F
0040121E |> 50 push eax
0040121F |. 68 38A04000 push 0040A038 ; ASCII "%d"
00401224 |. E8 AD000000 call 004012D6
这个看起来很麻烦哦,仔细观察下,像是在遍历二叉树,又像是二分法排序,嘿嘿,可以看得出来这个是以最高效的方式找到相应的
case
项。
五、小结
通过上面几个小例子的分析,我对编译器处理switch结构的流程大概有了了解,对于一堆无规律的case,编译器会:
1、 先对case后的常量排序
2、 对小于3个相连的case常量仿照if结构(最迅速)
3、 对于多于三个的case相连或差距很小的case常量,它会创建跳转表,不存在的常量地址填补default段的首地址。(效率很高,不用每个项目都比较)
4、 对于间距小于255但也很大的case常量会再额外创建一个索引表,不存在的索引填补default索引。
(比方案3节省地址空间,效率不如3。)
5、 最后处理毫无规律且间距大于255的case项(最不效率)