统计字符串中单词个数

转载

mb5ff5909699060 2013-05-08 15:31:00

文章标签 i++ #include hash算法缓存 html 文章分类 代码人生

统计字符串中单词个数_#include

要求：输入一个字符串，统计每个单词的个数。单词间用空格隔开，可多个空格，写出自己认为高效的算法。

例如：输入：I love love China

输出为：

I： 1

love： 2

China： 1

统计字符串中单词个数_#include

首先想到的还是模拟的方法，就是用struct把出现过的单词缓存起来，然后再输入文本中遍历到新单词的时候，遍历一次struct，看这个单词是不是已经存，做相关处理。

如果输入文本中有n个字母，不重复的字母为m个，则算法复杂度为O(nm^2) 最好情况是m =1 ，最差情况是m=n 其实现代码如下：

1 #include <stdio.h>
 2  #include <string.h>
 3   struct struct_words{
 4          char word[20];
 5          int count;
 6  };
 7   int main(){
 8          char string[100];
 9          char c;
10          struct struct_words words[20];
11          int i = 0, k = 0 , ws =0;
12  
13          for(; i < 20; i++){
14                  words[i].word[0] = '\0';
15                  words[i].count = 0;
16          }
17          puts("please input words.");
18          gets(string);
19          puts("=============开始取词================");
20  
21          i = 0;
22          do{
23                  c = string[i];
24                  if(c != ' ' && c !='\0'){
25                          words[k].word[ws] = c;
26                          words[k].count = 1;
27                          ws ++;
28                  }else{
29                          words[k].word[ws] = '\0';
30                          ws = 0;
31                          k ++;
32                  }
33                  i ++;
34          }while(c!='\0');lda
35  
36  
37          puts("=========== 合并相同的单词 ==============");
38          for(i = 0; words[i].word[0] != '\0' ; i++){
39                  puts(words[i].word);
40                  if( words[i].count >= 1)
41                  for(k = i; words[k].word[0] != '\0'; k++){
42                          if(strcmp(words[i].word, words[k].word) == 0
43                             && words[k].count == 1){
44                                  words[k].count --;
45                                  words[i].count ++;
46                          }
47                  }
48          }
49  
50          puts("=============== End ==============");
51          for(i = 0;words[i].word[0] != '\0' ;i++){
52                  if(words[i].count != 0 )
53                          printf("%s:\t\t%d\n",words[i].word, words[i].count);
54          }
55          return(0);
56  }

然后呢，做一下优化，恩路是遍历用户的输入文本是必须的，但是，单词的缓存和出现次数的统计是可以使用hash算法来优化的，借用hash算法的特性，使复杂度立刻就降低到了 O(n),实现代码如下：

1 #include <stdio.h>
 2 #include <string.h>
 3 #define N 100
 4 
 5 struct struct_words{
 6     char word[100];
 7     int count;
 8 };
 9 
10 int hash(char* key)
11 {
12      unsigned long h=0;
13       while(*key)
14            {   
15                  h=(h<<4)+*key++;
16                    unsigned long g=h & 0xF0000000L;
17                      if(g)
18                             h^=g>>24;
19                        h&=~g;
20                         }   
21        return h&N;
22 }
23 int main(){
24     char string[1000];
25     char current_word[100];
26     char c;
27     struct struct_words words[200]; 
28     int i = 0, k = 0 , ws =0 , key;
29     int keys[100];
30 
31     for(; i < 200; i++){
32         words[i].word[0] = '\0';
33         words[i].count = 0;
34     }   
35     puts("=============输入一些单词，用空格隔开================");
36     gets(string);
37 
38     i = 0;
39     do{ 
40         c = string[i];
41         //如果第一个单词前有空格，跳过去
42         if( ws == 0  && c == ' ') {i++ ; continue;}
43         if(c != ' ' && c !='\0'){
44             current_word[ws] = c;
45             ws ++; 
46         }else{
47             current_word[ws] = '\0';
48             key = hash(current_word);
49             if(words[key].count == 0){ 
50                 strcpy(words[key].word, current_word);
51                 keys[k] = key;
52                 k++;
53             }   
54             words[key].count ++; 
55             ws = 0;
56         }
57     i ++;
58     }while(c != '\0');
59 
60     printf("%d" ,k);
61     puts("===============打印結果 ==============");
62     for(i = 0 ; i < k ;i++){
63             printf("%s:\t\t%d\n",words[keys[i]].word, words[keys[i]].count);
64     }
65     puts("=============== End ==============");
66     return 0;
67 }