采用二叉搜索树来统计文本中单词出现的频率

转载

我不是萧海哇 2022-06-14 06:01:10

把几个主要的函数组合起来即可：

1.从文本读取单个单词（去掉空格，特殊符号等）

2.用读出来的单词去更新二叉搜索树的节点（涉及二叉搜索树的构建问题，递归）

3.中序遍历，来递归打印二叉树的每个节点

代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAXWORD 1000

/* One node of the binary search tree used to count word frequencies. */
struct tnode {
    char *word;           /* text of the word held by this node */
    int count;            /* number of times the word has been seen */
    struct tnode *left;   /* left child, or NULL */
    struct tnode *right;  /* right child, or NULL */
};

/* Forward declarations for the routines that main() calls. */
struct tnode *addtree(struct tnode *p, char *w);
void treeprint(struct tnode *p);
int getword(char *word, int lim);

/* Input stream: opened in main() and read by getch(). */
FILE *fp;

/*
 * Count how often each word occurs in a text file and print the result.
 *
 * The file to read is argv[1] when given; otherwise the program falls back
 * to the path that was originally hard-coded here. Every token returned by
 * getword() that starts with a letter is inserted into the tree via
 * addtree(), and the finished tree is printed with treeprint().
 *
 * Returns 0 on success, EXIT_FAILURE if the input file cannot be opened.
 */
int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1]
                                  : "//Users//wuzengxiong//Desktop//c//c1.c";
    struct tnode *root = NULL;
    char word[MAXWORD];

    fp = fopen(path, "r");
    if (fp == NULL) {
        /* Without this check getch() would call getc() on a NULL stream. */
        perror(path);
        return EXIT_FAILURE;
    }

    while (getword(word, MAXWORD) != EOF) {
        /* <ctype.h> functions need an unsigned char value (or EOF). */
        if (isalpha((unsigned char)word[0]))
            root = addtree(root, word);
    }

    treeprint(root);
    fclose(fp);
    return 0;
}

/* Helpers defined further down in this file. */
struct tnode *talloc(void);
char *my_strdup(char *);

/*
 * Record one occurrence of the word w in the subtree rooted at p.
 *
 * If p is NULL a new node holding a private copy of w (count 1, no
 * children) is created. Otherwise w is compared with strcmp() against the
 * node's word: an equal word bumps the counter, a word that sorts before
 * the node's word goes into the left subtree, any other word into the
 * right subtree.
 *
 * Returns the root of the (possibly new) subtree, so callers write
 * `root = addtree(root, w)`. If memory runs out, a message is written to
 * stderr, the word is dropped, and the subtree is returned unchanged
 * (NULL when a new node was being created).
 */
struct tnode *addtree(struct tnode *p, char *w)
{
    if (p == NULL) {
        struct tnode *node = talloc();

        if (node == NULL) {
            fprintf(stderr, "addtree: out of memory\n");
            return NULL;
        }
        node->word = my_strdup(w);
        if (node->word == NULL) {
            /* A node without a word would crash strcmp() on a later insert. */
            free(node);
            fprintf(stderr, "addtree: out of memory\n");
            return NULL;
        }
        node->count = 1;
        node->left = node->right = NULL;
        return node;
    }

    int cond = strcmp(p->word, w);

    if (cond == 0) {
        p->count++;
    } else if (cond > 0) {
        p->left = addtree(p->left, w);
    } else {
        p->right = addtree(p->right, w);
    }
    return p;
}
/*
 * Print the subtree rooted at p in order (left subtree, node, right
 * subtree), one "count word" line per node. A NULL subtree prints nothing.
 */
void treeprint(struct tnode *p)
{
    /* Recurse into the left side only; walk down the right side in a loop. */
    for (; p != NULL; p = p->right) {
        treeprint(p->left);
        printf("%4d %s\n", p->count, p->word);
    }
}


/*
 * Allocate storage for one tree node with malloc().
 * The node's fields are left uninitialized; returns NULL if malloc() fails.
 */
struct tnode *talloc(void)
{
    struct tnode *node = malloc(sizeof *node);

    return node;
}

/*
 * Copy the string s into memory obtained from malloc().
 * Returns the copy, or NULL if the allocation fails.
 */
char *my_strdup(char *s)
{
    size_t nbytes = strlen(s) + 1;  /* +1 for the terminating '\0' */
    char *copy = malloc(nbytes);

    if (copy == NULL)
        return NULL;

    memcpy(copy, s, nbytes);
    return copy;
}


#define BUFSIZE 100

/*
 * Hand-managed pushback stack that lets the tokenizer "unread" characters.
 * The slots are int rather than char so that every value getc() can
 * return, including EOF and bytes >= 0x80, comes back from getch()
 * unchanged regardless of whether plain char is signed.
 */
int buf[BUFSIZE];
int bufp = 0;   /* index of the next free slot in buf */

/* Return the most recently pushed-back character, or read one from fp. */
int getch(void)
{
    return (bufp > 0) ? buf[--bufp] : getc(fp);
}

/*
 * Push c back so that the next getch() returns it.
 * If the stack is full the character is dropped and a diagnostic is
 * written to stderr.
 */
void ungetch(int c)
{
    if (bufp == BUFSIZE)
        fprintf(stderr, "ungetch: too many characters\n");
    else
        buf[bufp++] = c;
}


/*
 * Read the next token from the input (through getch) into word.
 *
 * Leading whitespace is skipped. If the first remaining character is a
 * letter, the token is that letter plus all immediately following letters
 * and digits; otherwise the token is that single character. The character
 * that ends a word is pushed back with ungetch() so the next call sees it.
 *
 * word  destination buffer, always left '\0'-terminated
 * lim   size of word in bytes; at most lim-1 characters are stored, and a
 *       longer word is split across successive calls
 *
 * Returns the first character of the token, or EOF at end of input (word
 * is then the empty string). If lim is too small to hold even one
 * character plus the terminator, EOF is returned without reading input.
 */
int getword(char *word, int lim)
{
    char *w = word;
    int c;      /* int, not char, so EOF stays distinguishable from data */

    if (lim < 2) {
        if (lim == 1)
            *word = '\0';
        return EOF;
    }

    char *const last = word + lim - 1;  /* slot reserved for the '\0' */

    do {
        c = getch();
    } while (c != EOF && isspace((unsigned char)c));

    if (c == EOF) {
        *w = '\0';
        return EOF;
    }

    *w++ = (char)c;
    if (!isalpha((unsigned char)c)) {
        /* A symbol or digit: return it as a one-character token. */
        *w = '\0';
        return c;
    }

    while (w < last) {
        c = getch();
        if (c == EOF)
            break;      /* the stream will report EOF again on the next read */
        if (!isalnum((unsigned char)c)) {
            /* Read one character too many: hand it back for the next call. */
            ungetch(c);
            break;
        }
        *w++ = (char)c;
    }
    *w = '\0';
    return (unsigned char)word[0];
}