C语言实现英文文本词频统计

# C语言实现英文文本词频统计 | 来源: 网络整理 | 查看: 265

#include <ctype.h>
#include <stdio.h>
#include <string.h>

// Capacity of the word table: the maximum number of distinct words tracked.
#define MAX_WORDS 100
// Size in bytes of a word buffer, including the terminating '\0'.
#define MAX_WORD_LENGTH 50

// One entry of the frequency table: a word and how often it has been seen.
struct Word {
    char text[MAX_WORD_LENGTH]; // NUL-terminated word text
    int count;                  // number of occurrences of the word
};

/*
 * Read the next word (a maximal run of alphabetic characters, as decided by
 * isalpha) from `file` into `word`.
 *
 * word            - destination buffer; receives a NUL-terminated string
 * max_word_length - size of `word` in bytes, including the terminator;
 *                   must be at least 2
 * file            - input stream to read from
 *
 * Returns the number of characters stored in `word`, or EOF when no further
 * word is available (end of input or read error) or when the arguments are
 * unusable. A word longer than max_word_length - 1 letters is truncated: the
 * surplus letters are consumed and discarded so that the tail of a long word
 * is not reported as a separate word on the next call.
 */
int read_word(char *word, int max_word_length, FILE *file) {
    int c;       // character most recently read
    int len = 0; // number of characters stored so far

    // A buffer that cannot hold one letter plus '\0' cannot hold any word.
    if (word == NULL || file == NULL || max_word_length < 2) {
        return EOF;
    }

    // Skip non-letter characters preceding the word.
    do {
        c = getc(file);
    } while (c != EOF && !isalpha(c));
    if (c == EOF) {
        return EOF;
    }

    // Consume the whole run of letters, storing only as many as fit.
    // Checking the length before storing (not as part of the read condition)
    // ensures no letter is read and then silently dropped.
    do {
        if (len < max_word_length - 1) {
            word[len++] = (char)c;
        }
        c = getc(file);
    } while (c != EOF && isalpha(c));
    word[len] = '\0';

    return len;
}

int main(void) { char word[MAX_WORD_LENGTH]; // 当前读入的单词 struct Word words[MAX_WORDS]; // 存储单词的数组 int num_words = 0; // 单词数量 int i, j; // 循环变量 int word_length; // 单词长度 FILE *file = fopen("text.txt", "r"); // 打开文件

// 判断文件是否成功打开 if (file == NULL) { printf("Failed to open text.txt\n"); return 1; }

// 循环读取单词 while ((word_length = read_word(word, MAX_WORD_LENGTH, file)) != EOF) { // 查找单词是否已经存在 for (i = 0; i < num_words; i++) { if (strcmp(word, words[i].text) == 0) { break;

} } if (i < num_words) { words[i].count++; } else { strcpy(words[num_words].text, word); words[num_words].count = 1; num_words++; } }

fclose(file);

for (i = 0; i < num_words; i++) { for (j = i + 1; j < num_words; j++) { if (words[i].count < words[j].count) { struct Word temp = words[i]; words[i] = words[j]; words[j] = temp; } } }

for (i = 0; i < num_words; i++) { printf("%s: %d\n", words[i].text, words[i].count); }

return 0;}

【本文地址】

公司简介

联系我们