#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAX_WORD_NUM 10000
#define MAX_WORD_LEN 50
#define MAX_INDEX_NUM 1000

// Converts the NUL-terminated string `str` to lowercase in place.
// A NULL `str` is accepted and ignored.
void tolowerstr(char *str) {
    if (str == NULL) {
        return;
    }
    for (char *p = str; *p != '\0'; p++) {
        // <ctype.h> functions are only defined for EOF and values representable
        // as unsigned char; a plain char may be negative, so convert first.
        *p = (char)tolower((unsigned char)*p);
    }
}

// Binary-searches `words` (the first `size` entries, which must be sorted in
// ascending strcmp order) for a string equal to `word`.
// Returns the index of a matching entry, or -1 when there is none
// (including when `size` is 0 or negative).
int binary_search(char **words, int size, const char *word) {
    int left = 0;
    int right = size - 1;
    while (left <= right) {
        // Written as left + half-distance so the midpoint cannot overflow int,
        // unlike (left + right) / 2.
        int mid = left + (right - left) / 2;
        int cmp = strcmp(words[mid], word);
        if (cmp == 0) {
            return mid;
        }
        if (cmp < 0) {
            left = mid + 1;   // target sorts after words[mid]
        } else {
            right = mid - 1;  // target sorts before words[mid]
        }
    }
    return -1;
}

// qsort comparator: orders two `char *` array elements by strcmp on the
// strings they point to.
static int compare_words(const void *lhs, const void *rhs) {
    const char *const *a = lhs;
    const char *const *b = rhs;
    return strcmp(*a, *b);
}

// Returns 1 when every character of `word` is a letter, 0 otherwise.
static int is_all_letters(const char *word) {
    for (const char *p = word; *p != '\0'; p++) {
        // Cast because isalpha is undefined for negative char values.
        if (!isalpha((unsigned char)*p)) {
            return 0;
        }
    }
    return 1;
}

// Frees the first `count` strings stored in `words`.
static void free_words(char **words, int count) {
    for (int i = 0; i < count; i++) {
        free(words[i]);
    }
}

// Reads whitespace-separated tokens from the file at `path`, lowercases each
// one and stores a heap-allocated copy in `words`, up to `capacity` entries.
// When `letters_only` is non-zero, tokens containing any non-letter character
// are skipped. Tokens beyond `capacity` are ignored with a warning on stderr.
// Returns the number of words stored, or -1 if the file cannot be opened or
// memory runs out; on -1 nothing is left allocated in `words`.
static int load_words(const char *path, char **words, int capacity, int letters_only) {
    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        fprintf(stderr, "Failed to open file %s\n", path);
        return -1;
    }

    // Bound %s by the buffer size so an overlong token cannot overflow `word`;
    // such a token is read as several consecutive pieces instead.
    char format[16];
    snprintf(format, sizeof format, "%%%ds", MAX_WORD_LEN - 1);

    char word[MAX_WORD_LEN];
    int count = 0;
    while (fscanf(fp, format, word) == 1) {
        tolowerstr(word);
        if (letters_only && !is_all_letters(word)) {
            continue;
        }
        if (count == capacity) {
            fprintf(stderr, "Warning: %s has more than %d usable words; the rest are ignored\n",
                    path, capacity);
            break;
        }
        // Copy by hand rather than with strdup, which is not part of ISO C11.
        size_t size = strlen(word) + 1;
        char *copy = malloc(size);
        if (copy == NULL) {
            fprintf(stderr, "Out of memory while reading %s\n", path);
            free_words(words, count);
            fclose(fp);
            return -1;
        }
        memcpy(copy, word, size);
        words[count++] = copy;
    }
    fclose(fp);
    return count;
}

// Spell-checks in.txt against the word list in index.txt and writes every
// article word that is missing from the list to error.txt, one per line in
// ascending strcmp order (repeated misspellings are written once per
// occurrence). Diagnostics go to stderr.
// Returns 0 on success, 1 if a file cannot be opened/written or memory runs out.
int main(void) {
    static const char index_file[] = "index.txt";
    static const char in_file[] = "in.txt";
    static const char error_file[] = "error.txt";

    // Load the word index.
    char *index_words[MAX_INDEX_NUM];
    int index_size = load_words(index_file, index_words, MAX_INDEX_NUM, 0);
    if (index_size < 0) {
        return 1;
    }
    // binary_search needs ascending strcmp order; lowercasing may have changed
    // the order the file was written in, so sort explicitly.
    qsort(index_words, (size_t)index_size, sizeof index_words[0], compare_words);

    // Load the article, keeping only tokens made up entirely of letters.
    char *in_words[MAX_WORD_NUM];
    int in_size = load_words(in_file, in_words, MAX_WORD_NUM, 1);
    if (in_size < 0) {
        free_words(index_words, index_size);
        return 1;
    }

    // Collect the article words that are absent from the index. The entries
    // alias strings owned by in_words and must not be freed separately.
    char *error_words[MAX_WORD_NUM];
    int error_size = 0;
    for (int i = 0; i < in_size; i++) {
        if (binary_search(index_words, index_size, in_words[i]) == -1) {
            error_words[error_size++] = in_words[i];
        }
    }

    // Sort the misspelled words.
    qsort(error_words, (size_t)error_size, sizeof error_words[0], compare_words);

    // Write the misspelled words to the output file.
    int status = 0;
    FILE *out = fopen(error_file, "w");
    if (out == NULL) {
        fprintf(stderr, "Failed to create file %s\n", error_file);
        status = 1;
    } else {
        int write_failed = 0;
        for (int i = 0; i < error_size; i++) {
            if (fprintf(out, "%s\n", error_words[i]) < 0) {
                write_failed = 1;
                break;
            }
        }
        // fclose flushes buffered output, so its result matters for a write stream.
        if (fclose(out) != 0) {
            write_failed = 1;
        }
        if (write_failed) {
            fprintf(stderr, "Failed to write file %s\n", error_file);
            status = 1;
        }
    }

    // Release the word storage on every path that reaches this point.
    free_words(index_words, index_size);
    free_words(in_words, in_size);
    return status;
}
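/*
 * Spell checker for English articles: using an index table to identify
 * misspelled words efficiently.
 *
 * Original source: https://www.cveoy.top/t/topic/glWN
 * Copyright belongs to the author. Do not repost or scrape.
 *
 * (Site footer: "Free AI - click here, no registration or login required.")
 */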