Machine Learning Model for Multi-Class Classification with Feature Engineering and Optimization
import pickle

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler, Normalizer, MaxAbsScaler
from sklearn.tree import DecisionTreeClassifier


# Normalization / standardization
def data_scale(data):
    """Fit a ``MinMaxScaler`` (default settings) on *data* and return the scaled values.

    NOTE(review): the scaler is fitted on whatever is passed in; the script
    below passes the full feature matrix before the train/test split, so the
    test rows influence the scaling. This is harmless for tree models but
    should be confirmed before swapping in a scale-sensitive estimator.
    """
    scaler = MinMaxScaler()
    return scaler.fit_transform(data)


# Dimensionality reduction
def dimension_reduce(data, threshold=0.02, label="label"):
    """Drop columns that are only weakly correlated with the label column.

    Args:
        data: DataFrame that contains a column named *label*.
        threshold: a column is kept when the absolute value of its
            correlation with *label* is strictly greater than this.
        label: name of the target column; it is always kept.

    Returns:
        A DataFrame with the surviving columns in their original order.
        Columns whose correlation is NaN (for example constant columns) are
        dropped, because NaN never compares greater than the threshold.

    Raises:
        KeyError: if *label* is not a column of the correlation matrix.
    """
    corr = data.corr().loc[:, label]
    # The previous version computed this mask and then returned ``data``
    # untouched, so no feature was ever removed.
    keep = (corr.abs() > threshold) | (corr.index == label)
    selected_columns = corr[keep].index
    return data[selected_columns]


# Missing-value handling
def handle_na(data):
    """Return *data* with missing values replaced by the mean of their column."""
    return data.fillna(data.mean())


# Preprocessing
def preprocessing(data):
    """Run feature selection first, then fill the remaining missing values."""
    data = dimension_reduce(data)
    data = handle_na(data)
    return data


# Model selection
def select_model():
    """Return an unfitted ``DecisionTreeClassifier`` limited to depth 5."""
    return DecisionTreeClassifier(max_depth=5)


# Evaluation
def get_score(estimator=None, features=None, target=None, cv=10):
    """Return the per-fold scores from ``cross_val_score``.

    Args:
        estimator: model to evaluate.
        features: feature matrix.
        target: target values.
        cv: number of cross-validation folds (10 by default, as before).

    Any of *estimator*, *features* or *target* left as ``None`` falls back to
    the module-level ``model``, ``X`` or ``Y`` respectively, so the legacy
    zero-argument call ``get_score()`` keeps working when the script section
    below has defined them.
    """
    if estimator is None:
        estimator = model
    if features is None:
        features = X
    if target is None:
        target = Y
    return cross_val_score(estimator, features, target, cv=cv)


if __name__ == '__main__':
    data = pd.read_csv("preprocess_train.csv")
    data = preprocessing(data)
    # The target is taken by position: the last column is assumed to be the
    # label. dimension_reduce() preserves column order, so this still holds
    # after feature selection.
    X = data.iloc[:, 0:-1]
    Y = data.iloc[:, -1]
    X = data_scale(X)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
    model = select_model()
    model.fit(X_train, y_train)
    score = get_score(model, X, Y)
    # Report the cross-validation result instead of silently discarding it.
    print("Cross-validation scores:", score)

    # Save the model as a pickle file
    with open('model.pkl', 'wb') as file:
        pickle.dump(model, file)

    # Load the model from the pickle file.
    # NOTE: pickle.load can execute arbitrary code; it is acceptable here only
    # because the file was written by this very script a moment ago. Never
    # load a pickle that comes from an untrusted source.
    with open('model.pkl', 'rb') as file:
        loaded_model = pickle.load(file)

    # Use the loaded model to make predictions
    y_pred = loaded_model.predict(X_test)
    f1 = f1_score(y_test, y_pred, average='macro')
    print("F1 Score:", f1)
原文地址: http://www.cveoy.top/t/topic/pFsw 著作权归作者所有。请勿转载和采集!