# Load data.
# NOTE(review): expects 'data.csv' in the working directory with at least
# 'Title', 'Description' and 'Target' columns — verify against the data source.
data = pd.read_csv('data.csv')

Preprocess data

data['Title'] = data['Title'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x)) data['Description'] = data['Description'].apply(lambda x: re.sub('[^a-zA-Z0-9\s]', '', x)) data['Title'] = data['Title'].apply(lambda x: x.lower()) data['Description'] = data['Description'].apply(lambda x: x.lower())

# Split data into train and test sets (80/20, fixed seed for reproducibility).
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

Preprocess input features

title_encoder = LabelEncoder() description_encoder = LabelEncoder() title_encoder.fit(train_data['Title']) description_encoder.fit(train_data['Description']) train_data['Title'] = title_encoder.transform(train_data['Title']) train_data['Description'] = description_encoder.transform(train_data['Description']) test_data['Title'] = title_encoder.transform(test_data['Title']) test_data['Description'] = description_encoder.transform(test_data['Description'])

Scale input features

scaler = MinMaxScaler() train_data[['Title', 'Description']] = scaler.fit_transform(train_data[['Title', 'Description']]) test_data[['Title', 'Description']] = scaler.transform(test_data[['Title', 'Description']])

Preprocess target variable

train_data['Target'] = train_data['Target'].apply(lambda x: 1 if x == 'Yes' else 0) test_data['Target'] = test_data['Target'].apply(lambda x: 1 if x == 'Yes' else 0)

Define the model

input_title = Input(shape=(1,)) input_description = Input(shape=(1,))

embedding_dim = 32 vocab_size_title = len(title_encoder.classes_) vocab_size_description = len(description_encoder.classes_)

embedding_title = Embedding(vocab_size_title, embedding_dim)(input_title) embedding_description = Embedding(vocab_size_description, embedding_dim)(input_description)

flatten_title = layers.Flatten()(embedding_title) flatten_description = layers.Flatten()(embedding_description)

concat = Concatenate()([flatten_title, flatten_description]) dropout = Dropout(0.2)(concat) dense1 = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(dropout) output = Dense(1, activation='sigmoid')(dense1)

model = keras.Model(inputs=[input_title, input_description], outputs=output)

Compile the model

model.compile(optimizer=Adam(learning_rate=0.001), loss=binary_crossentropy, metrics=[AUC()])

Define callbacks

early_stopping = EarlyStopping(monitor='val_loss', patience=5)

Train the model

history = model.fit( x=[train_data['Title'], train_data['Description']], y=train_data['Target'], batch_size=32, epochs=50, validation_split=0.2, callbacks=[early_stopping] )

Evaluate the model

loss, auc = model.evaluate([test_data['Title'], test_data['Description']], test_data['Target']) print(f'Test loss: {loss}') print(f'Test AUC: {auc}'

# Imports — reconstructed from the garbled, truncated original line.
# NOTE(review): these belong at the top of the file, before any other code.
import re

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Concatenate, Dense, Dropout, Embedding, Input
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.metrics import AUC
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Original source: https://www.cveoy.top/t/topic/hYF3 — copyright belongs to
# the author; do not repost or scrape.