可视化分析:使用 Python Bokeh 库分析有房贷与信用卡违约之间的关系
数据处理
# Encode the yes/no/unknown answers of both columns as 1/0/-1.
answer_codes = {'yes': 1, 'no': 0, 'unknown': -1}
for column in ('housing', 'default'):
    df[column] = df[column].replace(answer_codes)

# Drop every row where either answer is unknown (code -1).
known_housing = df['housing'] != -1
known_default = df['default'] != -1
df = df[known_housing & known_default]
计算比例
# Count rows for every (housing, default) pair: housing codes become the
# rows and default codes the columns. fill_value=0 records a pair that never
# occurs as a zero count instead of NaN, so the ratios below stay numeric.
pair_counts = df.groupby(['housing', 'default']).size().unstack(fill_value=0)

# Normalise each row so the shares within one housing group sum to 1.
housing_default = pair_counts.div(pair_counts.sum(axis=1), axis=0)
绘图
# Draw a stacked bar chart of the default-status shares for each housing
# group and write it to a standalone HTML file.
# NOTE(review): `figure`, `output_file` and `show` are presumably imported
# from bokeh.plotting elsewhere in the file -- confirm.
output_file('housing_default.html')

colors = ['#c9d9d3', '#718dbf', '#e84d60']

# (default code, legend label, colour), listed from the bottom of the stack up.
segments = [
    (1, 'Default', colors[0]),
    (0, 'No Default', colors[1]),
    (-1, 'Unknown', colors[2]),
]

p = figure(
    x_range=(-0.5, 2.5),
    y_range=(0, 1),
    # width/height replace plot_width/plot_height, which Bokeh 3.0 removed.
    width=400,
    height=400,
    title='Housing and Default',
    toolbar_location=None,
    tools='',
)

# One bar per housing code present in the table.
x_positions = housing_default.index.tolist()
baseline = [0.0] * len(x_positions)

for code, label, color in segments:
    # Rows with code -1 are removed during data processing, so that column
    # is normally absent; indexing it unconditionally raised KeyError.
    if code not in housing_default.columns:
        continue
    heights = housing_default[code].fillna(0).tolist()
    # vbar's `top` is an absolute y coordinate, not a height, so each
    # segment must end at the running baseline plus its own share.
    tops = [low + height for low, height in zip(baseline, heights)]
    p.vbar(
        x=x_positions,
        top=tops,
        bottom=baseline,
        width=0.2,
        color=color,
        legend_label=label,
    )
    baseline = tops

p.xaxis.axis_label = 'Housing'
p.yaxis.axis_label = 'Default'
# Put ticks only on the bar positions so that no raw numeric tick labels
# appear next to the overridden 'No'/'Yes' labels.
p.xaxis.ticker = x_positions
p.xaxis.major_label_overrides = {0: 'No', 1: 'Yes'}
p.legend.location = 'top_left'

show(p)
原文地址: https://www.cveoy.top/t/topic/jkq0 著作权归作者所有。请勿转载和采集!