Python 数据可视化:使用 Bokeh 库分析房贷与信用卡违约关系
from bokeh.plotting import figure, show, output_file
import pandas as pd
# Load the training and test datasets.
df = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# Data cleaning: drop every row that contains at least one missing value.
df.dropna(inplace=True)
test.dropna(inplace=True)

# Count the rows for each (housing, default) combination and express each
# count as a share of all remaining rows, so the shares sum to 1 across bars.
# NOTE(review): this is the share of the whole dataset, not the default rate
# within each housing group -- confirm which of the two the chart should show.
housing_default = df.groupby(['housing', 'default']).size().reset_index(name='count')
housing_default['percent'] = housing_default['count'] / housing_default['count'].sum()

# One categorical label per bar. Casting to str and joining with a separator
# keeps the two values distinguishable (plain "+" yields labels like "yesno",
# and would add the values arithmetically if the columns were numeric).
bar_labels = (
    housing_default['housing'].astype(str)
    .str.cat(housing_default['default'].astype(str), sep=' / ')
    .tolist()
)
bar_heights = housing_default['percent'].tolist()

# Draw the bar chart. `height` replaces the `plot_height` keyword, which was
# deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(
    x_range=bar_labels,
    height=350,
    title='房贷与信用卡违约关系',
    toolbar_location=None,
    tools='',
)
p.vbar(x=bar_labels, top=bar_heights, width=0.9)
p.xaxis.axis_label = 'housing / default'
p.xgrid.grid_line_color = None
p.y_range.start = 0
# Keep the original 0.7 ceiling, but grow it when a bar would be clipped.
# max() is called with 0.7 first so that a NaN maximum (empty data) falls
# back to 0.7.
p.y_range.end = max(0.7, housing_default['percent'].max() * 1.05)
# No legend is configured: the single vbar glyph has no legend label, so
# setting p.legend.* would have no effect and only makes Bokeh emit a warning.

# Write the chart to a standalone HTML file.
output_file('housing_default.html')
# Display the figure.
show(p)
原文地址: https://www.cveoy.top/t/topic/jkqP 著作权归作者所有。请勿转载和采集!