from copy import deepcopy
from itertools import combinations

# Load the data and drop the 1-itemsets whose support count is below min_support.

def load_data(data, threshold=None):
    """Count the support of every single item and keep the frequent ones.

    The support count of an item is the number of transactions that contain
    it, so an item repeated inside one transaction is counted only once.

    Args:
        data: Iterable of transactions; each transaction is an iterable of
            hashable items.
        threshold: Minimum support count an item needs in order to be kept.
            When omitted, the module-level ``min_support`` is used.

    Returns:
        A dict mapping each frequent item to its support count, ordered by
        the item's first appearance in ``data``.

    Raises:
        NameError: If ``threshold`` is omitted and no module-level
            ``min_support`` has been defined.
    """
    if threshold is None:
        # Fall back to the module-level setting assigned by the script entry.
        threshold = min_support

    support = {}
    for transaction in data:
        # dict.fromkeys drops repeated items while keeping first-seen order,
        # so one transaction contributes at most 1 to each item's count.
        for item in dict.fromkeys(transaction):
            support[item] = support.get(item, 0) + 1

    # Build the filtered result directly instead of copying and deleting.
    return {
        item: count for item, count in support.items() if count >= threshold
    }

# Check whether each candidate itemset's support count reaches min_support.

def get_support_set(p_set, transactions=None, threshold=None):
    """Return the candidate itemsets that are frequent, with their counts.

    Args:
        p_set: Iterable of candidate itemsets; each must be a ``set`` (or
            ``frozenset``) because ``issubset`` is called on it.
        transactions: Re-iterable collection of transactions to scan. When
            omitted, the module-level ``data_set`` is used.
        threshold: Minimum support count a candidate needs in order to be
            kept. When omitted, the module-level ``min_support`` is used.

    Returns:
        A list of ``[itemset, support_count]`` pairs, in the order the
        candidates were given, for every candidate whose support count is at
        least ``threshold``.

    Raises:
        NameError: If an omitted argument has no module-level fallback
            (``data_set`` / ``min_support``) defined.
    """
    # Fall back to the module-level values assigned by the script entry.
    if transactions is None:
        transactions = data_set
    if threshold is None:
        threshold = min_support

    item_supp_set = []
    for item in p_set:
        # Support count = number of transactions containing the whole itemset.
        count = sum(1 for ds in transactions if item.issubset(ds))
        if count >= threshold:
            item_supp_set.append([item, count])
    return item_supp_set

# Find all frequent itemsets.

# The frequent 2-itemsets are used as the initial set.

def get_all_items(two_set, k=3):
    """Grow frequent itemsets level by level, starting from ``two_set``.

    Each round builds size-``k`` candidates from the items that occur in the
    previous level, prunes them, measures their support with
    ``get_support_set`` and then moves on to size ``k + 1``. The loop stops
    as soon as a round produces no candidate that survives pruning.

    Args:
        two_set: List of ``[itemset, support_count]`` pairs for the starting
            level (the frequent 2-itemsets when ``k`` is left at 3).
        k: Size of the first candidate level to generate.

    Returns:
        A list of ``[itemset, support_count]`` pairs for every frequent
        itemset found from size ``k`` upwards; ``two_set`` itself is not
        included.
    """
    all_frequent = []
    previous_level = two_set

    while True:
        previous_itemsets = [entry[0] for entry in previous_level]

        # Collect the distinct items of the previous level in first-seen
        # order; that order determines the order of the generated candidates.
        items = list(dict.fromkeys(
            item for itemset in previous_itemsets for item in itemset
        ))

        # Pruning step: keep a candidate only when exactly k itemsets of the
        # previous level are contained in it. With distinct (k-1)-itemsets
        # in the previous level this means all of its (k-1)-subsets are
        # frequent.
        candidates = []
        for combo in combinations(items, k):
            candidate = set(combo)
            contained = sum(
                1 for itemset in previous_itemsets
                if candidate.issuperset(itemset)
            )
            if contained == k:
                candidates.append(candidate)

        if not candidates:
            # Nothing left to test, so no larger frequent itemset can exist.
            break

        frequent = get_support_set(candidates)
        all_frequent.extend(frequent)
        previous_level = frequent
        k += 1

    return all_frequent

if __name__ == '__main__':
    # Sample transaction database: each inner list is one transaction.
    data = [
        ['I1', 'I2', 'I5'],
        ['I2', 'I4'],
        ['I2', 'I3'],
        ['I1', 'I2', 'I4'],
        ['I1', 'I3'],
        ['I2', 'I3'],
        ['I1', 'I3'],
        ['I1', 'I2', 'I3', 'I5'],
        ['I1', 'I2', 'I3'],
    ]
    # data_set and min_support are deliberately module-level names: the
    # helper functions read them when computing support.
    data_set = [set(d) for d in data]
    min_support = 1

    # Frequent 1-itemsets; computed once and reused for both levels below.
    frequent_items = load_data(data)
    one = [[{lk}, lv] for lk, lv in frequent_items.items()]

    # Frequent 2-itemsets: every pair of frequent items, filtered by support.
    two = [set(t) for t in combinations(frequent_items, 2)]
    two_f_set = get_support_set(two)

    # All frequent itemsets: sizes 1 and 2 plus everything grown from size 2.
    all_frequent_set = one + two_f_set + get_all_items(two_f_set)
    for afs in all_frequent_set:
        print(afs)

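# Apriori Algorithm for Frequent Itemset Mining: Python Implementation
#
# Load the data and drop the 1-itemsets whose support count is below min_support.
#
# Check whether each candidate itemset's support count reaches min_support.
#
# Find all frequent itemsets.
#
# The frequent 2-itemsets are used as the initial set.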