使用PCA分析法绘制fasta序列状态转移矩阵的pAC散点图
from Bio import SeqIO import numpy as np import matplotlib.pyplot as plt
读取fasta文件,计算状态转移矩阵
def calc_transition_matrix(fasta_file): sequences = list(SeqIO.parse(fasta_file, 'fasta')) alphabet = list(set(''.join([str(seq.seq) for seq in sequences]))) states = len(alphabet) matrix = np.zeros((states, states)) for seq in sequences: seq_str = str(seq.seq) for i in range(len(seq_str) - 1): from_state = alphabet.index(seq_str[i]) to_state = alphabet.index(seq_str[i+1]) matrix[from_state, to_state] += 1 return matrix
对矩阵进行z-score标准化
def standardize(matrix): standardized_matrix = (matrix - np.mean(matrix, axis=0)) / np.std(matrix, axis=0) return standardized_matrix
对标准化后的矩阵进行PCA分析
def pca(matrix): cov = np.cov(matrix.T) eig_vals, eig_vecs = np.linalg.eig(cov) idx = np.argsort(eig_vals)[::-1] eig_vecs = eig_vecs[:, idx] projection = np.dot(matrix, eig_vecs) return projection
画出PCA散点图
def plot_pca(projection, label): plt.scatter(projection[:, 0], projection[:, 1], c=label) plt.xlabel('PC1') plt.ylabel('PC2') plt.show()
测试代码
if name == 'main': # 读取fasta文件,计算状态转移矩阵 matrix1 = calc_transition_matrix('WBKG00000000.fasta') matrix2 = calc_transition_matrix('WHPN00000000.fasta') matrix3 = calc_transition_matrix('WSRO00000000.fasta') # 对矩阵进行标准化 standardized_matrix1 = standardize(matrix1) standardized_matrix2 = standardize(matrix2) standardized_matrix3 = standardize(matrix3) # 对标准化后的矩阵进行PCA分析 pca_result1 = pca(standardized_matrix1) pca_result2 = pca(standardized_matrix2) pca_result3 = pca(standardized_matrix3) # 画出PCA散点图 plot_pca(pca_result1, 0) plot_pca(pca_result2, 1) plot_pca(pca_result3, 2)
原文地址: https://www.cveoy.top/t/topic/lMA6 著作权归作者所有。请勿转载和采集!