Python 代码分析与优化:将数据处理结果输出到指定目录
# coding: utf-8
"""Join ORF abundance, species annotation and KO annotation tables.

The script reads four tables (ORF abundance, ORF-to-species annotation, a
user-supplied KO list and ORF-to-KO annotation), chains datatable joins
between them, and writes the results into the directory given by ``-o``.
"""
import argparse
import os
import sys

import pandas as pd
from datatable import dt, join, f, by, g

parser = argparse.ArgumentParser(prog='mergspko_meta.py',
                                 epilog='''
Usage:
mergspko_meta.py -g gnct_rel.txt -s orf-sp.csv -k kotab.csv -r orfidko.txt -o out_dir
''', formatter_class=argparse.RawDescriptionHelpFormatter)

parser.add_argument('-g', '--gnct', required=True, type=str,
                    help='gnct_rel.txt: 为orf丰度表')
parser.add_argument('-s', '--orfsp', required=True, type=str,
                    help='orf-sp.csv: 组内流程物种注释结果文件')
# Double quotes outside so the literal 'ko' inside the help text is legal.
parser.add_argument('-k', '--kotab', required=True, type=str,
                    help="kotab.csv, : 为用户提供的KO表,列名必须为'ko'")
parser.add_argument('-r', '--orfidko', required=True, type=str,
                    help='orfidko.txt: 组内kegg注释结果的文件')
parser.add_argument('-m', '--merge', required=False, type=str, default=None,
                    help='merge: 不同KO是否需要合并,默认不合并,若要合并,可给出任意值')
parser.add_argument('-o', '--output', required=True, type=str,
                    help='out_dir: 输出目录')

if __name__ == '__main__':
    args = parser.parse_args()

    # Make sure the output directory exists before anything is written
    # into it; an empty value falls back to the current directory.
    os.makedirs(args.output or '.', exist_ok=True)

    genusdt0 = dt.fread(args.gnct)
    orfsp = dt.fread(args.orfsp)
    kodt = dt.fread(args.kotab)
    orfko = dt.fread(args.orfidko)

    # Each step keys the right-hand frame, joins it, and filters on the last
    # column of the joined frame (g[-1]) not being NA.  `!= None` builds a
    # datatable filter expression here, so it must not become `is not None`.
    kodt.key = 'ko'
    orfko = orfko[g[-1] != None, :, join(kodt)]  # noqa: E711
    orfko.key = 'orfid'
    orfsp = orfsp[g[-1] != None, :, join(orfko)]  # noqa: E711
    orfsp.to_csv(os.path.join(args.output, 'orf-ko-sp.csv'))
    orfsp.key = 'orfid'
    genusdt0 = genusdt0[g[-1] != None, :, join(orfsp)]  # noqa: E711

    # NOTE(review): indentation was lost in the source this was recovered
    # from; everything below is assumed to belong to the `--merge` branch.
    # Confirm against the original script.
    if args.merge:
        # Drop the identifier columns, then sum what remains per taxonomy.
        genusdt0 = genusdt0[:, f[:].remove([f.orfid, f.spid])]
        genusdt0 = genusdt0.to_pandas()
        genusdt0 = genusdt0.groupby('taxonomy').agg('sum')
        genusdt0 = genusdt0.reset_index()

        # Split the '|'-separated taxonomy string into one column per level.
        levels = genusdt0['taxonomy'].str.split('|', expand=True)
        genusdt0 = pd.concat([genusdt0, levels], axis=1)
        genusdt0 = genusdt0.rename(
            columns={0: 'K', 1: 'P', 2: 'C', 3: 'O', 4: 'F', 5: 'G', 6: 'S', 7: 'T'})
        genusdt0 = genusdt0.drop(['T'], axis=1)

        # Rebuild the taxonomy label from K..S joined with ';' (T is dropped).
        rank_cols = ['K', 'P', 'C', 'O', 'F', 'G', 'S']
        taxonomy = genusdt0[rank_cols[0]]
        for rank in rank_cols[1:]:
            taxonomy = taxonomy + ';' + genusdt0[rank]
        genusdt0['taxonomy'] = taxonomy

        genusdt0 = genusdt0.groupby('taxonomy').agg('sum')
        # Keep only rows whose row-wise sum is non-zero.
        genusdt0 = genusdt0.loc[genusdt0.sum(axis=1) != 0]
        genusdt0.to_csv(os.path.join(args.output, 'species.txt'), index=True, sep='\t')

        # G
        genusdt0 = genusdt0.reset_index()  # turn the index back into a column
        # NOTE(review): the source is truncated at this point.  The next
        # statement began with `genusdt0 = pd.concat([genusdt0` and its
        # remainder is not available; restore it from the original script.
原文地址: https://www.cveoy.top/t/topic/p6mf 著作权归作者所有。请勿转载和采集!