使用 kraken2 软件做宏基因组或扩增子项目的物种注释时,如果后续不使用 bracken,可能会遇到一个问题:生成的 biom 文件中,taxonomy 并不是最终结果,其中会保留只有级别前缀、没有具体名称的分类级别,比如下面这样(示例):`k__Bacteria; p__Proteobacteria; c__; o__; f__; g__; s__`

可以用下面的 Python 脚本来处理:它会删除未具体命名的分类级别(即以 `__` 结尾、只有前缀的条目),并把结果写入新的 biom 文件。
import argparse
from biom import load_table, Table
import numpy as np
def _drop_unnamed_ranks(taxonomy):
    """Return *taxonomy* without ranks that consist of a bare prefix.

    A rank counts as unnamed when, ignoring surrounding whitespace, it ends
    with ``__`` (for example ``s__``). The input type is preserved: a
    sequence of rank strings yields a list, while a single ``;``-delimited
    string yields a string re-joined with ``'; '``.
    """
    if isinstance(taxonomy, str):
        # Iterating a plain string would walk it character by character, so
        # split it into ranks first.
        # NOTE(review): assumes ';' is the rank delimiter -- confirm against
        # the tool that produced the file.
        ranks = (rank.strip() for rank in taxonomy.split(';'))
        return '; '.join(
            rank for rank in ranks if rank and not rank.endswith('__')
        )
    return [rank for rank in taxonomy if not rank.strip().endswith('__')]


def main(argv=None):
    """Strip unnamed taxonomic ranks from a BIOM table and write it as JSON.

    Reads the table given by ``-i/--input``, removes every rank ending in
    ``__`` from each observation's ``taxonomy`` metadata, and writes the
    resulting table in BIOM JSON format to ``-o/--output``.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) the arguments are taken from ``sys.argv``.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', help='Input biom file', required=True)
    parser.add_argument('-o', '--output', help='Output biom file', required=True)
    args = parser.parse_args(argv)

    # Load the BIOM table
    table = load_table(args.input)
    otu_ids = table.ids(axis='observation')
    obs_metadata = table.metadata(axis='observation')

    if obs_metadata is None:
        # The table carries no observation metadata at all; there is nothing
        # to filter, and iterating None would raise TypeError.
        new_metadata = None
    else:
        new_metadata = []
        for meta in obs_metadata:
            if meta is not None and meta.get('taxonomy') is not None:
                # Work on a copy so the loaded table is left untouched.
                meta = dict(meta)
                meta['taxonomy'] = _drop_unnamed_ranks(meta['taxonomy'])
            # Always append, so metadata stays aligned one-to-one with otu_ids.
            new_metadata.append(meta)

    # Rebuild the table with the filtered metadata, carrying over the
    # original table id and type so they are not lost in the output.
    new_table = Table(
        table.matrix_data,
        otu_ids,
        table.ids(),
        new_metadata,
        table.metadata(),
        table_id=table.table_id,
        type=table.type,
    )

    # Write the new BIOM table to a file
    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(new_table.to_json("Filtered Taxonomy"))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!