MFP - Molecular Functional Portrait

MFP - Molecular Functional Portrait


https://github.com/BostonGene/MFP


# Python3.7
import pandas as pd
from portraits.clustering import clustering_profile_metrics, clustering_profile_metrics_plot
from portraits.utils import read_gene_sets, ssgsea_formula, median_scale
# Example script
# Load the gene signatures (GMT format like in MSIGdb).
gmt = read_gene_sets('signatures.gmt')
# Load the expression table (log2+1 transformed; genes in columns) and
# transpose it in the same statement.
exp = pd.read_csv('expression.tsv', sep='\t', index_col=0).T
# Calculate the signature scores.
signature_scores = ssgsea_formula(exp, gmt)
# Scale the signature scores and keep a copy on disk.
signature_scores_scaled = median_scale(signature_scores)
signature_scores_scaled.to_csv('signature_scores.tsv', sep='\t', index=True)
# Check the clustering within a range of 30 to 65% similarity, in 1% steps.
# >65% - usually graph is not connected; <30% - unreasonable correlation
clustering_metrics = clustering_profile_metrics(
    signature_scores_scaled,
    threshold_mm=(0.3, 0.65),
    step=0.01,
)
# Visualize the partitions and save the figure.
fig, ax = clustering_profile_metrics_plot(clustering_metrics)
fig.savefig('heat.png', dpi=300)
# Then select the best threshold using one or more metrics.
# NOTE(review): the threshold is used as a string label here; a float (0.51)
# was tried as an alternative. Confirm which form matches the index of
# `clustering_metrics`.
best_threshold = '0.51'

def detect_type(ser, scores):
    """Label every sample with the MFP subtype of the cluster it belongs to.

    The per-cluster mean of each signature is computed first. Clusters are
    then named in two passes:

    1. The cluster with the largest (fibrotic/angiogenic - immune) difference
       becomes ``'F'``; the one with the smallest becomes ``'IE'``.
    2. Among the remaining clusters, the one with the largest
       (mean of all microenvironment signatures - proliferation rate)
       difference becomes ``'IE/F'``; the one with the smallest becomes ``'D'``.

    Four clusters are expected. Clusters that receive no label (possible when
    more than four are supplied) are mapped to NaN in the result.

    :param ser: cluster label per sample. Either a pd.Series indexed by
        sample, or a pd.DataFrame indexed by sample with the labels in a
        ``"group"`` column.
    :param scores: pd.DataFrame of signature scores, samples in rows and
        signatures in columns. It must contain every signature named below.
    :return: pd.Series named ``'MFP'``, indexed like the cluster labels, with
        values ``'F'``, ``'IE'``, ``'IE/F'`` or ``'D'``.
    :raises KeyError: if a required signature or sample is missing from
        ``scores``.
    """
    fibrotic = ['Angiogenesis', 'Endothelium', 'CAF', 'Matrix', 'Matrix_remodeling']
    immune = ['MHCII', 'Antitumor_cytokines', 'Coactivation_molecules',
              'B_cells', 'NK_cells', 'Checkpoint_inhibition',
              'Effector_cells', 'T_cells', 'Th1_signature',
              'T_cell_traffic', 'MHCI']
    protumor = ['Protumor_cytokines', 'Neutrophil_signature', 'Granulocyte_traffic',
                'Macrophages', 'Macrophage_DC_traffic', 'MDSC_traffic', 'MDSC',
                'Th2_signature', 'T_reg_traffic', 'Treg', 'M1_signatures']
    microenvironment = fibrotic + protumor + immune + ['EMT_signature']

    # Accept both the bare label Series and the table with a "group" column,
    # so either calling convention works.
    groups = ser["group"] if isinstance(ser, pd.DataFrame) else ser

    # Columns: cluster labels; rows: signatures.
    cmeans = pd.DataFrame({cg: scores.loc[samps.index].mean()
                           for cg, samps in groups.groupby(groups)})

    mapper = {}
    deltas = (cmeans.loc[fibrotic].mean() - cmeans.loc[immune].mean()).sort_values()
    mapper[deltas.index[-1]] = 'F'  # That's fibrotic
    mapper[deltas.index[0]] = 'IE'  # Immune enriched, non-fibrotic

    # The two clusters named above must not compete in the second pass.
    cmeans = cmeans.drop(columns=[deltas.index[-1], deltas.index[0]])

    deltas = (cmeans.loc[microenvironment].mean() -
              cmeans.loc['Proliferation_rate']).sort_values()
    mapper[deltas.index[-1]] = 'IE/F'  # Immune enriched & fibrotic
    mapper[deltas.index[0]] = 'D'  # Desert

    # Return per-sample labels rather than the raw dict: the caller writes the
    # result with ``to_csv``, which a dict does not provide.
    return groups.map(mapper).rename('MFP')
print(clustering_metrics.axes)
# Detect cluster types
# Take the entry of the metrics table that belongs to the chosen threshold.
ser = clustering_metrics.loc[best_threshold]
# Build a two-column table from `ser.perc`: its index goes to "sam" and its
# values go to "group".
# NOTE(review): presumably `perc` is the per-sample cluster assignment - confirm.
df = pd.DataFrame({
    "sam": ser.perc.index,
    "group": ser.perc,
})
df.to_csv('group_clusters.tsv', sep='\t', index=False)
final_clusters = detect_type(df, signature_scores_scaled)
# Output the clusters; `final_clusters` has to be a pandas object here because
# `to_csv` is called on it.
final_clusters.to_csv('final_clusters.tsv', sep='\t', index=True)



Bagaev A, Kotlov N, Nomie K, Svekolkin V, Gafurov A, Isaeva O, Osokin N, Kozlov I, Frenkel F, Gancharova O, Almog N, Tsiper M, Ataullakhanov R, Fowler N. Conserved pan-cancer microenvironment subtypes predict response to immunotherapy. Cancer Cell. 2021 Jun 14;39(6):845-865.e7. doi: 10.1016/j.ccell.2021.04.014. Epub 2021 May 20. PMID: 34019806.




  • 发表于 2021-08-17 13:59
  • 阅读 ( 1495 )
  • 分类:TCGA

你可能感兴趣的文章

相关问题

0 条评论

请先 登录 后评论
omicsgene
omicsgene

生物信息

654 篇文章

作家榜 »

  1. omicsgene 654 文章
  2. 安生水 325 文章
  3. Daitoue 167 文章
  4. 生物女学霸 120 文章
  5. 红橙子 78 文章
  6. CORNERSTONE 72 文章
  7. rzx 67 文章
  8. xun 66 文章