Source code for pymgpipe.modeling

import cobra
import re
import os
import time
from optlang.symbolics import Zero
from cobra.medium import is_boundary_type
from .io import load_cobra_model, UnsupportedSolverException
from .utils import load_dataframe
from .logger import logger

[docs] def build( abundances, sample, taxa_directory, threshold=1e-6, diet_fecal_compartments=True, solver="gurobi", ): """Build community COBRA model using mgpipe-like compartments and constraints. This function is pymgpipe's main model building function, can can be used to build a single model (with no further modifications) Args: abundances (pandas.DataFrame | str): Abundance matrix with taxa as rows and samples as columns sample (str): Label corresponding to the sample you want to build (needs to match up to column name in abundance matrix) taxa_directory (str): Directory containing individual strain/species taxa models (file names corresponding to index of coverage matrix) threshold (float): Abundance threshold, any taxa with an abundance less than this value will be left out and abundances will be re-normalized diet_fecal_compartments (bool): Build models with mgpipe's diet/fecal compartmentalization, defaults to False solver (str): LP solver (gurobi or cplex) used to solve models, defaults to gurobi """ abundances = load_dataframe(abundances) assert sample in abundances.columns, 'Sample %s not found in abundance matrix!'%sample if not os.path.exists(taxa_directory): raise Exception('Taxa directory %s not found!'%taxa_directory) if solver not in ['gurobi','cplex']: raise UnsupportedSolverException sample_abundances = abundances[sample] sample_abundances = sample_abundances[sample_abundances != 0] if threshold is not None: sample_abundances = sample_abundances[sample_abundances > threshold] sample_abundances = sample_abundances / sample_abundances.sum() existing_taxa_files = { t.split("/")[-1].split(".")[0]: os.path.join(taxa_directory,t) for t in os.listdir(taxa_directory) } missing = [t for t in sample_abundances.index if t not in existing_taxa_files] if len(missing) > 0: logger.warning('Could not find associated models for %s taxa- %s\nRemoving missing taxa and renormalizing abundances.'%(len(missing),missing)) sample_abundances.drop(missing, inplace=True) sample_abundances = sample_abundances / sample_abundances.sum() print('Building community model for %s with %s unique taxa...\n'%(sample,len(sample_abundances.index))) start = time.time() community_model = cobra.Model(name=sample) community_model.solver = solver for taxon in sample_abundances.index: model = load_cobra_model(existing_taxa_files[taxon]) taxon = taxon.replace(' ','_') ex_metabolites = [m for m in model.metabolites if '[e]' in m.id] missing = [] for ex in ex_metabolites: if 'EX_%s(e)'%ex.id.split('[e]')[0] not in model.reactions: missing.append(_get_missing_exchange(ex)) if len(missing) > 0: print('Adding %s missing exchange reaction(s) to %s!'%(len(missing),taxon)) model.add_reactions(missing) # -- Reactions -- for r in list(model.reactions): r.id = _remove_non_alphanumeric(r.id+'__'+taxon).replace("(e)", "[u]") # -- Metabolites -- for m in list(model.metabolites): m.id = _remove_non_alphanumeric(m.id+'__'+taxon).replace("[e]", "[u]") m.compartment = _remove_non_alphanumeric(m.compartment+'__'+taxon).replace('e__','u__') community_model.add_reactions(model.reactions) community_model.solver.update() print('Adding exchange reactions...\n') _add_exchanges(community_model, diet_fecal_compartments) microbe_biomass = cobra.Metabolite(id="microbeBiomass[u]", compartment="u") community_model.add_metabolites([microbe_biomass]) if diet_fecal_compartments: _add_fecal_exchange(community_model, microbe_biomass) else: _add_lumen_exchange(community_model, microbe_biomass) biomass_metabs = community_model.metabolites.query(re.compile("^biomass.*", re.IGNORECASE)) biomass = cobra.Reaction( id="communityBiomass", lower_bound=0.4, upper_bound=1, ) biomass.add_metabolites( {m: -float(sample_abundances[m.id.split('__')[-1]]) for m in biomass_metabs} ) biomass.add_metabolites({microbe_biomass: 1}) community_model.add_reactions([biomass]) print('Setting community objective to biomass reaction...') print(biomass.id+': '+biomass.reaction) community_model.objective = biomass community_model.objective_direction = 'max' community_model.solver.update() elapsed = time.time() - start print('\n-----------------------------------') logger.info('Finished building %s in %.2f minutes!'%(sample,elapsed/60)) return community_model
def _add_exchanges(model, diet_fecal_compartments): for r in list(model.reactions): r_taxon = r.id.split('__')[-1] # -- Handling non-exchanges -- if r.id.startswith("DM_"): r.lower_bound = 0 if r.id.startswith("sink_"): r.lower_bound = -1 if 'biomass' in r.id.lower(): if 'EX_' in r.id: model.remove_reactions(r.id) r.bounds = (0, 1000) # -- Handling exchanges -- if not is_boundary_type(r, 'exchange', 'u__'+r_taxon): continue metab = (r.reactants + r.products)[0] lumen_id = metab.id.split('__'+r_taxon)[0] if lumen_id not in model.metabolites: lumen_metab = metab.copy() lumen_metab.id = lumen_id lumen_metab.compartment = 'u' model.add_metabolites([lumen_metab]) if diet_fecal_compartments: _add_fecal_exchange(model, lumen_metab) _add_diet_exchange(model, lumen_metab) else: _add_lumen_exchange(model, lumen_metab) else: lumen_metab = model.metabolites.get_by_id(lumen_id) r.add_metabolites({lumen_metab: 1 if len(r.reactants) == 1 else -1}) r.id = r.id.replace("EX_", "IEX_") r.add_metabolites({k: -v for k, v in r.metabolites.items()}, combine=False) r.lower_bound = -1000 r.upper_bound = 1000 model.solver.update() def _add_lumen_exchange(model, met): ex_medium = cobra.Reaction( id="EX_" + met.id, name=met.id + " lumen exchange", lower_bound=-1000, upper_bound=1000, ) ex_medium.add_metabolites({met: -1}) ex_medium.global_id = ex_medium.id ex_medium.community_id = "lumen" model.add_reactions([ex_medium]) def _add_fecal_exchange(model, met): f_m = met.copy() f_m.id = met.id.replace("[u]", "[fe]") f_m.global_id = f_m.id[:-4] f_m.compartment = "fe" f_m.community_id = "fecal" model.add_metabolites([f_m]) f_ex = cobra.Reaction( id="EX_" + f_m.id, name=f_m.id + " fecal exchange", lower_bound=-1000, upper_bound=1000000, ) f_ex.add_metabolites({f_m: -1}) f_ex.global_id = f_ex.id f_ex.community_id = "fecal" f_tr = cobra.Reaction( id="UFEt_" + f_m.global_id, name=f_m.id + " lumen fecal transport", lower_bound=0, upper_bound=1000000, ) f_tr.add_metabolites({met: -1, f_m: 1}) f_tr.global_id = f_tr.id f_tr.community_id = "lumen/fecal" model.add_reactions([f_ex, f_tr]) def _add_diet_exchange(model, met): d_m = met.copy() d_m.id = met.id.replace("[u]", "[d]") d_m.global_id = d_m.id[:-3] d_m.compartment = "d" d_m.community_id = "diet" model.add_metabolites([d_m]) d_ex = cobra.Reaction( id="Diet_EX_" + d_m.id, name=d_m.id + " diet exchange", lower_bound=-1000, upper_bound=1000, ) d_ex.add_metabolites({d_m: -1}) d_ex.global_id = d_ex.id d_ex.community_id = "diet" d_tr = cobra.Reaction( id="DUt_" + d_m.global_id, name=d_m.id + " diet lumen transport", lower_bound=0, upper_bound=1000000, ) d_tr.add_metabolites({met: 1, d_m: -1}) d_tr.global_id = d_tr.id d_tr.community_id = "diet/lumen" model.add_reactions([d_ex, d_tr]) def _get_missing_exchange(metab): ex = cobra.Reaction( id='EX_%s(e)'%metab.id.split('[e]')[0], name='%s exchange'%metab.name, lower_bound=-1000, upper_bound=1000, subsystem='Exchange/demand reaction') ex.add_metabolites({metab:-1}) return ex def _remove_non_alphanumeric(string): return re.sub(r'[^\w[\]()]+', '', string)