0001 function model = convertSBMLToCobra(modelSBML,defaultBound,compSymbolList,compNameList)
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026 if (nargin < 2)
0027 defaultBound = 1000;
0028 end
0029
0030 if nargin < 3
0031 compSymbolList = {};
0032 compNameList = {};
0033 end
0034
0035 nMetsTmp = length(modelSBML.species);
0036 nRxns = length(modelSBML.reaction);
0037
0038
0039 formulaCount = 0;
0040 speciesList = {};
0041 chargeList = [];
0042 metFormulas = {};
0043 haveFormulasFlag = false;
0044 tmpSpecies = [];
0045 for i = 1:nMetsTmp
0046
0047 if (~modelSBML.species(i).boundaryCondition)
0048
0049 if (isempty(regexp(modelSBML.species(i).id,'_b$')));
0050 tmpSpecies = [ tmpSpecies modelSBML.species(i)];
0051 speciesList{end+1} = modelSBML.species(i).id;
0052 notesField = modelSBML.species(i).notes;
0053
0054 if (~isempty(notesField))
0055 [tmp,tmp,tmp,tmp,formula,tmp,tmp,tmp,tmp,charge] = parseSBMLNotesField(notesField);
0056 tmpCharge = charge;
0057 metFormulas {end+1} = formula;
0058 formulaCount = formulaCount + 1;
0059 haveFormulasFlag = true;
0060 end
0061 chargeList= [chargeList modelSBML.species(i).charge];
0062 end
0063 end
0064 end
0065
0066 nMets = length(speciesList);
0067
0068
0069 S = sparse(nMets,nRxns);
0070 rev = zeros(nRxns,1);
0071 lb = zeros(nRxns,1);
0072 ub = zeros(nRxns,1);
0073 c = zeros(nRxns,1);
0074 rxns = cell(nRxns,1);
0075 rules = cell(nRxns,1);
0076 genes = cell(nRxns,1);
0077 allGenes = {};
0078 h = waitbar(0,'Reading SBML file ...');
0079 hasNotesField = false;
0080 for i = 1:nRxns
0081 if mod(i,10) == 0
0082 waitbar(i/nRxns,h);
0083 end
0084
0085 notesField = modelSBML.reaction(i).notes;
0086 if (~isempty(notesField))
0087 [geneList,rule,subSystem,grRule,formula,confidenceScore, citation, comment, ecNumber] = parseSBMLNotesField(notesField);
0088 subSystems{i} = subSystem;
0089 genes{i} = geneList;
0090 allGenes = [allGenes geneList];
0091 rules{i} = rule;
0092 grRules{i} = grRule;
0093 hasNotesField = true;
0094 confidenceScores{i}= confidenceScore;
0095 citations{i} = citation;
0096 comments{i} = comment;
0097 ecNumbers{i} = ecNumber;
0098 end
0099 rev(i) = modelSBML.reaction(i).reversible;
0100 rxnNameTmp = regexprep(modelSBML.reaction(i).name,'^R_','');
0101 rxnNames{i} = regexprep(rxnNameTmp,'_+',' ');
0102 rxnsTmp = regexprep(modelSBML.reaction(i).id,'^R_','');
0103 rxns{i} = cleanUpFormatting(rxnsTmp);
0104
0105 reactantStruct = modelSBML.reaction(i).reactant;
0106 for j = 1:length(reactantStruct)
0107 speciesID = find(strcmp(reactantStruct(j).species,speciesList));
0108 if (~isempty(speciesID))
0109 stoichCoeff = reactantStruct(j).stoichiometry;
0110 S(speciesID,i) = -stoichCoeff;
0111 end
0112 end
0113 productStruct = modelSBML.reaction(i).product;
0114 for j = 1:length(productStruct)
0115 speciesID = find(strcmp(productStruct(j).species,speciesList));
0116 if (~isempty(speciesID))
0117 stoichCoeff = productStruct(j).stoichiometry;
0118 S(speciesID,i) = stoichCoeff;
0119 end
0120 end
0121 if isfield(modelSBML.reaction(i).kineticLaw,'parameter')
0122 parameters = modelSBML.reaction(i).kineticLaw.parameter;
0123 else
0124 parameters =[];
0125 end
0126 if (~isempty(parameters))
0127 for j = 1:length(parameters)
0128 paramStruct = parameters(j);
0129 switch paramStruct.id
0130 case 'LOWER_BOUND'
0131 lb(i) = paramStruct.value;
0132 if (lb(i) < -defaultBound)
0133 lb(i) = -defaultBound;
0134 end
0135 case 'UPPER_BOUND'
0136 ub(i) = paramStruct.value;
0137 if (ub(i) > defaultBound)
0138 ub(i) = defaultBound;
0139 end
0140 case 'OBJECTIVE_COEFFICIENT'
0141 c(i) = paramStruct.value;
0142 end
0143 end
0144 else
0145 ub(i) = defaultBound;
0146 if (rev(i) == 1)
0147 lb(i) = -defaultBound;
0148 else
0149 lb(i) = 0;
0150 end
0151 end
0152 end
0153
0154 if (regexp(version, 'R20'))
0155 close(h);
0156 end
0157 allGenes = unique(allGenes);
0158
0159
0160 if (hasNotesField)
0161
0162 rxnGeneMat = sparse(nRxns,length(allGenes));
0163 h = waitbar(0,'Constructing GPR mapping ...');
0164 for i = 1:nRxns
0165 if mod(i,10) == 0
0166 waitbar(i/nRxns,h);
0167 end
0168 if iscell(genes{i})
0169 [tmp,geneInd] = ismember(genes{i},allGenes);
0170 else
0171 [tmp,geneInd] = ismember(num2cell(genes{i}),allGenes);
0172 end
0173
0174 rxnGeneMat(i,geneInd) = 1;
0175 for j = 1:length(geneInd)
0176 rules{i} = strrep(rules{i},['x(' num2str(j) ')'],['x(' num2str(geneInd(j)) '_TMP_)']);
0177 end
0178 rules{i} = strrep(rules{i},'_TMP_','');
0179 end
0180
0181 if (regexp(version, 'R20'))
0182 close(h);
0183 end
0184
0185 end
0186
0187
0188 mets = cell(nMets,1);
0189 compartmentList = cell(length(modelSBML.compartment),1);
0190 if isempty(compSymbolList), useCompList = true; else useCompList = false; end
0191 for i=1:length(modelSBML.compartment)
0192 compartmentList{i} = modelSBML.compartment(i).id;
0193 end
0194
0195 h = waitbar(0,'Constructing metabolite lists ...');
0196 hasAnnotationField = 0;
0197 for i = 1:nMets
0198 if mod(i,10) == 0
0199 waitbar(i/nMets,h);
0200 end
0201
0202
0203 metID = regexprep(speciesList{i},'^M_','');
0204 metID = regexprep(metID,'^_','');
0205
0206 tmpCell = {};
0207 if useCompList
0208 for j=1:length(compartmentList)
0209 tmpCell = regexp(metID,['_(' compartmentList{j} ')$'],'tokens');
0210 if ~isempty(tmpCell), break; end
0211 end
0212 if isempty(tmpCell), useCompList = false; end
0213 elseif ~isempty(compSymbolList)
0214 for j = 1: length(compSymbolList)
0215 tmpCell = regexp(metID,['_(' compSymbolList{j} ')$'],'tokens');
0216 if ~isempty(tmpCell), break; end
0217 end
0218 end
0219 if isempty(tmpCell), tmpCell = regexp(metID,'_(.)$','tokens'); end
0220 if ~isempty(tmpCell)
0221 compID = tmpCell{1};
0222 metTmp = [regexprep(metID,['_' compID{1} '$'],'') '[' compID{1} ']'];
0223 else
0224 metTmp = metID;
0225 end
0226
0227 mets{i} = cleanUpFormatting(metTmp);
0228
0229
0230 metNamesTmp = regexprep(tmpSpecies(i).name,'^M_','');
0231 metNamesTmp = cleanUpFormatting(metNamesTmp);
0232 metNamesTmp = regexprep(metNamesTmp,'^_','');
0233
0234 metNamesTmp = regexprep(metNamesTmp,'-+','-');
0235 metNamesTmp = regexprep(metNamesTmp,'-$','');
0236 metNamesAlt{i} = metNamesTmp;
0237
0238
0239 if (~haveFormulasFlag)
0240 [tmp,tmp,tmp,tmp,tokens] = regexp(metNamesTmp,'(.*)_((((A|Ag|As|C|Ca|Cd|Cl|Co|Cu|F|Fe|H|Hg|I|K|L|M|Mg|Mn|Mo|N|Na|Ni|O|P|R|S|Se|U|W|X|Y|Zn)?)(\d*)))*$');
0241 if (isempty(tokens))
0242 if length(metFormulas)<i||(metFormulas{i}=='')
0243 metFormulas{i} = '';
0244 end
0245 metNames{i} = metNamesTmp;
0246 else
0247 formulaCount = formulaCount + 1;
0248 metFormulas{i} = tokens{1}{2};
0249 metNames{i} = tokens{1}{1};
0250 end
0251 else
0252 metNames{i} = metNamesTmp;
0253 end
0254 if isfield(modelSBML.species(i),'annotation')
0255 hasAnnotationField = 1;
0256 [metCHEBI,metKEGG,metPubChem,metInChI] = parseSBMLAnnotationField(modelSBML.species(i).annotation);
0257 metCHEBIID{i} = metCHEBI;
0258 metKEGGID{i} = metKEGG;
0259 metPubChemID{i} = metPubChem;
0260 metInChIString{i} = metInChI;
0261 end
0262 end
0263 if ( regexp( version, 'R20') )
0264 close(h);
0265 end
0266
0267
0268 model.rxns = rxns;
0269 model.mets = mets;
0270 model.S = S;
0271 model.rev = rev;
0272 model.lb = lb;
0273 model.ub = ub;
0274 model.c = c;
0275 model.metCharge = transpose(chargeList);
0276 if (hasNotesField)
0277 model.rules = rules;
0278 model.genes = columnVector(allGenes);
0279 model.rxnGeneMat = rxnGeneMat;
0280 model.grRules = columnVector(grRules);
0281 model.subSystems = columnVector(subSystems);
0282 model.confidenceScores = columnVector(confidenceScores);
0283 model.rxnReferences = columnVector(citations);
0284 model.rxnECNumbers = columnVector(ecNumbers);
0285 model.rxnNotes = columnVector(comments);
0286 end
0287 model.rxnNames = columnVector(rxnNames);
0288
0289
0290 if (formulaCount < 0.9*nMets)
0291 model.metNames = columnVector(metNamesAlt);
0292 else
0293 model.metNames = columnVector(metNames);
0294 model.metFormulas = columnVector(metFormulas);
0295 end
0296 if (hasAnnotationField)
0297 model.metChEBIID = columnVector(metCHEBIID);
0298 model.metKEGGID = columnVector(metKEGGID);
0299 model.metPubChemID = columnVector(metPubChemID);
0300 model.metInChIString = columnVector(metInChIString);
0301 end
0302
0303
0304 function [genes,rule,subSystem,grRule,formula,confidenceScore,citation,comment,ecNumber,charge] = parseSBMLNotesField(notesField)
0305
0306
0307
0308
0309
0310
0311
0312
0313 if isempty(regexp(notesField,'html:p', 'once'))
0314 tag = 'p';
0315 else
0316 tag = 'html:p';
0317 end
0318
0319 subSystem = '';
0320 grRule = '';
0321 genes = {};
0322 rule = '';
0323 formula = '';
0324 confidenceScore = '';
0325 citation = '';
0326 ecNumber = '';
0327 comment = '';
0328 charge = [];
0329 Comment = 0;
0330
0331 [tmp,fieldList] = regexp(notesField,['<' tag '>.*?</' tag '>'],'tokens','match');
0332
0333 for i = 1:length(fieldList)
0334 fieldTmp = regexp(fieldList{i},['<' tag '>(.*)</' tag '>'],'tokens');
0335 fieldStr = fieldTmp{1}{1};
0336 if (regexp(fieldStr,'GENE_ASSOCIATION'))
0337 gprStr = regexprep(strrep(fieldStr,'GENE_ASSOCIATION:',''),'^(\s)+','');
0338 grRule = gprStr;
0339 [genes,rule] = parseBoolean(gprStr);
0340 elseif (regexp(fieldStr,'GENE ASSOCIATION'))
0341 gprStr = regexprep(strrep(fieldStr,'GENE ASSOCIATION:',''),'^(\s)+','');
0342 grRule = gprStr;
0343 [genes,rule] = parseBoolean(gprStr);
0344 elseif (regexp(fieldStr,'SUBSYSTEM'))
0345 subSystem = regexprep(strrep(fieldStr,'SUBSYSTEM:',''),'^(\s)+','');
0346 subSystem = strrep(subSystem,'S_','');
0347 subSystem = regexprep(subSystem,'_+',' ');
0348 if (isempty(subSystem))
0349 subSystem = 'Exchange';
0350 end
0351 elseif (regexp(fieldStr,'EC Number'))
0352 ecNumber = regexprep(strrep(fieldStr,'EC Number:',''),'^(\s)+','');
0353 elseif (regexp(fieldStr,'FORMULA'))
0354 formula = regexprep(strrep(fieldStr,'FORMULA:',''),'^(\s)+','');
0355 elseif (regexp(fieldStr,'CHARGE'))
0356 charge = str2num(regexprep(strrep(fieldStr,'CHARGE:',''),'^(\s)+',''));
0357 elseif (regexp(fieldStr,'AUTHORS'))
0358 if isempty(citation)
0359 citation = strcat(regexprep(strrep(fieldStr,'AUTHORS:',''),'^(\s)+',''));
0360 else
0361 citation = strcat(citation,';',regexprep(strrep(fieldStr,'AUTHORS:',''),'^(\s)+',''));
0362 end
0363 elseif Comment == 1 && isempty(regexp(fieldStr,'genes:', 'once'))
0364 Comment = 0;
0365 comment = fieldStr;
0366 elseif (regexp(fieldStr,'Confidence'))
0367 confidenceScore = regexprep(strrep(fieldStr,'Confidence Level:',''),'^(\s)+','');
0368 Comment = 1;
0369 end
0370 end
0371
0372 function [metCHEBI,metKEGG,metPubChem,metInChI] = parseSBMLAnnotationField(annotationField)
0373
0374
0375
0376
0377
0378
0379
0380
0381
0382 metPubChem = '';
0383 metCHEBI = '';
0384 metKEGG = '';
0385 metPubChem = '';
0386 metInChI='';
0387
0388 [tmp,fieldList] = regexp(annotationField,'<rdf:li rdf:resource="urn:miriam:(\w+).*?"/>','tokens','match');
0389
0390
0391 for i = 1:length(fieldList)
0392 fieldTmp = regexp(fieldList{i},['<rdf:li rdf:resource="urn:miriam:(.*)"/>'],'tokens');
0393 fieldStr = fieldTmp{1}{1};
0394 if (regexp(fieldStr,'obo.chebi'))
0395 metCHEBI = strrep(fieldStr,'obo.chebi:CHEBI%','');
0396 elseif (regexp(fieldStr,'kegg.compound'))
0397 metKEGG = strrep(fieldStr,'kegg.compound:','');
0398 elseif (regexp(fieldStr,'pubchem.substance'))
0399 metPubChem = strrep(fieldStr,'pubchem.substance:','');
0400 end
0401 end
0402
0403 fieldTmp = regexp(annotationField,'<in:inchi xmlns:in="http://biomodels.net/inchi" metaid="(.*?)">(.*?)</in:inchi>','tokens');
0404 if ~isempty(fieldTmp)
0405 fieldStr = fieldTmp{1}{2};
0406 if (regexp(fieldStr,'InChI'))
0407 metInChI = strrep(fieldStr,'InChI=','');
0408 end
0409 end
0410
0411
0412 function str = cleanUpFormatting(str)
0413 str = strrep(str,'-DASH-','-');
0414 str = strrep(str,'_DASH_','-');
0415 str = strrep(str,'_FSLASH_','/');
0416 str = strrep(str,'_BSLASH_','\');
0417 str = strrep(str,'_LPAREN_','(');
0418 str = strrep(str,'_LSQBKT_','[');
0419 str = strrep(str,'_RSQBKT_',']');
0420 str = strrep(str,'_RPAREN_',')');
0421 str = strrep(str,'_COMMA_',',');
0422 str = strrep(str,'_PERIOD_','.');
0423 str = strrep(str,'_APOS_','''');
0424 str = regexprep(str,'_e_$','(e)');
0425 str = regexprep(str,'_e$','(e)');
0426 str = strrep(str,'&','&');
0427 str = strrep(str,'<','<');
0428 str = strrep(str,'>','>');
0429 str = strrep(str,'"','"');