readSimPhenyGprText

PURPOSE ^

readSimPhenyGprText Parses SimPheny GPRA's in text format into a rxn x gene association

SYNOPSIS ^

function gpraModel = readSimPhenyGprText(file,model)

DESCRIPTION ^

readSimPhenyGprText Parses SimPheny GPRA's in text format into a rxn x gene association
matrix

 gpraModel = readSimPhenyGPRText(file,model)

INPUTS
 file      GPR text file
 model     COBRA model structure

OUTPUT
 gpraModel COBRA model structure with reaction-gene association matrix
 Markus Herrgard 2/23/06

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function gpraModel = readSimPhenyGprText(file,model)
0002 %readSimPhenyGprText Parses SimPheny GPRA's in text format into a rxn x gene association
0003 %matrix
0004 %
0005 % gpraModel = readSimPhenyGPRText(file,model)
0006 %
0007 %INPUTS
0008 % file      GPR text file
0009 % model     COBRA model structure
0010 %
0011 %OUTPUT
0012 % gpraModel COBRA model structure with reaction-gene association matrix
0013 % Markus Herrgard 2/23/06
0014 
0015 % Read GPRA file
0016 [tmp,tmp,rxnNames,rxnDesrs,rxnEqns,Confs,subSystems,Regions,gpraStrings,proteinStrings,ECs] = ...
0017     textread(file,'%s %s %s %s %s %s %s %s %s %s %s','delimiter','\t','headerlines',1,'bufsize',200000);
0018 
0019 sel = ~strcmp(rxnNames,'');
0020 gpraStrings = gpraStrings(sel);
0021 rxnNames = rxnNames(sel);
0022 subSystems = subSystems(sel);
0023 
0024 nRxns = length(rxnNames);
0025 allGenes = {};
0026 % Parse GPRA's
0027 h = waitbar(0,'Reading GPRA text file ...');
0028 for i = 1:nRxns
0029     if mod(i,10) == 0
0030         waitbar(i/nRxns,h);
0031     end
0032     thisGpra = gpraStrings{i};
0033     thisGpra = regexprep(thisGpra,'+',' and ');
0034     thisGpra = regexprep(thisGpra,',',' or ');
0035     [thisGenes,rule] = parseBoolean(thisGpra);
0036     genes{i} = thisGenes;
0037     allGenes = [allGenes thisGenes];
0038     rules{i} = rule;
0039     grRules{i} = thisGpra;
0040 end
0041 if ( regexp( version, 'R20') )
0042         close(h);
0043 end
0044 
0045 allGenes = unique(allGenes);
0046 
0047 % Construct gene to rxn mapping
0048 rxnGeneMat = sparse(nRxns,length(allGenes));
0049 h = waitbar(0,'Constructing GPR mapping ...');
0050 for i = 1:nRxns
0051     if mod(i,10) == 0
0052         waitbar(i/nRxns,h);
0053     end
0054     [tmp,geneInd] = ismember(genes{i},allGenes);
0055     rxnGeneMat(i,geneInd) = 1;
0056     for j = 1:length(geneInd)
0057         rules{i} = strrep(rules{i},['x(' num2str(j) ')'],['x(' num2str(geneInd(j)) ')']);
0058     end
0059 end
0060 if ( regexp( version, 'R20') )
0061         close(h);
0062 end
0063 
0064 gpraModel.rxns = rxnNames;
0065 gpraModel.genes = columnVector(allGenes);
0066 gpraModel.rules = columnVector(rules);
0067 gpraModel.grRules = grRules;
0068 gpraModel.subSystems = subSystems;
0069 gpraModel.rxnGeneMat = rxnGeneMat;
0070     
0071 if (nargin > 1)
0072     [hasGpra,gpraMap] = ismember(model.rxns,gpraModel.rxns);
0073     model.genes = columnVector(gpraModel.genes);
0074     nRxns = length(model.rxns);
0075     nGenes = length(gpraModel.genes);
0076     model.rxnGeneMat = sparse(nRxns,nGenes);
0077     for i = 1:length(model.rxns)
0078        if (hasGpra(i))
0079           gpraID = gpraMap(i);
0080           model.rules{i} = gpraModel.rules{gpraID};
0081           model.rxnGeneMat(i,:) = gpraModel.rxnGeneMat(gpraID,:);
0082           model.subSystems{i} = gpraModel.subSystems{gpraID};
0083           model.grRules{i} = gpraModel.grRules{gpraID}; 
0084        else
0085           model.rules{i} = '';
0086           model.subSystems{i} = '';
0087           model.grRules{i} = '';
0088        end
0089     end
0090     model.rules = columnVector(model.rules);
0091     model.subSystems = columnVector(model.subSystems);
0092     model.grRules = columnVector(model.grRules);
0093     gpraModel = model;
0094 end
0095 
0096 %     geneString = geneStrings{i};
0097 %     geneList = {};
0098 %     if (~strcmp(geneString,''))
0099 %         if (~isempty(regexp(geneString,',')))
0100 %             if (~isempty(regexp(geneString,'+')))
0101 %                 % Both complexes and isozymes
0102 %                 geneStrTmp = splitString(geneString,',');
0103 %                 for j = 1:length(geneStrTmp)
0104 %                     if (~isempty(regexp(geneStrTmp{j},'+')))
0105 %                         geneListTmp = splitString(geneStrTmp{j},'+');
0106 %                         for k = 1:length(geneListTmp)
0107 %                             geneList{end+1} = geneListTmp{k};
0108 %                         end
0109 %                     else
0110 %                         geneList{end+1} = geneStrTmp{j};
0111 %                     end
0112 %                 end
0113 %             else
0114 %                 % Only isozymes
0115 %                 geneList = splitString(geneString,',');
0116 %             end
0117 %         elseif (~isempty(regexp(geneString,'+')))
0118 %             % Only complexes
0119 %             geneList = splitString(geneString,'+');
0120 %         else
0121 %             % Just a single gene
0122 %             geneList{1} = geneString;
0123 %         end
0124 %     end
0125 %     for j = 1:length(geneList)
0126 %         geneList{j} = regexprep(geneList{j},' ','');
0127 %     end
0128 %     geneLists{i} = unique(geneList);
0129 %     allGenes = union(allGenes,geneList);
0130 % end
0131 %
0132 % % Create rxn to gene association matrix
0133 % rxnGeneMat = sparse(length(rxnNames),length(allGenes));
0134 % for i = 1:length(rxnNames)
0135 %     [tmp,geneInd] = ismember(geneLists{i},allGenes);
0136 %     rxnGeneMat(i,geneInd) = 1;
0137 % end
0138 %
0139 % genes = allGenes';
0140

Generated on Thu 21-Jun-2012 15:39:23 by m2html © 2003