0001 function [tissueModel,Rxns] = createTissueSpecificModel(model, ...
0002 expressionData,proceedExp,orphan,exRxnRemove,solver,options,funcModel)
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028
0029
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065 if iscell(expressionData.Locus(1))
0066 match_strings = true;
0067 else
0068 match_strings = false;
0069 end
0070
0071 if ~exist('proceedExp','var') || isempty(proceedExp)
0072 proceedExp = 1;
0073 end
0074
0075 if ~exist('solver','var') || isempty(solver)
0076 solver = 'GIMME';
0077 end
0078
0079 if ~exist('exRxnRemove','var') || isempty(exRxnRemove)
0080 exRxnRemove = [];
0081 end
0082
0083 if ~exist('orphan','var') || isempty(orphan)
0084 orphan = 1;
0085 end
0086
0087 if ~exist('funcModel','var') || isempty(funcModel)
0088 funcModel = 0;
0089 end
0090
0091
0092
0093 [parsedGPR,corrRxn] = extractGPRs(model);
0094
0095 if proceedExp == 0
0096
0097 [Results,Transcripts] = charExpData(expressionData);
0098
0099 x = ismember(Transcripts.Locus,[Transcripts.Expressed;Transcripts.UnExpressed]);
0100 unkLocus = find(x==0);
0101
0102 AltSplice.Locus = Transcripts.Locus(unkLocus);
0103 AltSplice.Transcripts = Transcripts.Data(unkLocus);
0104 AltSplice.Expression = Transcripts.Expression(unkLocus);
0105
0106 fprintf('There are some probes that match up with different transcripts and expression patterns\n');
0107 fprintf('Please elucidate these discrepancies below\n');
0108 fprintf('To do so, look up the transcript in RefSeq and enter the proper locii below\n');
0109
0110 cnt1 = 1;
0111 cnt2 = 1;
0112 locusNE =[];
0113 locusE = [];
0114 for i = 1:length(AltSplice.Locus)
0115 if length(AltSplice.Transcripts{i}) < 1
0116 elseif AltSplice.Expression(i) == 0
0117 fprintf('Probe: %i, Transcript: %s, Expression: %i\n',AltSplice.Locus(i),AltSplice.Transcripts{i},AltSplice.Expression(i));
0118
0119 locusNE(cnt1,1) = AltSplice.Locus(i);
0120 cnt1=cnt1+1;
0121 elseif AltSplice.Expression(i) == 1
0122 fprintf('Probe: %i, Transcript: %s, Expression: %i\n',AltSplice.Locus(i),AltSplice.Transcripts{i},AltSplice.Expression(i));
0123
0124 locusE(cnt2,1) = AltSplice.Locus(i);
0125 cnt2=cnt2+1;
0126 end
0127 end
0128
0129 locusP = [Results.Expressed;Transcripts.Expressed;locusE];
0130 locusNP = [Results.UnExpressed;Transcripts.UnExpressed;locusNE];
0131
0132 genePresenceP = ones(length(locusP),1);
0133 genePresenceNP = zeros(length(locusNP),1);
0134
0135 locus = [locusP;locusNP];
0136 genePresence = [genePresenceP;genePresenceNP];
0137 else
0138 locus = expressionData.Locus;
0139 genePresence = zeros(length(locus),1);
0140 genePresence(find(expressionData.Data(:,1))) = 1;
0141 end
0142
0143
0144 [ExpressedRxns,UnExpressedRxns,unknown] = mapProbes(parsedGPR,corrRxn,locus,genePresence,match_strings);
0145
0146
0147
0148 if ~isempty(exRxnRemove)
0149 model = removeRxns(model,exRxnRemove);
0150 end
0151
0152 nRxns = length(model.lb);
0153
0154
0155 RHindex = findRxnIDs(model,ExpressedRxns);
0156 RLindex = findRxnIDs(model,UnExpressedRxns);
0157 if (strcmp(solver, 'iMAT'))
0158 solver = 'Shlomi';
0159 end
0160 switch solver
0161 case 'Shlomi'
0162
0163 S = model.S;
0164 lb = model.lb;
0165 ub = model.ub;
0166 eps = 1;
0167
0168
0169 A = sparse(size(S,1)+2*length(RHindex)+2*length(RLindex),size(S,2)+2*length(RHindex)+length(RLindex));
0170 [nConstr,nVar] = size(S);
0171 [m,n,s] = find(S);
0172 for i = 1:length(m)
0173 A(m(i),n(i)) = s(i);
0174 end
0175
0176 for i = 1:length(RHindex)
0177 A(i+size(S,1),RHindex(i)) = 1;
0178 A(i+size(S,1),i+size(S,2)) = lb(RHindex(i)) - eps;
0179 A(i+size(S,1)+length(RHindex),RHindex(i)) = 1;
0180 A(i+size(S,1)+length(RHindex),i+size(S,2)+length(RHindex)+length(RLindex)) = ub(RHindex(i)) + eps;
0181 end
0182
0183 for i = 1:length(RLindex)
0184 A(i+size(S,1)+2*length(RHindex),RLindex(i)) = 1;
0185 A(i+size(S,1)+2*length(RHindex),i+size(S,2)+length(RHindex)) = lb(RLindex(i));
0186 A(i+size(S,1)+2*length(RHindex)+length(RLindex),RLindex(i)) = 1;
0187 A(i+size(S,1)+2*length(RHindex)+length(RLindex),i+size(S,2)+length(RHindex)) = ub(RLindex(i));
0188 end
0189
0190
0191 csense1(1:size(S,1)) = 'E';
0192 csense2(1:length(RHindex)) = 'G';
0193 csense3(1:length(RHindex)) = 'L';
0194 csense4(1:length(RLindex)) = 'G';
0195 csense5(1:length(RLindex)) = 'L';
0196 csense = [csense1 csense2 csense3 csense4 csense5];
0197
0198
0199 lb_y = zeros(2*length(RHindex)+length(RLindex),1);
0200 ub_y = ones(2*length(RHindex)+length(RLindex),1);
0201 lb = [lb;lb_y];
0202 ub = [ub;ub_y];
0203
0204
0205 c_v = zeros(size(S,2),1);
0206 c_y = ones(2*length(RHindex)+length(RLindex),1);
0207 c = [c_v;c_y];
0208
0209
0210 b_s = zeros(size(S,1),1);
0211 lb_rh = lb(RHindex);
0212 ub_rh = ub(RHindex);
0213 lb_rl = lb(RLindex);
0214 ub_rl = ub(RLindex);
0215 b = [b_s;lb_rh;ub_rh;lb_rl;ub_rl];
0216
0217
0218 vartype1(1:size(S,2),1) = 'C';
0219 vartype2(1:2*length(RHindex)+length(RLindex),1) = 'B';
0220 vartype = [vartype1;vartype2];
0221 n_int = length(vartype2);
0222
0223 MILPproblem.A = A;
0224 MILPproblem.b = b;
0225 MILPproblem.c = c;
0226 MILPproblem.lb = lb;
0227 MILPproblem.ub = ub;
0228 MILPproblem.csense = csense;
0229 MILPproblem.vartype = vartype;
0230 MILPproblem.osense = -1;
0231 MILPproblem.x0 = [];
0232
0233 verboseFlag = true;
0234
0235 solution = solveCobraMILP(MILPproblem);
0236
0237 Rxns.solution = solution;
0238
0239 x = solution.cont;
0240 for i = 1:length(x)
0241 if abs(x(i)) < 1e-6
0242 x(i,1) = 0;
0243 end
0244 end
0245
0246 removed = find(x==0);
0247
0248 if orphan == 1
0249 orphans = findorphanRxns(model);
0250 removed(find(ismember(model.rxns(removed),orphans)))=[];
0251 end
0252 rxnRemList = model.rxns(removed);
0253 tissueModel = removeRxns(model,rxnRemList);
0254
0255 Rxns.Expressed = ExpressedRxns;
0256 Rxns.UnExpressed = UnExpressedRxns;
0257 Rxns.unknown = unknown;
0258
0259 x = ismember(UnExpressedRxns,tissueModel.rxns);
0260 loc = find(x);
0261 Rxns.UpRegulated = UnExpressedRxns(loc);
0262
0263 x = ismember(ExpressedRxns,tissueModel.rxns);
0264 loc = find(x==0);
0265 Rxns.DownRegulated = ExpressedRxns(loc);
0266
0267 x = ismember(model.rxns,[ExpressedRxns;UnExpressedRxns]);
0268 loc = find(x==0);
0269 x = ismember(tissueModel.rxns,model.rxns(loc));
0270 loc = find(x);
0271 Rxns.UnknownIncluded = tissueModel.rxns(loc);
0272
0273 case 'GIMME'
0274 x = ismember(model.rxns,[ExpressedRxns;UnExpressedRxns]);
0275 unk = find(x==0);
0276
0277 expressionCol = zeros(length(model.rxns),1);
0278 for i = 1:length(unk)
0279 expressionCol(unk(i)) = -1;
0280 end
0281
0282 for i = 1:length(RHindex)
0283 expressionCol(RHindex(i)) = 2;
0284 end
0285 if ~exist('options','var') || isempty(options)
0286 loc = find(model.c);
0287
0288 sol = optimizeCbModel(model);
0289
0290 options = [loc 0.9];
0291 end
0292 cutoff = 1;
0293 [reactionActivity,reactionActivityIrrev,model2gimme,gimmeSolution] = solveGimme(model,options,expressionCol,cutoff);
0294
0295 remove = model.rxns(find(reactionActivity == 0));
0296 tissueModel = removeRxns(model,remove);
0297
0298 if funcModel ==1
0299 c = tissueModel.c;
0300
0301 remove = [];
0302 tissueModel.c = zeros(length(tissueModel.c),1);
0303 for i = 1:length(tissueModel.rxns)
0304 tissueModel.c(i) = 1;
0305 sol1 = optimizeCbModel(tissueModel,'max');
0306 sol2 = optimizeCbModel(tissueModel,'min');
0307 if sol1.f == 0 & sol2.f == 0
0308 remove = [remove tissueModel.rxns(i)];
0309 end
0310 tissueModel.c(i) = 0;
0311 end
0312
0313 tissueModel.c = c;
0314 tissueModel = removeRxns(tissueModel,remove);
0315 end
0316
0317 Rxns.Expressed = ExpressedRxns;
0318 Rxns.UnExpressed = UnExpressedRxns;
0319 Rxns.unknown = unknown;
0320
0321 x = ismember(UnExpressedRxns,tissueModel.rxns);
0322 loc = find(x);
0323 Rxns.UpRegulated = UnExpressedRxns(loc);
0324
0325 x = ismember(ExpressedRxns,tissueModel.rxns);
0326 loc = find(x==0);
0327 Rxns.DownRegulated = ExpressedRxns(loc);
0328
0329 x = ismember(model.rxns,[ExpressedRxns;UnExpressedRxns]);
0330 loc = find(x==0);
0331 x = ismember(tissueModel.rxns,model.rxns(loc));
0332 loc = find(x);
0333 Rxns.UnknownIncluded = tissueModel.rxns(loc);
0334
0335 end
0336
0337
0338 function [reactionActivity,reactionActivityIrrev,model2gimme,gimmeSolution] = solveGimme(model,objectiveCol,expressionCol,cutoff)
0339
0340 nRxns = size(model.S,2);
0341
0342
0343 [modelIrrev,matchRev,rev2irrev,irrev2rev] = convertToIrreversible(model);
0344
0345 nExpressionCol = size(expressionCol,1);
0346 if (nExpressionCol < nRxns)
0347 display('Warning: Fewer expression data inputs than reactions');
0348 expressionCol(nExpressionCol+1:nRxns,:) = zeros(nRxns-nExpressionCol, size(expressionCol,2));
0349 end
0350
0351 nIrrevRxns = size(irrev2rev,1);
0352 expressionColIrrev = zeros(nIrrevRxns,1);
0353 for i=1:nIrrevRxns
0354
0355 expressionColIrrev(i,1) = expressionCol(irrev2rev(i,1),1);
0356 end
0357
0358 nObjectives = size(objectiveCol,1);
0359 for i=1:nObjectives
0360 objectiveColIrrev(i,:) = [rev2irrev{objectiveCol(i,1),1}(1,1) objectiveCol(i,2)];
0361 end
0362
0363
0364 for i=1:size(objectiveCol)
0365
0366 modelIrrev.c=zeros(nIrrevRxns,1);
0367 modelIrrev.c(objectiveColIrrev(i,1),1)=1;
0368
0369
0370 FBAsolution = optimizeCbModel(modelIrrev);
0371 if (FBAsolution.stat ~= 1)
0372 not_solved=1;
0373 display('Failed to solve initial FBA problem');
0374 return
0375 end
0376 maxObjective(i)=FBAsolution.f;
0377 end
0378
0379 model2gimme = modelIrrev;
0380 model2gimme.c = zeros(nIrrevRxns,1);
0381
0382
0383 for i=1:nIrrevRxns
0384 if (expressionColIrrev(i,1) > -1)
0385 if (expressionColIrrev(i,1) < cutoff)
0386 model2gimme.c(i,1) = cutoff-expressionColIrrev(i,1);
0387 end
0388 end
0389 end
0390
0391 for i=1:size(objectiveColIrrev,1)
0392 model2gimme.lb(objectiveColIrrev(i,1),1) = objectiveColIrrev(i,2) * maxObjective(i);
0393 end
0394
0395 gimmeSolution = optimizeCbModel(model2gimme,'min');
0396
0397 if (gimmeSolution.stat ~= 1)
0398
0399
0400
0401 gimmeSolution.x = zeros(nIrrevRxns,1);
0402 end
0403
0404 reactionActivityIrrev = zeros(nIrrevRxns,1);
0405 for i=1:nIrrevRxns
0406 if ((expressionColIrrev(i,1) > cutoff) | (expressionColIrrev(i,1) == -1))
0407 reactionActivityIrrev(i,1)=1;
0408 elseif (gimmeSolution.x(i,1) > 0)
0409 reactionActivityIrrev(i,1)=2;
0410 end
0411 end
0412
0413
0414 reactionActivity = zeros(nRxns,1);
0415 for i=1:nRxns
0416 for j=1:size(rev2irrev{i,1},2)
0417 if (reactionActivityIrrev(rev2irrev{i,1}(1,j)) > reactionActivity(i,1))
0418 reactionActivity(i,1) = reactionActivityIrrev(rev2irrev{i,1}(1,j));
0419 end
0420 end
0421 end
0422
0423 function [rxnExpressed,unExpressed,unknown] = mapProbes(parsedGPR,corrRxn,locus,genePresence,match_strings)
0424 if ~exist('match_strings', 'var') || isempty(match_strings)
0425 match_strings = false;
0426 end
0427
0428 rxnExpressed = [];
0429 unExpressed = [];
0430 unknown = [];
0431 for i = 1:size(parsedGPR,1)
0432 cnt = 0;
0433 for j = 1:size(parsedGPR,2)
0434 if length(parsedGPR{i,j}) == 0
0435 break
0436 end
0437 cnt = cnt+1;
0438 end
0439
0440 test = 0;
0441 for j = 1:cnt
0442 if match_strings
0443 loc = parsedGPR{i,j};
0444 x = strmatch(loc, locus, 'exact');
0445 else
0446 loc = str2num(parsedGPR{i,j});
0447 loc = floor(loc);
0448 x = find(locus == loc);
0449 end
0450
0451 if length(x) > 0 & genePresence(x) == 0
0452 unExpressed = [unExpressed;corrRxn(i)];
0453 test = 1;
0454 break
0455 elseif length(x) == 0
0456 test = 2;
0457 end
0458 end
0459
0460 if test == 0
0461 rxnExpressed = [rxnExpressed;corrRxn(i)];
0462 elseif test == 2
0463 unknown = [unknown;corrRxn(i)];
0464 end
0465 end
0466
0467 rxnExpressed = unique(rxnExpressed);
0468 unExpressed = unique(unExpressed);
0469 unknown = unique(unknown);
0470
0471 unknown = setdiff(unknown,rxnExpressed);
0472 unknown = setdiff(unknown,unExpressed);
0473 unExpressed = setdiff(unExpressed,rxnExpressed);
0474
0475 function [parsedGPR,corrRxn] = extractGPRs(model)
0476
0477 warning off all
0478
0479 parsedGPR = [];
0480 corrRxn = [];
0481 cnt = 1;
0482
0483 for i = 1:length(model.rxns)
0484 if length(model.grRules{i}) > 1
0485
0486 [parsing{1,1},parsing{2,1}] = strtok(model.grRules{i},'or');
0487 for j = 2:1000
0488 [parsing{j,1},parsing{j+1,1}] = strtok(parsing{j,1},'or');
0489 if isempty(parsing{j+1,1})==1
0490 break
0491 end
0492 end
0493
0494 for j = 1:length(parsing)
0495 for k = 1:1000
0496 [parsing{j,k},parsing{j,k+1}] = strtok(parsing{j,k},'and');
0497 if isempty(parsing{j,k+1})==1
0498 break
0499 end
0500 end
0501 end
0502
0503 for j = 1:size(parsing,1)
0504 for k = 1:size(parsing,2)
0505 parsing{j,k} = strrep(parsing{j,k},'(','');
0506 parsing{j,k} = strrep(parsing{j,k},')','');
0507 parsing{j,k} = strrep(parsing{j,k},' ','');
0508 end
0509 end
0510
0511 for j = 1:size(parsing,1)-1
0512 newparsing(j,:) = parsing(j,1:length(parsing(j,:))-1);
0513 end
0514
0515 parsing = newparsing;
0516
0517
0518 for j = 1:size(parsing,1)
0519 for k = 1:size(parsing,2)
0520 if length(parsing{j,k}) == 0
0521 parsing{j,k} = '';
0522 end
0523 end
0524 end
0525
0526
0527 num = size(parsing,1);
0528 for j = 1:num
0529 sizeP = length(parsing(j,:));
0530 if sizeP > size(parsedGPR,2)
0531 for k = 1:size(parsedGPR,1)
0532 parsedGPR{k,sizeP} = {''};
0533 end
0534 end
0535
0536 for l = 1:sizeP
0537 parsedGPR{cnt,l} = parsing(j,l);
0538 end
0539 cnt = cnt+1;
0540 end
0541
0542 for j = 1:num
0543 corrRxn = [corrRxn;model.rxns(i)];
0544 end
0545
0546 clear parsing newparsing
0547
0548 end
0549
0550 end
0551
0552 for i = 1:size(parsedGPR,1)
0553 for j = 1:size(parsedGPR,2)
0554 if isempty(parsedGPR{i,j}) == 1
0555 parsedGPR{i,j} = {''};
0556 end
0557 end
0558 end
0559
0560 i =1 ;
0561 sizeP = size(parsedGPR,1);
0562 while i < sizeP
0563 if strcmp(parsedGPR{i,1},{''}) == 1
0564 parsedGPR = [parsedGPR(1:i-1,:);parsedGPR(i+1:end,:)];
0565 corrRxn = [corrRxn(1:i-1,:);corrRxn(i+1:end,:)];
0566 sizeP = sizeP-1;
0567 i=i-1;
0568 end
0569 i = i+1;
0570 end
0571
0572 for i = 1:size(parsedGPR,1)
0573 for j= 1:size(parsedGPR,2)
0574 parsedGPR2(i,j) = cellstr(parsedGPR{i,j});
0575 end
0576 end
0577
0578 parsedGPR = parsedGPR2;
0579
0580 function [Results,Transcripts] = charExpData(ExpressionData)
0581
0582 n = length(ExpressionData.Locus);
0583 Locus = ExpressionData.Locus;
0584
0585 ExpThreshold = floor(0.75*size(ExpressionData.Data,2))/size(ExpressionData.Data,2);
0586 UnExpThreshold = ceil(0.25*size(ExpressionData.Data,2))/size(ExpressionData.Data,2);
0587 Results.Expressed = [];
0588 Results.UnExpressed = [];
0589 Results.AltSplice = [];
0590 Results.Total = 0;
0591 for i = 1:n
0592 if ExpressionData.Locus(i) > 0
0593
0594 locus = ExpressionData.Locus(i);
0595 loc = find(ExpressionData.Locus == locus);
0596 cap = length(loc)*size(ExpressionData.Data,2)*2;
0597
0598 for j = 1:length(loc)
0599 total(j) = sum(ExpressionData.Data(loc(j),:));
0600 ExpressionData.Locus(loc(j)) = 0;
0601 end
0602
0603 transcripts = {};
0604 for j = 1:length(loc)
0605 if length(ExpressionData.Transcript{loc(j)}) > 0
0606 transcripts = [transcripts;ExpressionData.Transcript{loc(j)}];
0607 end
0608 end
0609
0610 if length(unique(transcripts)) <= 1
0611
0612
0613 if sum(total)/cap >= ExpThreshold
0614 Results.Expressed = [Results.Expressed;locus];
0615
0616 elseif sum(total)/cap <= UnExpThreshold
0617 Results.UnExpressed = [Results.UnExpressed;locus];
0618
0619
0620 else
0621 cntP = 0;
0622 for j = 1:length(total)
0623
0624
0625 if total(j) >= floor(0.75*size(ExpressionData.Data,2))*2;
0626 cntP = cntP+1;
0627 end
0628 end
0629
0630
0631
0632 if cntP/length(total) >= 0.5
0633 Results.Expressed = [Results.Expressed;locus];
0634 else
0635 Results.UnExpressed = [Results.UnExpressed;locus];
0636 end
0637 end
0638 else
0639
0640
0641
0642 Results.AltSplice = [Results.AltSplice;locus];
0643 end
0644 Results.Total = Results.Total+1;
0645 clear total
0646 end
0647 end
0648
0649
0650 Transcripts.Locus = [];
0651 Transcripts.Data = {};
0652 for i = 1:length(Results.AltSplice)
0653 num = find(Locus==Results.AltSplice(i));
0654 transcripts = unique(ExpressionData.Transcript(num));
0655 for j = 1:length(transcripts)
0656 Transcripts.Locus = [Transcripts.Locus;Results.AltSplice(i)];
0657 end
0658 Transcripts.Data = [Transcripts.Data;transcripts];
0659 end
0660
0661
0662
0663 for i = 1:length(Transcripts.Data)
0664 loc = strmatch(Transcripts.Data{i},ExpressionData.Transcript,'exact');
0665 for j = 1:length(loc)
0666 total(j) = sum(ExpressionData.Data(loc(j),:));
0667 ExpressionData.Locus(loc(j)) = 0;
0668 end
0669 cap = length(loc)*11*2;
0670 if sum(total)/cap >= ExpThreshold
0671 Transcripts.Expression(i,1) = 1;
0672 else
0673 Transcripts.Expression(i,1) = 0;
0674 end
0675 end
0676
0677
0678
0679 Transcripts.Expressed = [];
0680 Transcripts.UnExpressed = [];
0681 mem_tran = Transcripts.Locus;
0682 for i = 1:length(Transcripts.Locus)
0683 if Transcripts.Locus(i) > 0
0684 locus = Transcripts.Locus(i);
0685 loc = find(Transcripts.Locus==locus);
0686 sumExp = 0;
0687 for j = 1:length(loc)
0688 sumExp = sumExp+Transcripts.Expression(loc(j));
0689 Transcripts.Locus(loc(j)) = 0;
0690 end
0691 if sumExp == 0
0692 Transcripts.UnExpressed = [Transcripts.UnExpressed;locus];
0693 elseif sumExp == length(loc)
0694 Transcripts.Expressed = [Transcripts.Expressed;locus];
0695 end
0696 end
0697 end
0698 Transcripts.Locus = mem_tran;