Home > BiKEGG > Bigg2KeggRestricted.m

Bigg2KeggRestricted

PURPOSE ^

Bigg2KeggRestricted

SYNOPSIS ^

function [BiGGID,AllRxn] = Bigg2KeggRestricted(D,Metkegg,rxnData,cpds,cpd2rxn)

DESCRIPTION ^

 Bigg2KeggRestricted 
 is a subfunction of Bigg2Kegg and employs restricted
 conditions for identifying reaction correspondences.
 Full documentation is available in the user manual.
 
 O. Jamialahmadi
 TMU, Chem. Eng. Dept., Biotech. Group 
 Oct. 2015

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function [BiGGID,AllRxn] = Bigg2KeggRestricted(D,Metkegg,rxnData,cpds,cpd2rxn)
0002 % Bigg2KeggRestricted
0003 % is a subfunction of Bigg2Kegg and employs restricted
0004 % conditions for identifying reaction correspondences.
0005 % Full documentation is available in the user manual.
0006 %
0007 % O. Jamialahmadi
0008 % TMU, Chem. Eng. Dept., Biotech. Group
0009 % Oct. 2015
0010 
0011 load('UniModelKEGG.mat')
0012 UniB = B2Kegg.B; UniK = B2Kegg.K;
0013 Umulti = find(ismember(UniB,'MULTIR'));
0014 UniB = UniB(1:Umulti-1); UniK = UniK(1:Umulti-1);
0015 clear B2Kegg
0016 Fileid12 = fopen('PrunedRxns.txt','r');
0017 Prunedata12 = textscan(Fileid12,'%s %s %d %[^\n]');
0018 Testpruned = Prunedata12{1};
0019 fclose(Fileid12);
0020 RxnNames = D.rxns;
0021 Rxns = D.S;
0022 Keggrm = (0);
0023 BiGGID = {0};
0024 AllRxn = {0};
0025 Uctr = 1;
0026 for rn = 1:size(Rxns,2) % for all rxns
0027     fprintf('Rxn %d of %d\n',rn,size(Rxns,2))
0028     
0029     % ---------------------------------------------------------------------
0030     %if current reaction exists in pruned reactions, or UniModelKEGG there
0031     %is no need to further search for potential equivalent reactions.
0032     if any(ismember(Testpruned,RxnNames{rn}))
0033         continue
0034     end
0035     if any(ismember(UniB,RxnNames{rn}))
0036         UniBloci = find(ismember(UniB,RxnNames{rn}));
0037         for u1 = 1:numel(UniBloci)
0038             BiGGID{Uctr} = UniB{UniBloci(u1)};
0039             AllRxn{Uctr} = UniK{UniBloci(u1)};
0040             Uctr = Uctr + 1;
0041         end    
0042         continue
0043     end
0044     %----------------------------------------------------------------------
0045     Prd = find(Rxns(:,rn)); % Find substrates and products for each rxn
0046     CheckTransport = [Metkegg{Prd}]; % Exclude transport rxns
0047     if (numel(CheckTransport)/numel(unique(CheckTransport)) == 2)
0048         continue
0049     end
0050     % 1st condition: All reactants and products have metKeggIDs.
0051     % 2nd condition: Exclude all one element rxns
0052     if (~sum(strcmp (Metkegg(Prd),'')) && (length(Prd) > 1))
0053         Cbs = MetCombs(Metkegg,Prd); % All combinations (permutations) of
0054         % KEGG compounds, regarding the fact that there are more than one
0055         % KEGG cpd for some metabolites in the model.
0056         for Outctr = 1:size(Cbs,1)
0057             % Contains all KEGG cpds in a certain rxn
0058             TempMet = unique(Cbs(Outctr,:));
0059             % Ignore transport reactions with water molecule: c00001
0060             if (numel(TempMet) == 2) && sum(strcmp(TempMet,'C00001'))
0061                 continue
0062             end
0063             if length(TempMet) > 1 % Exclude all exchange rxns
0064                 KeggrmMethod = 'off'; %Offline
0065                 switch KeggrmMethod
0066                     case 'off'
0067                         Id1 = ismember(cpds,TempMet);
0068                         Keggrm = str2num(rxnData(Id1,5:end));
0069                     case 'on'
0070                         Keggrm = KeggApiMatch (TempMet);
0071                 end
0072                 Uq = unique(Keggrm);
0073                 N = histc(Keggrm, Uq);
0074                 if isempty(max(N))
0075                     continue
0076                 end
0077                 if (max(N) == numel(TempMet))
0078                     UqTemp = Uq(N==max(N));
0079                     for count = 1:numel(UqTemp)
0080                         UqTemp1{count} = strcat('rn:R',repmat('0',...
0081                             [1,4- floor(log10(UqTemp(count)+eps))])...
0082                             ,num2str(UqTemp(count)));
0083                     end
0084                     if numel(UqTemp) > 1
0085                         for count = 1:length(UqTemp1)
0086                             UqTempSum(count) = sum(ismember(cpd2rxn{2},...
0087                                 UqTemp1{count}));
0088                         end
0089                         if min(UqTempSum)==numel(TempMet)
0090                             [~,In1] = min(UqTempSum);
0091                             AllRxn{Uctr} = UqTemp1{In1}(4:end);
0092                             BiGGID {Uctr} = D.rxns{rn};
0093                             Uctr = Uctr + 1;
0094                         end
0095                     else
0096                         UqTempSum = sum(ismember(cpd2rxn{2},UqTemp1{1}));
0097                         if min(UqTempSum)==numel(TempMet)
0098                             AllRxn{Uctr} = UqTemp1{1}(4:end);
0099                             BiGGID {Uctr} = D.rxns{rn};
0100                             Uctr = Uctr + 1;
0101                         end
0102                         
0103                     end
0104                     UqTempSum = 0;
0105                     UqTemp1 = {0};
0106                     
0107                 end
0108             end
0109             Keggrm = 0;
0110         end
0111     end
0112 end
0113 % Identify BiGG rxns for which there is more than one KEGG rxns, and prune
0114 % BiGGIDs and AllRxns
0115 [BiGGID,AllRxn,MultiBID,MultiRxn] = RxnPrune(BiGGID, AllRxn, RxnNames);
0116 AllRxnTemp = {[]}; BiGGIDTemp = {[]}; Tctr = 1;
0117 if any ([MultiBID{:}])
0118     for ct = 1:numel(MultiBID)
0119         for ci = 1:numel(MultiRxn{ct})
0120             BiGGIDTemp{Tctr} = MultiBID{ct};
0121             AllRxnTemp{Tctr} = MultiRxn{ct}{ci};
0122             Tctr = Tctr + 1;
0123         end
0124     end
0125     BiGGID = [BiGGID,BiGGIDTemp];
0126     AllRxn = [AllRxn,AllRxnTemp];
0127 end
0128 
0129 
0130 
0131 %% Subfunctions
0132 function Cbs = MetCombs(Metkegg,Prd)
0133 SizeCell = cell(1,numel(Prd));
0134 for count = 1:numel(Prd)
0135     TempCounter = numel(Metkegg{Prd(count)});
0136     SizeCell{count} = 1:TempCounter;
0137 end
0138 MetkeggT = Metkegg(Prd);
0139 MetkeggTemp = [MetkeggT{:}];
0140 OutMet = combvec(SizeCell{:}).';
0141 for count = 1:size(OutMet,2)-1 % Corresponds to linearized Metkegg(Prd)
0142     OutMet(:,count+1)=OutMet(:,count+1)+max(OutMet(:,count));
0143 end
0144 CbsT = MetkeggTemp(OutMet);
0145 if any(strcmp(CbsT(:),'C00080'))
0146     Cbs1 = CbsT;
0147     [N1,N2] = find(strcmp(Cbs1,'C00080'));
0148     LenCbs = 1:numel(Cbs1);
0149     for count = 1:numel(N1)
0150         NonH2Cpd = find(~ismember(LenCbs,N2(count)));
0151         Cbs1{N1(count),N2(count)} = CbsT{N1(count),NonH2Cpd(1)};
0152     end
0153     Cbs = [CbsT;Cbs1];
0154 else 
0155     Cbs1 = CbsT;
0156     Cbs2 = Cbs1;
0157     for count = 1:size(CbsT,1)
0158         Cbs1{count,size(CbsT,2)+1} = 'C00080'; % Add H+
0159         % Keep dimensional consistency:
0160         Cbs2{count,size(CbsT,2)+1} = Cbs2{count,size(CbsT,2)};
0161     end
0162     Cbs = [Cbs1;Cbs2];
0163 end
0164 %--------------------------------------------------------------------------
0165 function [Bg,Ar,Bg2,Ar2] = RxnPrune(BiGGID, AllRxn, RxnNames)
0166 Fileid = fopen('PrunedRxns.txt','r');
0167 PrAll = textscan(Fileid,'%s %s %d %[^\n]');
0168 PrB = PrAll{3};
0169 PrBY = find(PrB); % To be added
0170 PrBN = ~PrB; % To be removed
0171 TBrxns = PrAll{1}; TKrxns = PrAll{2};
0172 BrxnY = TBrxns(PrBY); KrxnY = TKrxns(PrBY);
0173 BrxnN = TBrxns(PrBN);
0174 % Which BiGGID elements are in BrxnY?
0175 BiGGInBrxnY = BiGGID(ismember(BiGGID,BrxnY)); 
0176 KeggOfBrxnY = KrxnY(ismember(BrxnY,BiGGID));
0177 BrxnYTemp = BrxnY(ismember(BrxnY,BiGGID));
0178 SharedRxns = cell(1,numel(BiGGInBrxnY));
0179 for count = 1:numel(BrxnYTemp)
0180     T= ismember(BiGGInBrxnY,BrxnYTemp{count});
0181     SharedRxns(T) = KeggOfBrxnY(count);
0182 end
0183 AllRxn(ismember(BiGGID,BrxnY)) = SharedRxns;
0184 % Which BrxnY elements are not in BiGGID and should be added separately?
0185 NotInBiGG = BrxnY(~ismember(BrxnY,BiGGID));
0186 RxnNot = KrxnY(~ismember(BrxnY,BiGGID));
0187 % Are NotInBiGG elements exist in the current model (Note: BrxnY contains
0188 % universal BiGG reactions) ?
0189 YaLoci = find(ismember(NotInBiGG,RxnNames));
0190 Yf1 = NotInBiGG(YaLoci);
0191 AddedRxns = RxnNot(YaLoci);
0192 ModLength = (1:numel(Yf1))+numel(BiGGID);
0193 BiGGID(ModLength) = Yf1;
0194 AllRxn(ModLength) = AddedRxns;
0195 % Remove other found rxns in BiGGID which their absence is manually
0196 % confirmed.
0197 TobeRemovedRxns = ismember(BiGGID,BrxnN);
0198 BiGGID(TobeRemovedRxns)=[]; AllRxn(TobeRemovedRxns)=[]; % Remove other rxns
0199 % Find BiGG IDs with more than one rxn id
0200 ct=1;cr=1;
0201 Bg2={0};Ar2={0};Bg={0};Ar={0};
0202 BiGGT = BiGGID; RxnT = AllRxn;
0203 while numel(BiGGT)
0204     T = find(ismember(BiGGT,BiGGT(1))); R = RxnT(T);
0205     if numel(unique(R))~=1 % Reactions with more than one KEGG ID
0206         Bg2{ct} = BiGGT{1};
0207         Ar2{ct} = unique(R);      
0208         ct = ct+1;
0209     else
0210         Bg{cr}=BiGGT{1};
0211         Ar{cr}=cell2mat(unique(R));
0212         cr=cr+1;
0213     end
0214     BiGGT(T)=[]; %Remove
0215     RxnT(T)=[];
0216 end
0217 %--------------------------------------------------------------------------
0218 function Keggrm = KeggApiMatch(TempMet)
0219 ctr=1;
0220 Keggrm=(0);
0221 for mt = 1:length(TempMet)
0222     Metstr = urlread(['http://rest.kegg.jp/link/rn/',TempMet{mt}]);
0223     Id1 =  regexp(Metstr,'rn:R'); % All rxns containing a cpd
0224     if Id1 % KEGG rxn IDs are provided for this cpd
0225         for di = 1:length(Id1) % Extracts rxns
0226             Keggrm(ctr) = str2double(Metstr(Id1(di)+4:Id1(di)+9));
0227             ctr = ctr+1;
0228         end
0229     else
0230         break % Don't generate Metstr for other compounds
0231     end
0232 end
0233 % -------------------------------------------------------------------------
0234 function Metkegg = ModelMoidfy (Metkegg,MetAbr)
0235 % Read Metkegg data for RECON1.xml
0236 Mdat = load('RECON1Metkegg.mat');
0237 RECON1k = Mdat.RECON1meta.K;
0238 RECON1m = Mdat.RECON1meta.B;
0239 [Fx1,Fx2] = ismember(RECON1m,MetAbr);
0240 Fx2(Fx2==0) = [];
0241 % Metkegg(Fx2) = RECON1k(Fx1);
0242 Fx3 = find(Fx1);
0243 for i1 = 1:numel(Fx2)
0244     if numel(Metkegg{Fx2(i1)}) > numel(RECON1k{Fx3(i1)})
0245        Tempmet = Metkegg{Fx2(i1)};
0246        whrc = ismember(Tempmet,RECON1k{Fx3(i1)});
0247        if isempty(whrc)
0248         Metkegg{Fx2(i1)} = RECON1k(Fx3(i1));
0249         end
0250     else
0251         Metkegg{Fx2(i1)} = RECON1k{Fx3(i1)};
0252     end
0253 end
0254 % -------------------------------------------------------------------------
0255 function [BiGGID,AllRxn] = AddLumpRxns(Steprxns,Insteprxns,BiGGID1,AllRxn1)
0256 % ExcRxns contains rxns which need to be removed manually from Steprxns
0257 ExcRxns = {'R03082','R01651','R01210'};
0258 FindLocs = find(ismember(Steprxns,ExcRxns));
0259 Steprxns(ismember(Steprxns,ExcRxns)) = [];
0260 Insteprxns(FindLocs)=[];
0261 PrevLen = numel(BiGGID1)+1;
0262 BiGGID = BiGGID1; AllRxn = AllRxn1;
0263 BiGGID{PrevLen} = 'MULTIR'; AllRxn{PrevLen} = 'MULTIR';
0264 CurrLen = numel(BiGGID1)+2;
0265 for count = 1:numel(Steprxns)
0266     if ~isempty(Insteprxns{count})
0267         if ~any(ismember(Insteprxns{count},AllRxn1)) %No common rxns
0268             BiggTemp = BiGGID1(ismember(AllRxn1,Steprxns{count}));
0269             for count1 = 1:numel(BiggTemp)
0270                 for count2 = 1:numel(Insteprxns{count})
0271                     BiGGID{CurrLen} = BiggTemp{count1};
0272                     AllRxn{CurrLen} = Insteprxns{count}{count2};
0273                     CurrLen = CurrLen+1;
0274                 end
0275             end
0276         else
0277             TempLoc = find(~ismember(Insteprxns{count},AllRxn1));
0278             BiggTemp = BiGGID1(ismember(AllRxn1,Steprxns{count}));
0279             for count1 = 1:numel(BiggTemp)
0280                 for count2 = 1:numel(TempLoc)
0281                     BiGGID{CurrLen} = BiggTemp{count1};
0282                     AllRxn{CurrLen} = Insteprxns{count}{TempLoc(count2)};
0283                     CurrLen = CurrLen+1;
0284                 end
0285             end
0286         end
0287     end
0288 end

Generated on Sat 16-Jul-2016 20:21:30 by m2html © 2005