Home > BiKEGG > Bigg2Kegg.m

Bigg2Kegg

PURPOSE ^

BiKEGG

SYNOPSIS ^

function Bigg2Kegg(Idfier)

DESCRIPTION ^

 BiKEGG
 generates corresponding reactions in KEGG and BiGG/HMR
 databases and saves the output as a *.mat file in BiGG2KEGG folder.
 Input:
     - Idfier : a string of either 'bigg' or 'hmr'
 Full documentation can be found in the user manual
 
 O. Jamialahmadi
 TMU, Chem. Eng. Dept., Biotech. Group 
 Oct. 2015

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function Bigg2Kegg(Idfier)
0002 % BiKEGG
0003 % generates corresponding reactions in KEGG and BiGG/HMR
0004 % databases and saves the output as a *.mat file in BiGG2KEGG folder.
0005 % Input:
0006 %     - Idfier : a string of either 'bigg' or 'hmr'
0007 % Full documentation can be found in the user manual
0008 %
0009 % O. Jamialahmadi
0010 % TMU, Chem. Eng. Dept., Biotech. Group
0011 % Oct. 2015
0012 
0013 Pth1 = which ('Bigg2Kegg.m');
0014 tind = find(Pth1=='\',1,'last');
0015 Pth = Pth1(1:tind-1);
0016 if strcmp(Idfier,'hmr')
0017     [pname,fname]=uigetfile('*.xlsx','Select the HMRdatabase xlsx file');
0018     [~,dat2]=xlsread([fname,pname],'RXNS');
0019     ind=dat2(:,1);
0020     ind1=ismember(ind,'#');
0021     ind2=find(ind1,3,'first');
0022     hdrs=dat2(1,:);
0023     % Reading the BIGG IDs:
0024     BIGG=dat2(2:ind2(3)-1,ismember(hdrs,'BIGG DATABASE ID')); 
0025     % Reading the KEGG rxn IDs:
0026     KEGG=dat2(2:ind2(3)-1,ismember(hdrs,'KEGG ID'));
0027     % Reading the rxn equations:
0028     Eqn=dat2(2:ind2(3)-1,ismember(hdrs,'EQUATION'));
0029     % Locating empty cells
0030     r1= ~ismember(KEGG,'');
0031     r2= ~ismember(BIGG,'');
0032     r3 = find(r1 & r2);
0033     KEGG=KEGG(r3);BIGG=BIGG(r3);Eqn=Eqn(r3);
0034     B2Kegg.Kegg=KEGG;B2Kegg.Bigg=BIGG;B2Kegg.Eqn=Eqn;
0035     fname = fullfile(Pth,'BiGG2KEGG','BiGG2KEGG_HMRbased-RECON1.mat');
0036     save (fname,'B2Kegg')
0037     return
0038 end
0039 
0040 if ~strcmp(Idfier,'bigg')
0041     msgbox('Unknown identifier: bigg/hmr','Error','error');
0042     return
0043 end
0044 % Read text file extracted from http://rest.kegg.jp/link/rn/cpd, which
0045 % contains corresponding rxns for each metabolite.
0046 Fileid = fopen(fullfile(Pth,'cpd2rxn.txt'),'r');
0047 cpd2rxn = textscan(Fileid,'%s %[^\n]');
0048 rxnData = cell2mat(cpd2rxn{2}); % KEGG rxn IDs
0049 cpds = strrep(cpd2rxn{1},'cpd:',''); % KEGG Cpds
0050 fclose(Fileid);
0051 % Read the model file======================================================
0052 [pname,fname1]=uigetfile({'*.*'},'Select the input model file (mat|xml)');
0053 Testname=pname(strfind(pname,'.')+1:end);
0054 if strcmp(Testname,'mat')
0055     D = importdata([fname1,pname]);
0056     keggloc1 = fieldnames(D);
0057     keggid = strcmpi(keggloc1,'metkeggid');
0058     if ~any(keggid)
0059         msgbox('There is no metKeggID field in the model!','Error','error');
0060         return
0061     end
0062     Metkegg = D.(keggloc1{keggid});
0063 %     NOTE: following commented lines can be activated to retrieve the
0064 %     Metkegg online form BiGG database
0065 %     Mets = D.mets;
0066 %     for counter = 1:length(Mets)
0067 %         BiggMet = urlread(['http://bigg.ucsd.edu/api/v2/models/',...
0068 %             D.description,'/metabolites/',Mets{counter}]);
0069 %         KeggLoci = strfind(BiggMet,'KEGG');
0070 %         if ~isempty(KeggLoci)
0071 %             Metkegg{counter} = BiggMet(KeggLoci+62:KeggLoci+67);
0072 %         else
0073 %             Metkegg{counter} = '';
0074 %         end
0075 %     end
0076 elseif strcmp(Testname,'xml')
0077     Idf = fopen([fname1,pname]);
0078     D = textscan(Idf,'%s');
0079     CbModel = readCbModel([fname1,pname]);
0080 else
0081     msgbox('Unknown filetype!','Error','error');
0082     return
0083 end
0084 % =========================================================================
0085 if strcmp(Testname,'xml')
0086     % Reading xml
0087     % Find listOfSpecies which contains metabolite annotations
0088     SpeciesLoci1 = strfind(D{1},'listOfSpecies');
0089     SpeciesLoci = find(~cellfun('isempty', SpeciesLoci1));
0090     % Discard other data in the model:
0091     SpeciesData = D{1}(SpeciesLoci(1):SpeciesLoci(2));
0092     clear D
0093     % Find each metabolite-------------------------------------------------
0094     SpeciesStart1 = strfind(SpeciesData,'<species');
0095     SpeciesStart = find(~cellfun('isempty', SpeciesStart1));
0096     SpeciesEnd1 = strfind(SpeciesData,'</species>');
0097     SpeciesEnd = find(~cellfun('isempty', SpeciesEnd1));
0098     if numel(SpeciesStart) ~= numel(SpeciesEnd)
0099         msgbox('SBML format error:Species have wrong format!','Error',...
0100             'error');
0101         return
0102     end
0103     % ---------------------------------------------------------------------
0104     % Search for KEGG compounds for each BiGG metabolite. Note that each BiGG
0105     % metabolite may have more than one equivalent KEGG compund, and although
0106     % HMR and RECON2 (Thiele lab.) do not provide more than one KEGG compound
0107     % for each metabolite, here, we followed the format of BiGG models (i.e.
0108     % several equivalent KEGG compounds for a specific metabolite).
0109     Metkegg = {''}; % KEGG compounds will fill this cell!
0110     for count = 1:numel(SpeciesStart)
0111         SpeciesTempStr = SpeciesData(SpeciesStart(count):SpeciesEnd(count));
0112         SpeciesTempStr = [SpeciesTempStr{:}];
0113         KeggCpdLoci = regexp(SpeciesTempStr,...
0114             '(?<=http://identifiers.org/kegg.compound/)[CG]\d{5}','match');
0115         if isempty(KeggCpdLoci)
0116             Metkegg{count} = '';
0117             continue
0118         end
0119         for count1 = 1:numel(KeggCpdLoci)
0120             if strcmp(KeggCpdLoci{count1},'C00001') % H2O
0121                 Metkegg{count}= {'C00001'};
0122                 break
0123             elseif strcmp(KeggCpdLoci{count1},'C00027') % H2O2
0124                 Metkegg{count}= {'C00027'};
0125                 break
0126             else
0127                 Metkegg{count}{count1}= KeggCpdLoci{count1};
0128             end
0129         end
0130     end
0131 end
0132 % Model modification: some metabolites have wrong/absent KEGG annotations.
0133 % Therefore, on the basis of modified KEGG compound IDs for RECON1.xml,
0134 % these metabolites will be added to Metkegg separately.
0135 D = CbModel;
0136 ModelName = D.description;
0137 tind1 = find(ModelName=='\',1,'last');
0138 ModelName1 = ModelName(tind1+1:end-4);
0139 if ~strcmp(ModelName1,'RECON1')
0140     MetAbr = CbModel.mets;
0141     Metkegg = ModelMoidfy (Metkegg,MetAbr);
0142 end
0143 
0144 % Search for Metkegg components with numel > 1, compare their names to
0145 % BiGG metabolites, and remove other components with different names if a
0146 % unique result is obtained. KEGGcpd.txt is used which contains all KEGG
0147 % compounds.
0148 Fileid1 = fopen(fullfile(Pth,'KEGGcpd.txt'),'r');
0149 Keggcpd = textscan(Fileid1,'%s %[^\n]');
0150 CpdData = strrep(Keggcpd{1},'cpd:','');
0151 CpdNames = regexp(Keggcpd{2},';','split');
0152 BiGGmets = D.metNames;
0153 for count = 1:numel(Metkegg)
0154     if all(ismember({'C01326','C00177'},Metkegg{count})) % For cyan
0155         continue
0156     end
0157 % Second condition: L-carnitine definition discrepancy between KEGG and BiGG
0158     if numel(Metkegg{count})>1 && ~all(ismember({'C00318','C00487'},...
0159             Metkegg{count}))
0160         MatchLoci = find(ismember(CpdData,Metkegg{count}));
0161         for incount = 1:numel(MatchLoci)
0162             CpdNames{MatchLoci(incount)}=strtrim(CpdNames{MatchLoci(incount)});
0163             FoundL = find(ismember(BiGGmets,CpdNames{MatchLoci(incount)}));
0164             if FoundL
0165                 % Check if found cpd participates in any rxn
0166                 if any(strcmp(CpdData(MatchLoci(incount)),cpds))
0167                     Metkegg{count} = CpdData(MatchLoci(incount));
0168                     break
0169                 end
0170             end
0171         end
0172     end
0173 end
0174 
0175 % Extract KEGG metabolites from BIGG API ==================================
0176 Fileid3 = fopen(fullfile(Pth,'bigg_models_metabolites.txt'),'r');
0177 biggmets = textscan(Fileid3,'%s %s %[^\n]');
0178 fclose(Fileid3);
0179 biggmetid = biggmets{1}; biggmetdes = biggmets{3}; clear biggmets
0180 Metkegg1 = {''};
0181 modelm = D.mets;
0182 modelm=strrep(modelm,'[','_');modelm=strrep(modelm,']','');
0183 for ct1 = 1:numel(modelm)
0184     tempdes = biggmetdes(strcmp(biggmetid,modelm{ct1}));
0185     temploci = regexp(tempdes,...
0186         'http://identifiers.org/kegg.compound/C\S{5}','match');
0187     temploci = temploci{1};
0188     if isempty(temploci)
0189         Metkegg1{ct1} = '';
0190     else
0191         for ct2 = 1:numel(temploci)
0192             regtemp = regexp(temploci{ct2},'C\S{5}','match');
0193             Metkegg1{ct1}{ct2} = regtemp{1};
0194         end
0195     end
0196 end
0197 % Consensus Metkegg: Metkegg1 from BiGG API and Metkegg from COBRA models
0198 % are mixed to generate the most complete set of KEGG metabolites for a
0199 % SBML model.
0200 for cp1 = 1:numel(Metkegg1)
0201     if isempty(Metkegg1{cp1}) || (numel(Metkegg1{cp1}) ~= numel(Metkegg{cp1}))
0202         for cp2 = 1:numel(Metkegg{cp1})
0203             Metkegg1{cp1}{cp2} = Metkegg{cp1}{cp2};
0204         end
0205     end
0206 end
0207 clear Metkegg
0208 Metkegg = Metkegg1;
0209 % ==========================================================================
0210 % Check the existence of metabolites participating in Bigg rxns and map the
0211 % Bigg rxns to KEGG rxns if possible. To do so, metabolites in Bigg rxns are
0212 % compared to KEGG metabolites, and KEGG metabolite IDs are located in KEGG
0213 % databse to identify the KEGG rxn IDs they involve in KEGG databse. If all
0214 % KEGG metabolites share a unique KEGG rxn ID, then that rxn ID corresponds
0215 % to the Bigg rxn from which the metabolites were extracted (restricted
0216 % condition).
0217 % To reduce the CPU time, rxns are first compared to rxns in PrunedRxns.txt
0218 % and those in UniModelKEGG to exclude them from search process.
0219 load('UniModelKEGG.mat')
0220 UniB = B2Kegg.B; UniK = B2Kegg.K;
0221 Umulti = find(ismember(UniB,'MULTIR'));
0222 UniB = UniB(1:Umulti-1); UniK = UniK(1:Umulti-1);
0223 clear B2Kegg
0224 Fileid12 = fopen('PrunedRxns.txt','r');
0225 Prunedata12 = textscan(Fileid12,'%s %s %d %[^\n]');
0226 Testpruned = Prunedata12{1};
0227 fclose(Fileid12);
0228 RxnNames = D.rxns; % BiGG rxn names
0229 Rxns = D.S; % It is assumed that this field is universal.
0230 Keggrm = (0);
0231 BiGGID = {0};
0232 AllRxn = {0};
0233 Uctr = 1; ChkTrans = 1; TransKegg = ({}); TransBiGG = ({});
0234 for rn = 1:size(Rxns,2) % for all rxns
0235     fprintf('Rxn %d of %d\n',rn,size(Rxns,2))
0236     % ---------------------------------------------------------------------
0237     %if current reaction exists in pruned reactions, or UniModelKEGG there
0238     %is no need to further search for potential equivalent reactions.
0239     if any(ismember(Testpruned,RxnNames{rn}))
0240         continue
0241     end
0242     if any(ismember(UniB,RxnNames{rn}))
0243         UniBloci = find(ismember(UniB,RxnNames{rn}));
0244         for u1 = 1:numel(UniBloci)
0245             BiGGID{Uctr} = UniB{UniBloci(u1)};
0246             AllRxn{Uctr} = UniK{UniBloci(u1)};
0247             Uctr = Uctr + 1;
0248         end    
0249         continue
0250     end
0251     %----------------------------------------------------------------------
0252     Prd = find(Rxns(:,rn)); % Find substrates and products for each rxn
0253     CheckTransport = [Metkegg{Prd}]; % For transport rxns
0254     if (numel(CheckTransport)/numel(unique(CheckTransport)) == 2) && ...
0255             numel(Prd) > 2
0256         continue
0257     end
0258     % 1st condition: All reactants and products have metKeggIDs.
0259     % 2nd condition: Exclude all one element rxns
0260     if (~sum(strcmp (Metkegg(Prd),'')) && (length(Prd) > 1))
0261         Cbs = MetCombs(Metkegg,Prd); % All combinations (permutations) of
0262         % KEGG compounds, regarding the fact that there is more than one
0263         % KEGG cpd for some metabolites in the model.
0264         for Outctr = 1:size(Cbs,1)
0265             % Contains all KEGG cpds in a certain rxn
0266             TempMet = unique(Cbs(Outctr,:));
0267             ChkTempMet = Cbs(Outctr,:);
0268             % Transport rxns with 2 cpds
0269             % 1st cond: finding a transport rxn; 3 is as a result of Cbs
0270             % structure.
0271             % 2nd cond: Transport reactions with 2 metabolites; the reason
0272             % for 1 is similar to the 1st cond.
0273             % 3rd cond: No redundancy
0274             if (numel(ChkTempMet)/numel(TempMet) == 3) ...
0275                     && (numel(TempMet) == 1) ...
0276                    && ~any(ismember(TransBiGG,RxnNames{rn}))
0277                 fprintf('Transport rxn: %s\n',RxnNames{rn})
0278                 TransKegg{ChkTrans}{1} = TempMet{1};
0279                 TransKegg{ChkTrans}{2} = TempMet{1};
0280                 TransBiGG{ChkTrans} = RxnNames{rn};
0281                 ChkTrans = ChkTrans + 1;
0282                 continue
0283             end
0284             if (numel(TempMet) == 2) && any(strcmp(TempMet,'C00080'))
0285                 continue
0286             end
0287 
0288             if length(TempMet) > 1 % Exclude all exchange rxns
0289                 KeggrmMethod = 'off'; %Offline
0290                 switch KeggrmMethod
0291                     case 'off'
0292                         Id1 = ismember(cpds,TempMet);
0293                         Keggrm = str2num(rxnData(Id1,5:end));
0294                     case 'on'
0295                         Keggrm = KeggApiMatch (TempMet);
0296                 end
0297                 % Identify unique KEGG rxn ID
0298                 Uq = unique(Keggrm);
0299                 N = histc(Keggrm, Uq);
0300                 if isempty(max(N))
0301                     continue
0302                 end
0303                 if (max(N) == numel(TempMet)) || ...
0304                         (max(N) == numel(TempMet)-1) % Found repeated rxns
0305                     UqTemp = Uq(N==max(N));
0306                     % Convert rxn codes to string
0307                     for count = 1:numel(UqTemp)
0308                         UqTemp1{count} = strcat('rn:R',repmat('0',...
0309                             [1,4- floor(log10(UqTemp(count)+eps))])...
0310                             ,num2str(UqTemp(count)));
0311                     end
0312                     if numel(UqTemp) > 1
0313                         for count = 1:length(UqTemp1)
0314                             UqTempSum(count) = sum(ismember(cpd2rxn{2},...
0315                                 UqTemp1{count}));
0316                         end
0317                         % Restrict condition: If only the number of metabolites
0318                         % in min(UqTempSum)reaction are identical to those
0319                         % of TempMet
0320                         if min(UqTempSum)==numel(TempMet) || ...
0321                                min(UqTempSum)+1 == numel(TempMet) || ...
0322                                min(UqTempSum)-1 == numel(TempMet)  
0323                             In1 = find(UqTempSum == min(UqTempSum(:)));
0324                             for Inctr = 1:numel(In1)
0325                                 AllRxn{Uctr} = UqTemp1{In1(Inctr)}(4:end);
0326                                 BiGGID {Uctr} = D.rxns{rn};
0327                                 Uctr = Uctr + 1;
0328                             end
0329                         end
0330                     else
0331                         UqTempSum = sum(ismember(cpd2rxn{2},UqTemp1{1}));
0332                         if min(UqTempSum)==numel(TempMet) || ...
0333                                 min(UqTempSum)+1 == numel(TempMet) || ...
0334                                 min(UqTempSum)-1 == numel(TempMet)
0335                             AllRxn{Uctr} = UqTemp1{1}(4:end);
0336                             BiGGID {Uctr} = D.rxns{rn};
0337                             Uctr = Uctr + 1;
0338                         end
0339                         
0340                     end
0341                     UqTempSum = 0;
0342                     UqTemp1 = {0};
0343                     
0344                 end
0345             end
0346             Keggrm = 0;
0347         end
0348     end
0349 end
0350 % Identify transport reactions (diffusion)
0351 [TransKeggT,TransBiggT] = CheckTransRxns(TransKegg,TransBiGG);
0352 
0353 if ~isempty(TransBiggT{1})
0354     BiGGID = [BiGGID,TransBiggT];
0355     AllRxn = [AllRxn,TransKeggT];
0356 end
0357 
0358 % Identify BiGG rxns for which there is more than one KEGG rxns, and prune
0359 % BiGGIDs and AllRxns
0360 [BiGGID,AllRxn,MultiBID,MultiRxn] = RxnPrune(BiGGID, AllRxn, RxnNames);
0361 AllRxnTemp = {[]}; BiGGIDTemp = {[]}; Tctr = 1;
0362 if any ([MultiBID{:}])
0363     for ct = 1:numel(MultiBID)
0364         for ci = 1:numel(MultiRxn{ct})
0365             BiGGIDTemp{Tctr} = MultiBID{ct};
0366             AllRxnTemp{Tctr} = MultiRxn{ct}{ci};
0367             Tctr = Tctr + 1;
0368         end
0369     end
0370     BiGGID = [BiGGID,BiGGIDTemp];
0371     AllRxn = [AllRxn,AllRxnTemp];
0372 end
0373 % Check with restricted condition
0374 [B1,K1] = Bigg2KeggRestricted(D,Metkegg,rxnData,cpds,cpd2rxn);
0375 Bdiff = BiGGID(~ismember(BiGGID,B1));
0376 Kdiff = AllRxn(~ismember(BiGGID,B1));
0377 if any ([MultiBID{:}])
0378     Fileid = fopen(fullfile(Pth,'PrunedRxns.txt'),'r');
0379     Prunedata = textscan(Fileid,'%s %s %d %[^\n]');
0380     Prunedrxns = Prunedata{1};
0381     fclose(Fileid);
0382     MultiBIDtemp = MultiBID(~ismember(MultiBID,Prunedrxns));
0383     MultiRxntemp = MultiRxn(~ismember(MultiBID,Prunedrxns));
0384     Bmultidiff = MultiBIDtemp(~ismember(MultiBIDtemp,Bdiff));
0385     Kmultidiff = MultiRxntemp(~ismember(MultiBIDtemp,Bdiff));
0386     Tctr = 1; BmultidiffT = {[]}; KmultidiffT = {[]};
0387     for ct = 1:numel(Bmultidiff)
0388         for ci = 1:numel(Kmultidiff{ct})
0389             BmultidiffT{Tctr} = Bmultidiff{ct};
0390             KmultidiffT{Tctr} = Kmultidiff{ct}{ci};
0391             Tctr = Tctr + 1;
0392         end
0393     end
0394     if ~isempty(BmultidiffT{1})
0395         Bdiff = [Bdiff,BmultidiffT];
0396         Kdiff = [Kdiff,KmultidiffT];
0397     end
0398 end
0399 % Check with bigg rxn ref.
0400 [bref,kref,Bdiff,Kdiff] = biggref(Bdiff,Kdiff);
0401 % -----------------------------------
0402 if ~isempty(Bdiff{1})
0403     Diffdata = cell(numel(Bdiff),3);
0404     Diffdata(:,1) = Bdiff; Diffdata(:,2) = Kdiff;
0405     for ct = 1:numel(Bdiff)
0406         Diffdata{ct,3} = 0;
0407     end
0408     setappdata(0,'Diffdata',Diffdata);
0409     waitfor(Bigg2KeggTable)
0410     Finalv=getappdata(0,'Finalvalidity');
0411     Fileid = fopen(fullfile(Pth,'PrunedRxns.txt'),'a');
0412     checkb = Finalv(:,1);
0413     checkv = Finalv(:,3);
0414     checkv = [checkv{:}]; checkb1 = ({});
0415     for ct = 1:size(Finalv,1)
0416         if any(strcmp(Finalv{ct,1},checkb1))
0417             continue
0418         end
0419         checkb1{ct} = Finalv{ct,1};
0420         checkloci = find(ismember(checkb,Finalv{ct,1}));
0421         checkfinal = checkv(checkloci);
0422         if ~any(checkfinal)
0423             for ct1 = 1:numel(checkfinal)
0424                 fprintf(Fileid,'\n%s\t%s\t%d',Finalv{checkloci(ct1),1},...
0425                     Finalv{checkloci(ct1),2},...
0426                     0);
0427             end
0428         else
0429             for ct1 = 1:numel(checkfinal)
0430                 if checkv(checkloci(ct1))
0431                     fprintf(Fileid,'\n%s\t%s\t%d',Finalv{checkloci(ct1),1},...
0432                         Finalv{checkloci(ct1),2},1);
0433                 end
0434             end
0435         end
0436     end
0437     fclose(Fileid);
0438     AT=getappdata(0);
0439     AT1 = fieldnames(AT);
0440     for co = 1:length(AT1)
0441         if strcmp(AT1{co},'Diffdata') || strcmp(AT1{co},'Finalvalidity')
0442             rmappdata(0,AT1{co});
0443         end
0444     end
0445 end
0446 if ~isempty(bref{1})
0447     rmvtemp = ismember(BiGGID,bref);
0448     BiGGID(rmvtemp) = [];
0449     AllRxn(rmvtemp) = [];
0450     BiGGID = [BiGGID,bref];
0451     AllRxn = [AllRxn,kref];
0452 end
0453 % Identify BiGG rxns for which there is more than one KEGG rxns, and prune
0454 % BiGGIDs and AllRxns
0455 [BiGGID,AllRxn,MultiBID,MultiRxn] = RxnPrune(BiGGID, AllRxn, RxnNames);
0456 AllRxnTemp = {[]}; BiGGIDTemp = {[]}; Tctr = 1;
0457 if any ([MultiBID{:}])
0458     for ct = 1:numel(MultiBID)
0459         for ci = 1:numel(MultiRxn{ct})
0460             BiGGIDTemp{Tctr} = MultiBID{ct};
0461             AllRxnTemp{Tctr} = MultiRxn{ct}{ci};
0462             Tctr = Tctr + 1;
0463         end
0464     end
0465     BiGGID = [BiGGID,BiGGIDTemp];
0466     AllRxn = [AllRxn,AllRxnTemp];
0467 end
0468 
0469 % Extract multi-step reactions from KEGG. This step may need internet
0470 % connection
0471 [Steprxns,Insteprxns] = MultiRxns(AllRxn);
0472 [BiGGID,AllRxn] = AddLumpRxns(Steprxns,Insteprxns,BiGGID,AllRxn);
0473 
0474 % Save output data
0475 B2Kegg.B=BiGGID; B2Kegg.K=AllRxn;
0476 FileName = [ModelName1,'KEGG.mat'];
0477 fname = fullfile(Pth,'BiGG2KEGG',FileName);
0478 save (fname,'B2Kegg')
0479 
0480 if any ([MultiBID{:}])
0481     fprintf('There exists %.0f BiGG rxns with multiple KEGG IDs:\n',...
0482         numel(MultiBID));
0483     for ct = 1:numel(MultiBID)
0484         fprintf('%s:\t',MultiBID{ct});
0485         fprintf('%s  ',MultiRxn{ct}{:});
0486         fprintf('\n')
0487     end
0488 else
0489     disp('There exists no BiGG rxns with multiple KEGG IDs for this model')
0490 end
0491 
0492 %==========================================================================
0493 %%===========================Subfunctions==================================
0494 %==========================================================================
0495 function Cbs = MetCombs(Metkegg,Prd)
0496 % OutMet = A 2D cell with rows and columns corresponding to combinations
0497 % and compounds, respectively.
0498 % OutMet generates all combinations among Metkegg components. This is done
0499 % to search through KEGG REACTIONs for all possibilities.
0500 SizeCell = cell(1,numel(Prd));
0501 for count = 1:numel(Prd)
0502     TempCounter = numel(Metkegg{Prd(count)});
0503     SizeCell{count} = 1:TempCounter;
0504 end
0505 MetkeggT = Metkegg(Prd);
0506 MetkeggTemp = [MetkeggT{:}];
0507 OutMet = combvec(SizeCell{:}).';
0508 for count = 1:size(OutMet,2)-1 % Corresponds to linearized Metkegg(Prd)
0509     OutMet(:,count+1)=OutMet(:,count+1)+max(OutMet(:,count));
0510 end
0511 CbsT = MetkeggTemp(OutMet);
0512 % Check if H+ is peresent in Cbs. As KEGG reactions are not charge
0513 % balanced, several BiGG reactions will not be found. To resolve this
0514 % issue, new rows withouth H+ will be appended to Cbs.
0515 if any(strcmp(CbsT(:),'C00080'))
0516     Cbs1 = CbsT;
0517     [N1,N2] = find(strcmp(Cbs1,'C00080'));
0518     LenCbs = 1:numel(Cbs1);
0519     for count = 1:numel(N1)
0520         NonH2Cpd = find(~ismember(LenCbs,N2(count)));
0521         Cbs1{N1(count),N2(count)} = CbsT{N1(count),NonH2Cpd(1)};
0522     end
0523     Cbs = [CbsT;Cbs1];
0524 else 
0525     % On the contrary to above-mentioned case, the opposite case is also
0526     % observed. In such a case, H+ will be added as an extra metabolite to
0527     % each row in CbsT, and final Cbs will contain previous set of
0528     % metabolites plus H+.
0529     Cbs1 = CbsT;
0530     Cbs2 = Cbs1;
0531     for count = 1:size(CbsT,1)
0532         Cbs1{count,size(CbsT,2)+1} = 'C00080'; % Add H+
0533         % Keep dimensional consistency:
0534         Cbs2{count,size(CbsT,2)+1} = Cbs2{count,size(CbsT,2)};
0535     end
0536     Cbs = [Cbs1;Cbs2];
0537 end
0538 %--------------------------------------------------------------------------
0539 function [Bg,Ar,Bg2,Ar2] = RxnPrune(BiGGID, AllRxn, RxnNames)
0540 % This subfunction uses PrunedRxns.txt, a set of manually found equivalence
0541 % KEGG rxn IDs, to prune current rxn mapping. This set of reactions are
0542 % generated based on univeral reactions in BiGG databse, and can
0543 % be utilized for every desired model.
0544 Fileid = fopen('PrunedRxns.txt','r');
0545 PrAll = textscan(Fileid,'%s %s %d %[^\n]');
0546 PrB = PrAll{3};
0547 PrBY = find(PrB); % To be added
0548 PrBN = ~PrB; % To be removed
0549 TBrxns = PrAll{1}; TKrxns = PrAll{2};
0550 BrxnY = TBrxns(PrBY); KrxnY = TKrxns(PrBY);
0551 BrxnN = TBrxns(PrBN);
0552 % Which BiGGID elements are in BrxnY?
0553 % [Fx1,Fx2] = ismember(BrxnY,BiGGID);
0554 % Fx2(Fx2==0) = [];
0555 % AllRxn(Fx2) = KrxnY(Fx1);
0556 BiGGInBrxnY = BiGGID(ismember(BiGGID,BrxnY)); 
0557 KeggOfBrxnY = KrxnY(ismember(BrxnY,BiGGID));
0558 BrxnYTemp = BrxnY(ismember(BrxnY,BiGGID));
0559 SharedRxns = cell(1,numel(BiGGInBrxnY));
0560 Tcount = 1; TempSharedRxns = ({}); TempBiGGID = ({});
0561 for count = 1:numel(BrxnYTemp)
0562     T= find(ismember(BiGGInBrxnY,BrxnYTemp{count}));
0563     T1 = find(ismember(BrxnYTemp,BiGGInBrxnY(T(1))));
0564 
0565     for count2 = 1:numel(T)
0566         % Extract only the first element:: T1(1); others are stored in
0567         % TempSharedRxns and will be added to AllRxn
0568         SharedRxns(T(count2)) = KeggOfBrxnY(T1(1));
0569         if numel(T1)>1 && ~any(ismember(TempBiGGID,BiGGInBrxnY(T(1))))
0570             for count3 = 2:numel(T1)
0571                 TempSharedRxns{Tcount} = KeggOfBrxnY{T1(count3)};
0572                 TempBiGGID{Tcount} = BrxnYTemp{T1(count3)};
0573                 Tcount = Tcount + 1;
0574             end
0575         end
0576     end
0577 
0578 end
0579 AllRxn(ismember(BiGGID,BrxnY)) = SharedRxns;
0580 if ~isempty(TempSharedRxns)
0581     Addlen = numel(AllRxn)+1:numel(AllRxn)+numel(TempSharedRxns);
0582     AllRxn(Addlen) = TempSharedRxns;
0583     BiGGID(Addlen) = TempBiGGID;
0584 end
0585 % Which BrxnY elements are not in BiGGID and should be added separately?
0586 NotInBiGG = BrxnY(~ismember(BrxnY,BiGGID));
0587 RxnNot = KrxnY(~ismember(BrxnY,BiGGID));
0588 % Are NotInBiGG elements exist in the current model (Note: BrxnY contains
0589 % universal BiGG reactions) ?
0590 YaLoci = find(ismember(NotInBiGG,RxnNames));
0591 Yf1 = NotInBiGG(YaLoci);
0592 AddedRxns = RxnNot(YaLoci);
0593 ModLength = (1:numel(Yf1))+numel(BiGGID);
0594 BiGGID(ModLength) = Yf1;
0595 AllRxn(ModLength) = AddedRxns;
0596 % Remove other found rxns in BiGGID which their absence is manually
0597 % confirmed.
0598 TobeRemovedRxns = ismember(BiGGID,BrxnN);
0599 BiGGID(TobeRemovedRxns)=[]; AllRxn(TobeRemovedRxns)=[]; % Remove other rxns
0600 % Find BiGG IDs with more than one rxn id
0601 ct=1;cr=1;
0602 Bg2={0};Ar2={0};Bg={0};Ar={0};
0603 BiGGT = BiGGID; RxnT = AllRxn;
0604 while numel(BiGGT)
0605     T = find(ismember(BiGGT,BiGGT(1))); R = RxnT(T);
0606     if numel(unique(R))~=1 % Reactions with more than one KEGG ID
0607         Bg2{ct} = BiGGT{1};
0608         Ar2{ct} = unique(R);      
0609         ct = ct+1;
0610     else
0611         Bg{cr}=BiGGT{1};
0612         Ar{cr}=cell2mat(unique(R));
0613         cr=cr+1;
0614     end
0615     BiGGT(T)=[]; %Remove
0616     RxnT(T)=[];
0617 end
0618 %--------------------------------------------------------------------------
0619 function Keggrm = KeggApiMatch(TempMet)
0620 ctr=1;
0621 Keggrm=(0);
0622 for mt = 1:length(TempMet)
0623     Metstr = urlread(['http://rest.kegg.jp/link/rn/',TempMet{mt}]);
0624     Id1 =  regexp(Metstr,'rn:R'); % All rxns containing a cpd
0625     if Id1 % KEGG rxn IDs are provided for this cpd
0626         for di = 1:length(Id1) % Extracts rxns
0627             Keggrm(ctr) = str2double(Metstr(Id1(di)+4:Id1(di)+9));
0628             ctr = ctr+1;
0629         end
0630     else
0631         break % Don't generate Metstr for other compounds
0632     end
0633 end
0634 % -------------------------------------------------------------------------
0635 function Metkegg = ModelMoidfy (Metkegg,MetAbr)
0636 % Read Metkegg data for RECON1.xml
0637 Mdat = load('RECON1Metkegg.mat');
0638 RECON1k = Mdat.RECON1meta.K;
0639 RECON1m = Mdat.RECON1meta.B;
0640 [Fx1,Fx2] = ismember(RECON1m,MetAbr);
0641 Fx2(Fx2==0) = [];
0642 % Metkegg(Fx2) = RECON1k(Fx1);
0643 Fx3 = find(Fx1);
0644 for i1 = 1:numel(Fx2)
0645     if numel(Metkegg{Fx2(i1)}) > numel(RECON1k{Fx3(i1)})
0646        Tempmet = Metkegg{Fx2(i1)};
0647        whrc = ismember(Tempmet,RECON1k{Fx3(i1)});
0648        if isempty(whrc)
0649         Metkegg{Fx2(i1)} = RECON1k(Fx3(i1));
0650         end
0651     else
0652         Metkegg{Fx2(i1)} = RECON1k{Fx3(i1)};
0653     end
0654 end
0655 % -------------------------------------------------------------------------
0656 function [BiGGID,AllRxn] = AddLumpRxns(Steprxns,Insteprxns,BiGGID1,AllRxn1)
0657 % ExcRxns contains rxns which need to be removed manually from Steprxns
0658 ExcRxns = {'R03082','R01651','R01210','R08767','R01651','R03171'};
0659 FindLocs = find(ismember(Steprxns,ExcRxns));
0660 Steprxns(ismember(Steprxns,ExcRxns)) = [];
0661 Insteprxns(FindLocs)=[];
0662 PrevLen = numel(BiGGID1)+1;
0663 BiGGID = BiGGID1; AllRxn = AllRxn1;
0664 BiGGID{PrevLen} = 'MULTIR'; AllRxn{PrevLen} = 'MULTIR';
0665 CurrLen = numel(BiGGID1)+2;
0666 for count = 1:numel(Steprxns)
0667     if ~isempty(Insteprxns{count})
0668         if ~any(ismember(Insteprxns{count},AllRxn1)) %No common rxns
0669             BiggTemp = BiGGID1(ismember(AllRxn1,Steprxns{count}));
0670             for count1 = 1:numel(BiggTemp)
0671                 for count2 = 1:numel(Insteprxns{count})
0672                     BiGGID{CurrLen} = BiggTemp{count1};
0673                     AllRxn{CurrLen} = Insteprxns{count}{count2};
0674                     CurrLen = CurrLen+1;
0675                 end
0676             end
0677         else
0678             TempLoc = find(~ismember(Insteprxns{count},AllRxn1));
0679             BiggTemp = BiGGID1(ismember(AllRxn1,Steprxns{count}));
0680             for count1 = 1:numel(BiggTemp)
0681                 for count2 = 1:numel(TempLoc)
0682                     BiGGID{CurrLen} = BiggTemp{count1};
0683                     AllRxn{CurrLen} = Insteprxns{count}{TempLoc(count2)};
0684                     CurrLen = CurrLen+1;
0685                 end
0686             end
0687         end
0688     end
0689 end
0690 %--------------------------------------------------------------------------
0691 function [TransKeggT,TransBiggT] = CheckTransRxns(TransKegg,TransBiGG)
0692 % Read cpd2weight ===================
0693 Fileid = fopen('cpd2weight.txt','r');
0694 cpd2weight = textscan(Fileid,'%s %f');
0695 cpd4weight = strrep(cpd2weight{1},'cpd:','');
0696 Weight = cpd2weight{2};
0697 fclose(Fileid);
0698 % Read rxns with 2, 4 and 6 components ========
0699 Fileid = fopen('W4TranRxns2.txt','r');
0700 rxn2t = textscan(Fileid,'%s %f %f');
0701 rxn2 = rxn2t{1};
0702 W21T = rxn2t{2};
0703 W22T = rxn2t{3};
0704 W2 = zeros(numel(W22T),2);
0705 for ct = 1:numel(W21T)
0706     W2(ct,1) = W21T(ct);
0707     W2(ct,2) = W22T(ct);
0708 end
0709 fclose(Fileid);
0710 Fileid = fopen('W4TranRxns4.txt','r');
0711 rxn4t = textscan(Fileid,'%s %f %f %f %f');
0712 rxn4 = rxn4t{1};
0713 W41T = rxn4t{2};
0714 W42T = rxn4t{3};
0715 W43T = rxn4t{4};
0716 W44T = rxn4t{5};
0717 W4 = zeros(numel(W44T),2);
0718 for ct = 1:numel(W44T)
0719     W4(ct,1) = W41T(ct);
0720     W4(ct,2) = W42T(ct);
0721     W4(ct,3) = W43T(ct);
0722     W4(ct,4) = W44T(ct);
0723 end
0724 fclose(Fileid);
0725 CurLen = 1;
0726 TransKeggT = {[]}; TransBiggT = {[]};
0727 for ct = 1:numel(TransKegg)
0728     Text1 = ['Checking equivalent transport reaction for ',TransBiGG{ct},'\n'];
0729     fprintf(Text1)
0730     RxnComNo = numel(TransKegg{ct});
0731     RxnWeight = Weight(ismember(cpd4weight,TransKegg{ct}));
0732     if RxnComNo == 2
0733         rxn24 = rxn2;
0734         AllW = W2;
0735     elseif RxnComNo == 4
0736         rxn24 = rxn4;
0737         AllW = W4;
0738     end
0739     if isempty(RxnWeight)
0740         continue
0741     end
0742     Reslt = bsxfun(@eq,AllW,RxnWeight);
0743     [mID,~]=find(Reslt);
0744     if ~isempty(mID) && (RxnComNo.*numel(unique(mID)) == numel(mID))
0745         CandRxns1 = rxn24(unique(mID));
0746         for ct1 = 1:numel(CandRxns1)
0747             TransBiggT{CurLen} = TransBiGG{ct};
0748             TransKeggT{CurLen} = CandRxns1{ct1};
0749             CurLen = CurLen + 1;
0750         end
0751     end
0752 end
0753 %--------------------------------------------------------------------------
0754 function[boutref,koutref,bout,kout] = biggref(bin,kin)
0755 % Read BiGG data
0756 bout = {[]}; kout = {[]};  boutref = {[]}; koutref = {[]};
0757 Pth1 = which ('Bigg2Kegg.m');
0758 tind = find(Pth1=='\',1,'last');
0759 Pth = Pth1(1:tind-1);
0760 Fileid = fopen(fullfile(Pth,'bigg_models_reactions.txt'),'r');
0761 biggrxns = textscan(Fileid,'%s %[^\n]');
0762 fclose(Fileid);
0763 biggid = biggrxns{1}; biggdes = biggrxns{2};
0764 clear biggrxns
0765 ct1 = 1; ct11 = 1;
0766 while numel(bin)
0767     bintemp = bin{1};
0768     kintemp = kin(ismember(bin,bin{1}));
0769     bin1loc = find(ismember(bin,bin{1}));
0770     destemp = biggdes(ismember(biggid,bintemp));
0771     temploci = regexp(destemp,...
0772         'http://identifiers.org/kegg.reaction/R\S{5}','match');
0773     if isempty(temploci) || isempty(temploci{1})        
0774         for ct3 = 1:numel(bin1loc)
0775             bout{ct1} = bintemp;
0776             kout{ct1} = kintemp{ct3};
0777             ct1 = ct1 + 1;
0778         end
0779     else
0780         temploci = temploci{1};
0781         for ct2 = 1:numel(temploci)
0782             regtemp = regexp(temploci{ct2},'R\S{5}','match');
0783             boutref{ct11} = bintemp;
0784             koutref{ct11} = regtemp{1};
0785             ct11 = ct11 + 1;
0786         end
0787     end
0788     bin(bin1loc) = [];
0789     kin(bin1loc) = [];
0790 end

Generated on Sat 16-Jul-2016 20:21:30 by m2html © 2005