Home > BiKEGG > PreconsMethod.m

PreconsMethod

PURPOSE ^

PreconsMethod

SYNOPSIS ^

function PreconsMethod(handles)

DESCRIPTION ^

 PreconsMethod
 Employs Precons GUI tool for building reaction correspondences based on
 KEGG genome annotations. Output is a MS Excel spreadsheet  containing
 gene entries (KEGG), KEGG reaction identifier, BiGG reaction abbr., and 
 associated pathways (KEGG). 
 A MATLAB biograph object can also be generated for the selected organism.

 O. Jamialahmadi
 TMU, Chem. Eng. Dept., Biotech. Group 
 June 2016

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function PreconsMethod(handles)
0002 % PreconsMethod
0003 % Employs Precons GUI tool for building reaction correspondences based on
0004 % KEGG genome annotations. Output is a MS Excel spreadsheet  containing
0005 % gene entries (KEGG), KEGG reaction identifier, BiGG reaction abbr., and
0006 % associated pathways (KEGG).
0007 % A MATLAB biograph object can also be generated for the selected organism.
0008 %
0009 % O. Jamialahmadi
0010 % TMU, Chem. Eng. Dept., Biotech. Group
0011 % June 2016
0012 
0013 Org = get(handles.OrganEdit,'String');
0014 
0015 [getpathways,stat] = urlread(['http://rest.kegg.jp/list/pathway/',Org]);
0016 if ~stat
0017     msgbox('There is a problem with the internet connection!');
0018     return
0019 end
0020 pths = textscan(getpathways,'%s %[^\n]');
0021 pathnames = pths{1}; pathnames = strrep(pathnames,'path:','');
0022 % pathdescription = pths{2};
0023 
0024 % Load KEGG2BiGG reactions
0025 BiGGdata = load('UniModelKEGG.mat');
0026 Bdata = BiGGdata.B2Kegg.B; Kdata = BiGGdata.B2Kegg.K;
0027 Multir = find(ismember(Bdata,'MULTIR'));
0028 Bdata(Multir:end) = []; Kdata(Multir:end) = [];
0029 Allgenes = {[]}; Allrxns = {[]}; Allbigg = {[]}; Allpath = {[]};
0030 yct = 1;
0031 for cp = 1:numel(pathnames)
0032     fprintf('Pathway: %d of %d\n',cp,numel(pathnames))
0033     [getKGML,stat] = urlread(['http://rest.kegg.jp/get/',pathnames{cp},'/kgml']);
0034     if ~stat
0035         msgbox('There is a problem with the internet connection!');
0036         clc
0037         return
0038     end
0039     Ids = regexp(getKGML,'(?<=entry id=)[^>]*','match');
0040     if isempty(Ids)
0041         continue
0042     end
0043     for ct = 1:numel(Ids)
0044         rnsTemp = Ids{ct};
0045         checkgene = regexp(rnsTemp,'type="gene"', 'once');
0046         if ~isempty(checkgene)
0047             rnnms1 = regexp(rnsTemp,'(?<=reaction=")[^"]*','match');
0048             if ~isempty(rnnms1)
0049                 genenms1 = regexp(rnsTemp,'(?<=name=")[^"]*','match');
0050                 CurrxnNum1 = regexp(rnnms1, 'R\d{5}', 'match');
0051                 CurrxnNum = CurrxnNum1{1};
0052                 for mc = 1:numel(CurrxnNum)
0053                     biggTemp = Bdata(ismember(Kdata,CurrxnNum{mc}));
0054                     if isempty(biggTemp)
0055                         biggtemp = '';
0056                     else
0057                         if numel(biggTemp)>1
0058                              biggtemp = strjoin(biggTemp,';');
0059                         else
0060                             biggtemp = biggTemp{1};
0061                         end
0062                     end
0063                     % Collect all data together
0064                     rxntemp = CurrxnNum{mc};
0065                     genetemp = genenms1{1};
0066                     pathtemp = pathnames{cp};
0067                     if ~isempty(Allrxns{1})
0068                         if ~any(ismember(Allrxns,rxntemp))
0069                             Allgenes{yct} = genetemp;
0070                             Allrxns{yct} = rxntemp;
0071                             Allbigg{yct} =  biggtemp;
0072                             Allpath{yct} = pathnames{cp};
0073                         else
0074                             Aloci = find(ismember(Allrxns,rxntemp));
0075                             Allgenes{yct} = strjoin(unique({Allgenes{Aloci},...
0076                                 genetemp}),'|');
0077                             Allpath{yct} = strjoin(unique({Allpath{Aloci},...
0078                                 pathtemp}),';');
0079                             Allrxns{yct} = rxntemp;
0080                             Allbigg{yct} =  biggtemp;
0081                             Allgenes(Aloci) = []; Allpath(Aloci) = [];
0082                             Allrxns(Aloci) = []; Allbigg(Aloci) = [];
0083                             yct = yct - numel(Aloci);
0084                         end
0085                     else
0086                         Allgenes{yct} = genetemp;
0087                         Allrxns{yct} = rxntemp;
0088                         Allbigg{yct} =  biggtemp;
0089                         Allpath{yct} = pathnames{cp};
0090                     end
0091                     yct = yct + 1;
0092                 end                
0093             end
0094         end
0095     end
0096 end
0097 for ct2 = 1:numel(Allgenes)
0098     temp1 = unique(strsplit(Allgenes{ct2},'|'));
0099     Allgenes{ct2} = strjoin(temp1,'|');
0100     temp2 = unique(strsplit(Allpath{ct2},';'));
0101     Allpath{ct2} = strjoin(temp2,';');
0102 end
0103 % Decomposing data on the basis of pathway code
0104 AllgenesP = {[]}; AllrxnsP = {[]}; AllbiggP = {[]};
0105 for ct = 1:numel(pathnames);
0106     pathloci = regexp(Allpath,pathnames{ct});
0107     pathloci = find(~cellfun('isempty', pathloci));
0108     AllgenesP{ct} = Allgenes(pathloci);
0109     AllrxnsP{ct} = Allrxns(pathloci);
0110     AllbiggP{ct} = Allbigg(pathloci);
0111 end
0112 Biostat = get(handles.Biographcheck,'Value');
0113 if Biostat
0114     hSelectedObj=get(handles.Biographpanel, 'SelectedObject');
0115     BioType = get(hSelectedObj, 'Tag');
0116     Pathchoice = get(handles.Pathwaylistbox,'Value');
0117     gene2rxn(AllgenesP{Pathchoice},AllrxnsP{Pathchoice},AllbiggP{Pathchoice},BioType)
0118 end
0119 
0120 
0121 % Write data to an Excel file
0122 Pth1 = which ('Precons.m');
0123 tind = find(Pth1=='\',1,'last');
0124 Pth = Pth1(1:tind-1);
0125 Filename = [Pth,'\',Org,'.xlsx'];
0126 if exist(Filename,'file')
0127     Txt3 = [Org,'.xlsx already exists in the directory.\n'];
0128     fprintf(Txt3)
0129     Txt4 = 'The file will be replaced with the new one\n';
0130     fprintf(Txt4)
0131     delete(Filename)  
0132 end
0133 Txt2 = 'Writing data to Xlsx file...\n';
0134 fprintf(Txt2)
0135 Alldat = cell(numel(Allgenes)+1,4);
0136 Alldat{1,1} = 'Gene Entry'; Alldat{1,2} = 'KEGG'; Alldat{1,3} = 'BiGG';
0137 Alldat{1,4} = 'Pathway';
0138 for nt = 1:numel(Allgenes)
0139     Alldat{nt+1,1} = Allgenes{nt};
0140     Alldat{nt+1,2} = Allrxns{nt};
0141     Alldat{nt+1,3} = Allbigg{nt};
0142     Alldat{nt+1,4} = Allpath{nt};
0143 end
0144 xlswrite(Filename,Alldat)
0145 clc
0146 fprintf('\nDone!')
0147 
0148 
0149 function gene2rxn(Allgenes,Allrxns,Allbigg,BioType)
0150 AllgeneNO1 = strjoin(Allgenes,'|');
0151 AllgeneNO2 = strsplit(AllgeneNO1,'|'); AllgeneNO2 = unique(AllgeneNO2);
0152 AllgeneNO = numel(AllgeneNO2); % Number of all unique genes
0153 % AllbiggU1 = unique(Allbigg);
0154 AllbiggU = find(~cellfun('isempty', Allbigg));
0155 AllbiggStr = Allbigg(AllbiggU);
0156 AllgeneStr = num2str(1:numel(AllgeneNO2));
0157 AllgeneStr = strsplit(AllgeneStr,' '); AllgeneStr = strcat('G',AllgeneStr);
0158 AllbiggNostr = num2str(1:numel(AllbiggStr));
0159 if ~isempty(AllbiggNostr)
0160     AllbiggNostr = strsplit(AllbiggNostr,' ');
0161     AllbiggNostr = strcat('B',AllbiggNostr);
0162 end
0163 
0164 if strcmp(BioType,'G2K2Bradiobutton') %================================
0165     GprS = zeros(AllgeneNO + numel(Allrxns) + numel(AllbiggU));
0166     for ct1 = 1:numel(Allrxns)
0167         currgenes = strsplit(Allgenes{ct1},'|');
0168         for ct2 = 1:numel(currgenes)
0169             currgeneloci = find(ismember(AllgeneNO2,currgenes{ct2}));
0170             GprS(currgeneloci,AllgeneNO+ct1) = 1;
0171         end
0172         if ~isempty(Allbigg{ct1})
0173             y_coor = find(ismember(AllbiggStr,Allbigg{ct1}));
0174             GprS(AllgeneNO+ct1,AllgeneNO+numel(Allrxns)+y_coor) = 1;    
0175         end
0176     end
0177     Allcum = [AllgeneStr,Allrxns,AllbiggNostr];
0178     GprS = sparse(GprS);
0179     bg1 = biograph(GprS,Allcum);
0180     dolayout(bg1);
0181     
0182     Tetta1 = 0:pi/(numel(AllgeneStr)-1):2*pi;
0183     Tetta2 = 0:pi/(numel(Allrxns)-1):2*pi;
0184     Tetta3 = 0:pi/(numel(AllbiggStr)-1):2*pi;
0185     R1 = 350; R2 = R1*1.5; R3 = R1*2;
0186     X1 = R1.*cos(Tetta1); Y1 = R1.*sin(Tetta1);
0187     X2 = R2.*cos(Tetta2); Y2 = R2.*sin(Tetta2);
0188     X3 = R3.*cos(Tetta3); Y3 = R3.*sin(Tetta3);
0189     for ct = 1:numel(Allcum)
0190         if ct<=numel(AllgeneStr)
0191             bg1.nodes(ct).Position = [X1(ct),Y1(ct)];
0192             set(bg1.nodes(ct),'Shape','circle')
0193             set(bg1.nodes(ct),'Label',AllgeneNO2{ct})
0194         elseif ct>numel(AllgeneStr) && ct<=(numel(Allrxns)+numel(AllgeneStr))
0195             bg1.nodes(ct).Position = [X2(ct-numel(AllgeneStr)),...
0196                 Y2(ct-numel(AllgeneStr))];
0197             set(bg1.nodes(ct),'Shape','diamond')
0198         else
0199             bg1.nodes(ct).Position = [X3(ct-numel(AllgeneStr)-numel(Allrxns)),...
0200                 Y3(ct-numel(AllgeneStr)-numel(Allrxns))];
0201             set(bg1.nodes(ct),'Label',AllbiggStr{ct-numel(AllgeneStr)-numel(Allrxns)})
0202         end
0203     end
0204 
0205 elseif strcmp(BioType,'G2Bradiobutton') %====================================
0206     gene4biggRaw = Allgenes(AllbiggU);
0207     gene4bigg = strjoin(gene4biggRaw,'|');
0208     gene4bigg = strsplit(gene4bigg,'|'); gene4bigg = unique(gene4bigg);
0209     gene4biggNo = numel(gene4bigg); % Number of all unique genes
0210     AllgeneStr1 = num2str(1:numel(gene4bigg));
0211     AllgeneStr1 = strsplit(AllgeneStr1,' ');
0212     AllgeneStr1 = strcat('G',AllgeneStr1);
0213     GprS = zeros(gene4biggNo + numel(AllbiggU));
0214     for ct1 = 1:numel(gene4biggRaw)
0215         currgenes = strsplit(gene4biggRaw{ct1},'|');
0216         for ct2 = 1:numel(currgenes)
0217             currgeneloci = find(ismember(gene4bigg,currgenes{ct2}));
0218             GprS(currgeneloci,gene4biggNo+ct1) = 1;
0219         end
0220     end
0221     Allcum = [AllgeneStr1,AllbiggNostr];
0222     GprS = sparse(GprS);
0223     bg1 = biograph(GprS,Allcum);
0224     dolayout(bg1);
0225     Tetta1 = 0:pi/(numel(AllgeneStr1)-1):2*pi;
0226     Tetta2 = 0:pi/(numel(AllbiggNostr)-1):2*pi;
0227         R1 = 350; R2 = R1*1.5;
0228     X1 = R1.*cos(Tetta1); Y1 = R1.*sin(Tetta1);
0229     X2 = R2.*cos(Tetta2); Y2 = R2.*sin(Tetta2);
0230     for ct = 1:numel(Allcum)
0231         if ct<=numel(AllgeneStr1)
0232             bg1.nodes(ct).Position = [X1(ct),Y1(ct)];
0233             set(bg1.nodes(ct),'Shape','circle')
0234             set(bg1.nodes(ct),'Label',gene4bigg{ct})
0235         else
0236             bg1.nodes(ct).Position = [X2(ct-numel(AllgeneStr1)),...
0237                 Y2(ct-numel(AllgeneStr1))];
0238             set(bg1.nodes(ct),'Label',AllbiggStr{ct-numel(AllgeneStr1)})
0239         end
0240     end
0241 else %=====================================================================
0242     GprS = zeros(AllgeneNO + numel(Allrxns));
0243     for ct1 = 1:numel(Allrxns)
0244         currgenes = strsplit(Allgenes{ct1},'|');
0245         for ct2 = 1:numel(currgenes)
0246             currgeneloci = find(ismember(AllgeneNO2,currgenes{ct2}));
0247             GprS(currgeneloci,AllgeneNO+ct1) = 1;
0248         end
0249     end
0250     Allcum = [AllgeneStr,Allrxns];
0251     GprS = sparse(GprS);
0252     bg1 = biograph(GprS,Allcum);
0253     dolayout(bg1);
0254     Tetta1 = 0:pi/(numel(AllgeneStr)-1):2*pi;
0255     Tetta2 = 0:pi/(numel(Allrxns)-1):2*pi;
0256     R1 = 350; R2 = R1*1.5;
0257     X1 = R1.*cos(Tetta1); Y1 = R1.*sin(Tetta1);
0258     X2 = R2.*cos(Tetta2); Y2 = R2.*sin(Tetta2);
0259     for ct = 1:numel(Allcum)
0260         if ct<=numel(AllgeneStr)
0261             bg1.nodes(ct).Position = [X1(ct),Y1(ct)];
0262             set(bg1.nodes(ct),'Shape','circle')
0263             set(bg1.nodes(ct),'Label',AllgeneNO2{ct})
0264         else
0265             bg1.nodes(ct).Position = [X2(ct-numel(AllgeneStr)),...
0266                 Y2(ct-numel(AllgeneStr))];
0267             set(bg1.nodes(ct),'Shape','diamond')
0268         end
0269     end
0270     
0271 end
0272 
0273 dolayout(bg1, 'Pathsonly', true);
0274 view(bg1)

Generated on Sat 16-Jul-2016 20:21:30 by m2html © 2005