Home > BiKEGG > BaseMapReader.m

BaseMapReader

PURPOSE ^

BaseMapReader

SYNOPSIS ^

function basemap = BaseMapReader(Netstat)

DESCRIPTION ^

 BaseMapReader
 reads KGML file of KEGG global metabolic pathway (map01100),
 extracts all required information and organizes them in a structure
 array, with three fields of r, c, and rc for respectively reactions, 
 compounds and reaction-to-compound details for subsequent use in NetDraw.
 
 Input:
 Netstat: Operate in online (1) or offline (0) mode. In offline mode, data
 in KEGGmaps folder of BiKEGG will be used.
 
 Output:
 basemap: Structure array with three fields of r, c, and rc for reactions,
 compounds and reaction-to-compound details respectively.

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function basemap = BaseMapReader(Netstat)
0002 % BaseMapReader
0003 % reads KGML file of KEGG global metabolic pathway (map01100),
0004 % extracts all required information and organizes them in a structure
0005 % array, with three fields of r, c, and rc for respectively reactions,
0006 % compounds and reaction-to-compound details for subsequent use in NetDraw.
0007 %
0008 % Input:
0009 % Netstat: Operate in online (1) or offline (0) mode. In offline mode, data
0010 % in KEGGmaps folder of BiKEGG will be used.
0011 %
0012 % Output:
0013 % basemap: Structure array with three fields of r, c, and rc for reactions,
0014 % compounds and reaction-to-compound details respectively.
0015 
0016 % O. Jamialahmadi
0017 % TMU, Chem. Eng. Dept., Biotech. Group
0018 % July 2016
0019 % -------------------------------------------------------------------------
0020 
0021 if Netstat
0022     Dat = 'http://rest.kegg.jp/get/rn01100/kgml'; % KEGG REST URL requesting the KGML of rn01100
0023     Dat = urlread(Dat); % Dat is overwritten with the downloaded text
0024 else
0025     load(which('map01100.mat')) % NOTE(review): presumably defines Dat (used below) - confirm against the .mat file
0026 end
0027 % Extract rxns details ====================================================
0028 rxnDat = regexp(Dat,'(?<=entry) id="(\d+)" name="rn:[^"]*.*?(?=</entry>)','match'); % text of every entry whose name starts with rn:
0029 RawRNs = regexp(rxnDat,'(?<=reaction="rn:)[^"]*','match'); % per entry: content of the reaction="rn:..." attribute
0030 Lineseg = regexp(rxnDat, '(?<=type="line" coords=")[^"]*', 'match'); % per entry: coords strings of all type="line" graphics
0031 rxns = ({}); ctr = 1; rxnx = ({}); rxny = ({}); rxn_id = (0); rxn_col = ({});
0032 for i1 = 1:numel(RawRNs)
0033     temp_rxn = regexp(RawRNs{i1}{1}, 'R\d{5}', 'match'); % all R-numbers listed in the first reaction attribute
0034     rxn_idTemp = str2double(regexp(rxnDat{i1},'(?<=id=")(\d+)[^"]*','match')); % NOTE(review): assumes id=" matches once per entry - confirm
0035     rxn_colTemp = unique(regexp(rxnDat{i1},'(?<=fgcolor="#)[^"]*','match')); % distinct fgcolor values found in this entry
0036     temp_coord =regexp(Lineseg{i1}, '\d+', 'match'); % split each coords string into its numeric tokens
0037     line_coord1 = cellfun(@str2double, temp_coord, 'UniformOutput', false);
0038     line_x = ({}); line_y = ({});
0039     for i3 = 1:numel(line_coord1) % Extract all lines for each rxn
0040         temp_coord1 = line_coord1{i3};
0041         x_temp = temp_coord1(1:2:end); % odd positions are stored as x values
0042         y_temp = temp_coord1(2:2:end); % even positions are stored as y values
0043         line_x{i3} = x_temp;
0044         line_y{i3} = y_temp;
0045         clear x_temp y_temp
0046     end
0047     for i2 = 1:numel(temp_rxn)
0048         rxns{ctr} = temp_rxn{i2}; % one output slot per R-number; entries listing several reactions are expanded
0049         rxn_id(ctr) = rxn_idTemp;
0050         rxn_col(ctr) = rxn_colTemp; % NOTE(review): needs exactly one unique fgcolor per entry - confirm
0051         rxnx{ctr} = line_x;  % x values of all lines of the entry (same cell repeated for each of its reactions)
0052         rxny{ctr} = line_y;  % y values of all lines of the entry (same cell repeated for each of its reactions)
0053         ctr = ctr + 1;
0054     end
0055 end
0056 clear rxnDat RawRNs Lineseg
0057 % =========================================================================
0058 % Extract compound details ================================================
0059 cpdDat = regexp(Dat,'(?<=entry) id="(\d+)" name="cpd:[^"]*.*?(?=</entry>)','match'); % text of every entry whose name starts with cpd:
0060 glDat = regexp(Dat,'(?<=entry) id="(\d+)" name="gl:[^"]*.*?(?=</entry>)','match'); % text of every entry whose name starts with gl:
0061 cpd_coord = (0); cpd_id = (0); cpds = ({});
0062 for i1=1:numel(cpdDat)
0063     cpds(i1) = regexp(cpdDat{i1},'(?<=name="cpd:)[^"]*','match');
0064     cpd_id(i1) = str2double(regexp(cpdDat{i1},'(?<=id=")(\d+)[^"]*','match'));
0065     cpd_coords = regexp(cpdDat{i1},'type="circle" x="(\d+)" y="(\d+)" width="(\d+)" height="(\d+)"','tokens');
0066     cpd_coord(i1,1) = str2double(cpd_coords{1}{1}); % columns of cpd_coord: x, y, width, height of the first circle graphic
0067     cpd_coord(i1,2) = str2double(cpd_coords{1}{2});
0068     cpd_coord(i1,3) = str2double(cpd_coords{1}{3});
0069     cpd_coord(i1,4) = str2double(cpd_coords{1}{4});
0070 end
0071 gl_coord = (0); gl_id = (0); gls = ({});
0072 for i1 = 1:numel(glDat)
0073     gls(i1) = regexp(glDat{i1},'(?<=name="gl:)[^"]*','match');
0074     gl_id(i1) = str2double(regexp(glDat{i1},'(?<=id=")(\d+)[^"]*','match'));
0075     gl_coords = regexp(glDat{i1},'type="circle" x="(\d+)" y="(\d+)" width="(\d+)" height="(\d+)"','tokens');
0076     gl_coord(i1,1) = str2double(gl_coords{1}{1}); % columns of gl_coord: x, y, width, height of the first circle graphic
0077     gl_coord(i1,2) = str2double(gl_coords{1}{2});
0078     gl_coord(i1,3) = str2double(gl_coords{1}{3});
0079     gl_coord(i1,4) = str2double(gl_coords{1}{4});
0080 end
0081 % =========================================================================
0082 % Extract rxn-to-compound details =========================================
0083 % NOTE: rxn-to-compound details are collected based on rxn ID, meaning all
0084 % rxns correlated with one certain ID are stored together.
0085 rxn4cmp = regexp(Dat,'(?<=<reaction)\s+id="(\d+)" (.+?/>)*?\s*(?=</reaction>)','tokens'); % per reaction element: {id, remaining text}
0086 rxn4cmp_id = (0); rxn4cmp_subid= ({}); rxn4cmp_subname= ({});
0087 rxn4cmp_prodid= ({}); rxn4cmp_prodname= ({}); rxn4cmp_name = ({});
0088 for i1 = 1:numel(rxn4cmp)
0089     rxn4cmp_det = regexp(rxn4cmp{i1}{2},'(?<="rn:)[^"]*','match');
0090     temp_rxn1 = regexp(rxn4cmp_det{1}, 'R\d{5}', 'match'); % all R-numbers named by this reaction element
0091     for r1 = 1:numel(temp_rxn1)
0092         rxn4cmp_name{i1}{r1} = temp_rxn1{r1};
0093     end
0094     rxndet_sub = regexp(rxn4cmp{i1}{2},'(?<=<substrate)\s+id="(\d+)"\s+name="cpd:C(\d+)"[^/]*','tokens'); % cpd: substrates as {id, digits}
0095     rxndet_sub1 = regexp(rxn4cmp{i1}{2},'(?<=<substrate)\s+id="(\d+)"\s+name="gl:G(\d+)"[^/]*','tokens'); % gl: substrates as {id, digits}
0096     rxndet_prod = regexp(rxn4cmp{i1}{2},'(?<=<product)\s+id="(\d+)"\s+name="cpd:C(\d+)"[^/]*','tokens'); % cpd: products as {id, digits}
0097     rxndet_prod1 = regexp(rxn4cmp{i1}{2},'(?<=<product)\s+id="(\d+)"\s+name="gl:G(\d+)"[^/]*','tokens'); % gl: products as {id, digits}
0098     for i2 = 1:numel(rxndet_sub)
0099         rxn4cmp_subid{i1}(i2) = str2double(rxndet_sub{i2}(1));
0100         rxn4cmp_subname{i1}{i2} = strcat('C',rxndet_sub{i2}{2}); % re-attach the 'C' prefix that the regex left outside the token
0101     end
0102     for i2 = numel(rxndet_sub)+1:numel(rxndet_sub1)+numel(rxndet_sub) % gl: substrates are appended after the cpd: substrates
0103         rxn4cmp_subid{i1}(i2) = str2double(rxndet_sub1{i2-numel(rxndet_sub)}(1));
0104         rxn4cmp_subname{i1}{i2} = strcat('G',rxndet_sub1{i2-numel(rxndet_sub)}{2});
0105     end
0106     for i3 = 1:numel(rxndet_prod)       
0107         rxn4cmp_prodid{i1}(i3) = str2double(rxndet_prod{i3}(1));
0108         rxn4cmp_prodname{i1}{i3} = strcat('C',rxndet_prod{i3}{2});       
0109     end
0110     for i3 = 1+numel(rxndet_prod):numel(rxndet_prod1)+numel(rxndet_prod)      
0111         rxn4cmp_prodid{i1}(i3) = str2double(rxndet_prod1{i3-numel(rxndet_prod)}(1));
0112         rxn4cmp_prodname{i1}{i3} = strcat('G',rxndet_prod1{i3-numel(rxndet_prod)}{2});
0113     end
0114     rxn4cmp_id(i1) = str2double(rxn4cmp{i1}{1});
0115 end
0116 basemap.r.rxn = rxns;
0117 basemap.r.rxnid = rxn_id;
0118 basemap.r.col = rxn_col;
0119 basemap.r.rxnx = rxnx;
0120 basemap.r.rxny = rxny;
0121 basemap.c.cpd = cpds;
0122 basemap.c.cpdid = cpd_id;
0123 basemap.c.cpdxy = cpd_coord;
0124 basemap.c.gl = gls;
0125 basemap.c.glid = gl_id;
0126 basemap.c.glxy = gl_coord;
0127 basemap.rc.rxn = rxn4cmp_name;
0128 basemap.rc.rxnid = rxn4cmp_id;
0129 basemap.rc.sub = rxn4cmp_subname;
0130 basemap.rc.subid = rxn4cmp_subid;
0131 basemap.rc.prod = rxn4cmp_prodname;
0132 basemap.rc.prodid = rxn4cmp_prodid;
0133 
0134 % Modify basemap: There are some discrepancies between KGML and basemap
0135 % Error in R08940
0136 basemap.rc.sub{basemap.rc.rxnid==18} = 'C00019'; % NOTE(review): replaces the nested cell of names with a plain char; assumes exactly one rxnid equals 18
0137 basemap.rc.subid{basemap.rc.rxnid==18} = 4254; % hard-coded substrate id paired with the name above
0138 % Error in R04097
0139 basemap.rc.prod{basemap.rc.rxnid==718} = 'C15975'; % NOTE(review): replaces the nested cell of names with a plain char; assumes exactly one rxnid equals 718
0140 basemap.rc.prodid{basemap.rc.rxnid==718} = 2997; % hard-coded product id paired with the name above

Generated on Sat 16-Jul-2016 20:21:30 by m2html © 2005