% Feb. 18, 2009

% utility that reads a tab delimited file and creates a cell array with all
% the information:

% First column: Gene Set Database source (e.g. KEGG) (1st entry in cell
% array)
% Second column: Pathway name (2nd entry in cell array)
% Third-Xth columns: Tab delimited rows of genes in pathways (Gene IDs)
% (vector in 3rd entry in cell array)



% output_cellarray{i}{j} = field j of line i
% (j = 1: database source, j = 2: pathway name, j = 3: vector of gene IDs)




function [output_cellarray, num_genes_per_gene_set]=multi_list2cell_GeneSetDB(filename)
% MULTI_LIST2CELL_GENESETDB  Read a tab-delimited gene-set file into a cell array.
%
%   [output_cellarray, num_genes_per_gene_set] = multi_list2cell_GeneSetDB(filename)
%
%   Input:
%     filename - path of a text file in which every data line has the form
%                <source> TAB <pathway name> TAB <gene ID> TAB <gene ID> ...
%
%   Outputs:
%     output_cellarray       - 1-by-N cell array, one entry per data line.
%                              output_cellarray{k} is the 1-by-3 cell
%                              {source, pathway_name, gene_ids}, where
%                              gene_ids is a numeric row vector ([] when the
%                              line lists no genes).
%     num_genes_per_gene_set - 1-by-N vector; element k is the number of
%                              gene IDs stored in output_cellarray{k}{3}.
%
%   Behaviour notes:
%     * Lines that are empty or contain only whitespace are skipped.
%     * Empty gene fields (e.g. produced by a trailing tab) are ignored.
%     * A file without data lines returns {} and [].
%
%   Errors:
%     multi_list2cell_GeneSetDB:cannotOpenFile - the file cannot be opened.
%     multi_list2cell_GeneSetDB:badLine        - a non-blank line has no
%                                                pathway-name column.
%     multi_list2cell_GeneSetDB:badGeneID      - a gene field is not a number.

separator=9; % ASCII code of the tab character

%separator=32; % space

% Initialise the outputs so that a file without data lines still returns
% well-defined (empty) values instead of leaving them unassigned.
output_cellarray={};
num_genes_per_gene_set=[];

[fid, open_msg]=fopen(filename);
if fid==-1
    error('multi_list2cell_GeneSetDB:cannotOpenFile', ...
        'Could not open file "%s": %s', filename, open_msg);
end
% Close the file when this function exits, including exits caused by errors.
close_file=onCleanup(@() fclose(fid)); %#ok<NASGU>

counter=0;  % number of data lines stored so far
line_no=0;  % physical line number, used only in error messages

while 1

    tline=fgetl(fid);
    if ~ischar(tline), break, end  % fgetl returns -1 at end of file
    line_no=line_no+1;

    % Blank lines carry no gene set; skip them rather than failing below.
    if isempty(strtrim(tline)), continue, end

    % Locate the start and end index of every tab-separated field.
    find_tab=find(tline==char(separator));
    beginnings=[1 find_tab+1];
    ends=[find_tab-1 length(tline)];
    ntokens=length(beginnings);

    if ntokens<2
        error('multi_list2cell_GeneSetDB:badLine', ...
            'Line %d of "%s" has no tab-separated pathway name.', ...
            line_no, filename);
    end

    first_col=tline(beginnings(1):ends(1));
    second_col=tline(beginnings(2):ends(2));

    vec=[];
    for i=3:ntokens
        token=strtrim(tline(beginnings(i):ends(i)));

        % An empty field (trailing or doubled tab) is not a gene ID.
        if isempty(token), continue, end

        % str2double only parses numbers; unlike str2num it never
        % evaluates the text as MATLAB code.
        value=str2double(token);
        if isnan(value) && ~strcmpi(token,'nan')
            error('multi_list2cell_GeneSetDB:badGeneID', ...
                'Line %d of "%s": gene ID "%s" is not numeric.', ...
                line_no, filename, token);
        end
        vec(end+1)=value; %#ok<AGROW>
    end

    counter=counter+1;
    num_genes_per_gene_set(counter)=numel(vec);  % number of genes per pathway
    output_cellarray{counter}={first_col,second_col,vec};
end


% Cell array output example:

% Input file:

% KEGG    PATHWAY1    1  2  3  4
% KEGG    PATHWAY2    9
% KEGG    PATHWAY3    8  10


% Output cell array:
%output{1,:}

%ans = 

 %   'KEGG'    'PATHWAY1'    [1x4 double]
 
%ans = 

 %   'KEGG'    'PATHWAY2'    [9]

%ans = 

%    'KEGG'    'PATHWAY3'    [1x2 double]


% output{1}{1} = KEGG

% output{1}{2} = PATHWAY1

% output{1}{3} = 1     2     3     4

% output{1}{3}(1) = 1

% length(output{1}{3}) = 4