%% BioMed_Central_Tex_Template_v1.06
%%                                      %
%  bmc_article.tex            ver: 1.06 %
%                                       %

%%IMPORTANT: do not delete the first line of this template
%%It must be present to enable the BMC Submission system to 
%%recognise this template!!

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                     %%
%%  LaTeX template for BioMed Central  %%
%%     journal article submissions     %%
%%                                     %%
%%         <14 August 2007>            %%
%%                                     %%
%%                                     %%
%% Uses:                               %%
%% cite.sty, url.sty, bmc_article.cls  %%
%% ifthen.sty. multicol.sty		   %%
%%				      	   %%
%%                                     %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                                                 %%	
%% For instructions on how to fill out this Tex template           %%
%% document please refer to Readme.pdf and the instructions for    %%
%% authors page on the biomed central website                      %%
%% http://www.biomedcentral.com/info/authors/                      %%
%%                                                                 %%
%% Please do not use \input{...} to include other tex files.       %%
%% Submit your LaTeX manuscript as one .tex document.              %%
%%                                                                 %%
%% All additional figures and files should be attached             %%
%% separately and not embedded in the \TeX\ document itself.       %%
%%                                                                 %%
%% BioMed Central currently use the MikTeX distribution            %%
%% (TeX for Windows) of TeX and LaTeX.  This is available from     %%
%% http://www.miktex.org                                           %%
%%                                                                 %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\NeedsTeXFormat{LaTeX2e}[1995/12/01]
\documentclass[10pt]{bmc_article}    



% Load packages
\usepackage{cite} % Make references as [1-4], not [1,2,3,4]
\usepackage{url}  % Formatting web addresses  
\usepackage{ifthen}  % Conditional 
\usepackage{multicol}   %Columns
\usepackage[utf8]{inputenc} %unicode support
%\usepackage[applemac]{inputenc} %applemac support if unicode package fails
%\usepackage[latin1]{inputenc} %UNIX support if unicode package fails
\urlstyle{rm}
 
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%	
%%                                             %%
%%  If you wish to display your graphics for   %%
%%  your own use using includegraphic or       %%
%%  includegraphics, then comment out the      %%
%%  following two lines of code.               %%   
%%  NB: These lines *must* be included when    %%
%%  submitting to BMC.                         %% 
%%  All figure files must be submitted as      %%
%%  separate graphics through the BMC          %%
%%  submission process, not included in the    %% 
%%  submitted article.                         %% 
%%                                             %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%                     


% Stub out both graphics macros so that they expand to nothing.  The template
% comment above requires these two lines when submitting to BMC, because
% figure files are uploaded separately rather than embedded in the article.
% NOTE: each stub is defined without parameters, so any [options] or
% {filename} written after a call is not consumed by the macro.
\def\includegraphic{}
\def\includegraphics{}



% Page geometry for the text block (all values are absolute, in cm).
\setlength{\topmargin}{0.0cm}     % no extra space above the running head
\setlength{\textheight}{21.5cm}   % height of the text body
\setlength{\oddsidemargin}{0cm}   % left-margin offset for odd-numbered pages (every page if one-sided)
\setlength{\textwidth}{16.5cm}    % width of the text body
\setlength{\columnsep}{0.6cm}     % gap between columns in multi-column text

% Declare the ifthen flag `publ` (a new boolean starts out false).  It is set
% by the bmcformat environment and tested with \ifthenelse{\boolean{publ}}
% further down to choose between publication-style and review-style output.
\newboolean{publ}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                              %%
%% You may change the following style settings  %%
%% Should you wish to format your article       %%
%% in a publication style for printing out and  %%
%% sharing with colleagues, but ensure that     %%
%% before submitting to BMC that the style is   %%
%% returned to the Review style setting.        %%
%%                                              %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 

%Review style settings
%\newenvironment{bmcformat}{\begin{raggedright}\baselineskip20pt\sloppy\setboolean{publ}{false}}{\end{raggedright}\baselineskip20pt\sloppy}

%Publication style settings
%\newenvironment{bmcformat}{\fussy\setboolean{publ}{true}}{\fussy}

%New style setting
% Active definition of the bmcformat environment that wraps the whole document.
% Begin code: 20pt baseline skip, \sloppy line breaking, and publ set to false,
% so every \ifthenelse{\boolean{publ}} test takes its false branch.
% End code: re-issues the 20pt baseline skip and \sloppy.
% Unlike the commented-out "Review style" variant above, this one does not
% wrap the text in a raggedright environment.
\newenvironment{bmcformat}{\baselineskip20pt\sloppy\setboolean{publ}{false}}{\baselineskip20pt\sloppy}


% definitions
% \cL: shorthand for a calligraphic L.  It uses \mathcal, so it is only valid
% in math mode.  It is not used in the visible part of this file; presumably
% it is used by the \input file glps.tex -- TODO confirm.
\def\cL{{\mathcal L}}


% Begin ...
\begin{document}
\begin{bmcformat}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                          %%
%% Enter the title of your article here     %%
%%                                          %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\title{The Partitioned LASSO-Patternsearch Algorithm 
with Application to Gene Expression Data}
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                          %%
%% Enter the authors here                   %%
%%                                          %%
%% Ensure \and is entered between all but   %%
%% the last two authors. This will be       %%
%% replaced by a comma in the final article %%
%%                                          %%
%% Ensure there are no trailing spaces at   %% 
%% the ends of the lines                    %%     	
%%   Weiliang Shi, Grace Wahba, Rafael Irizarry,Hector Corrada Bravo, Stephen Wright                                       %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\author{ Weiliang Shi\correspondingauthor$^1$
         \email{Weiliang Shi\correspondingauthor - weiliang.shi@sanofi-aventis.com},
         Grace Wahba$^2$
         \email{Grace Wahba - wahba@stat.wisc.edu},
Rafael A. Irizarry$^3$
	   \email{Rafael A. Irizarry - ririzarry@jhsph.edu},
	   Hector Corrada Bravo$^4$
	   \email{Hector Corrada Bravo - hcorrada@umiacs.umd.edu}%
and 
	   Stephen J. Wright$^5$
	   \email{ Stephen J. Wright - swright@cs.wisc.edu}%         
      }


      

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                          %%
%% Enter the authors' addresses here        %%
%%                                          %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\address{%
    \iid(1)Sanofi-Aventis, Cambridge, Massachusetts, USA\\
    \iid(2)Department of Statistics, University of Wisconsin-Madison, Madison, Wisconsin, USA\\
    \iid(3)Department of Biostatistics, Johns Hopkins University, Baltimore, Maryland, USA\\
    \iid(4)Center for Bioinformatics and Computational Biology, Computer Science Department, University of Maryland-College Park, College Park, Maryland, USA\\
    \iid(5)Department of Computer Sciences, University of Wisconsin-Madison, Madison, Wisconsin, USA
}%

\maketitle

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                          %%
%% The Abstract begins here                 %%
%%                                          %%  
%% Please refer to the Instructions for     %%
%% authors on http://www.biomedcentral.com  %%
%% and include the section headings         %%
%% accordingly for your article type.       %%   
%%                                          %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\begin{abstract}
        % Do not use inserted blank lines (ie \\) until main body of text.
\noindent \textbf{Background:} In systems biology, the task of reverse engineering gene pathways from data has been limited not just by the curse of dimensionality (the interaction space is huge) but also by systematic error in the data. The gene expression barcode reduces spurious association driven by batch effects and probe effects.  The binary nature of the resulting expression calls lends itself perfectly to modern regularization approaches that thrive with dimensionality.

\noindent \textbf{Results:} The Partitioned LASSO-Patternsearch algorithm is proposed to identify patterns of multiple dichotomous risk factors for outcomes
  of interest in genomic studies. A partitioning scheme is used to
  identify promising patterns by solving many LASSO-Patternsearch
  subproblems in parallel. All variables that survive this stage
  proceed to an aggregation stage where the most significant patterns
  are identified by solving a reduced LASSO-Patternsearch problem in
  just these variables. This approach was applied to genetic data sets
  with expression levels dichotomized by the gene expression barcode.
  Most of the genes and second-order interactions thus selected
  are known to be related to the outcomes.

\noindent\textbf{Conclusions:} We demonstrate with simulations and data analyses that the proposed method not only selects variables and patterns more accurately, but also provides smaller models with better prediction accuracy, in comparison to several competing methodologies.       
\end{abstract}



\ifthenelse{\boolean{publ}}{\begin{multicols}{2}}{}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                          %%
%% The Main Body begins here                %%
%%                                          %%
%% Please refer to the instructions for     %%
%% authors on:                              %%
%% http://www.biomedcentral.com/info/authors%%
%% and include the section headings         %%
%% accordingly for your article type.       %% 
%%                                          %%
%% See the Results and Discussion section   %%
%% for details on how to create sub-sections%%
%%                                          %%
%% use \cite{...} to cite references        %%
%%  \cite{koon} and                         %%
%%  \cite{oreg,khar,zvai,xjon,schn,pond}    %%
%%  \nocite{smith,marg,hunn,advi,koha,mouse}%%
%%                                          %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




%%%%%%%%%%%%%%%%
%% Background %%
%%
\input{glps.tex}
\bigskip

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Authors' contributions}
WS conceived the method, designed and implemented the pLPS algorithm, analyzed the data and drafted the manuscript.  GW supervised the project.  RAI provided the data and supervised the project.  HCB participated in the initial discussion of the method.  SJW designed and implemented the algorithm for the minimization problem and participated in the writing of the manuscript. All authors reviewed and approved the final manuscript.

%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Acknowledgements}
  \ifthenelse{\boolean{publ}}{\small}{}
WS - Research supported in part by NIH Grant EY09946 and NSF Grant DMS-0604572. GW - Research supported in part by NIH Grant EY09946, NSF Grant DMS-0906818 and ONR Grant N0014-09-1-0655.
RAI - Research supported in part by NIH Grant GM083084.
HCB - Research supported in part by NIH Grant GM083084, NIH Grant EY09946 and NSF Grant DMS-0604572.
SJW - Research supported in part by NSF Grants DMS-0914524 and DMS-0906818.
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                  The Bibliography                       %%
%%                                                         %%              
%%  Bmc_article.bst  will be used to                       %%
%%  create a .BBL file for submission, which includes      %%
%%  XML structured for BMC.                                %%
%%  After submission of the .TEX file,                     %%
%%  you will be prompted to submit your .BBL file.         %%
%%                                                         %%
%%                                                         %%
%%  Note that the displayed Bibliography will not          %% 
%%  necessarily be rendered by Latex exactly as specified  %%
%%  in the online Instructions for Authors.                %% 
%%                                                         %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\newpage
{\ifthenelse{\boolean{publ}}{\footnotesize}{\small}
 \bibliographystyle{bmc_article}  % Style BST file
%  \bibliography{bmc_article} }     % Bibliography file (usually '*.bib' ) 
  \bibliography{mylpsr.ed,sjw-refs.1}

%%%%%%%%%%%

\ifthenelse{\boolean{publ}}{\end{multicols}}{}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                               %%
%% Figures                       %%
%%                               %%
%% NB: this is for captions and  %%
%% Titles. All graphics must be  %%
%% submitted separately and NOT  %%
%% included in the Tex document  %%
%%                               %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%
%% Do not use \listoffigures as most will be included as separate files

\section*{Figures}
  \subsection*{Figure 1 - Diagram of the subproblems in the first stage of pLPS, assuming 5 partitions.}
 Side length of a square is the partition
   size, while the horizontal axis contains the labels of the first
   effect and the vertical axis the label of the second effect.
   Squares filled with red dots are ``type-one'' subproblems while
   the triangles filled with green dots are ``type-two''
   subproblems.




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                               %%
%% Tables                        %%
%%                               %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Use of \listoftables is discouraged.
%%
\section*{Tables}
  \subsection*{Table 1 - Simulation Example 1}
$n = 700$ and $p = 400$ with no correlations. Tabulated numbers show the number of tests (out of 100) in which the pattern was detected by each algorithm. The number outside the parentheses is the number of times the given pattern was selected; the numbers inside the parentheses show how many times the variables in the pattern are detected in the model, as a main effect or in some interaction. The final column shows the total number of times (in 100 tests) that the algorithms selected patterns (variables for RF) that do not appear in our model.
    \par
%    \mbox{

\label{simglps1}
\begin{center}
\begin{tabular}{c|cccc}
 \hline
 Methods&$X_{50}$&$X_{150}X_{250}$&$X_{251}X_{252}$&noise\\\hline
 pLPS &94 (100)&99 (99,99)&96 (97,97)&153\\
 Logic&100 (100)&70 (88,91) &65 (84,90)&190\\
 RF&NA (100)&NA (96,97)&NA (94,96)&(517)\\
 SPLR &100 (100)&97 (100,97)&91 (100,98)&712\\\hline
\end{tabular}
\end{center}


\subsection*{Table 2 - Simulation Example 2} 
$n =1000$ and $p = 8000$ with correlations among neighboring variables. Tabulated numbers show the number of tests (out of 50) in which the pattern was detected by each algorithm.  The number outside the parentheses is the number of times the given pattern was selected; the numbers inside the parentheses show how many times the variables in the pattern are detected in the model, as a main effect or in some interaction. The final column shows the total number of times (in 50 tests) that the algorithms selected patterns (variables for RF) that do not appear in our model.

\label{simglps2}
    \par
%    \mbox{

\begin{center}
\begin{tabular}{c|ccccc}
 \hline
 Methods&$X_{500}$&$X_{5000}$&$X_{1000}X_{3000}$&$X_{7000}X_{7002}$&noise\\\hline
 pLPS &50 (50)&50 (50)&48 (48,50)&50 (50,50)&278\\
 RF&NA (50)&NA (50)&NA (28,37) & NA (50,50)&(335)\\
 SPLR &50 (50)&50 (50)&50 (50,50)&50 (50,50)&800\\\hline
\end{tabular}
\end{center}

\subsection*{Table 3 - Simulation Example 3}
$n =1000$ and $p = 500$ with correlations among neighboring variables. Tabulated numbers show the number of tests (out of 50) in which the pattern was detected by each algorithm. The number outside the parentheses is the number of times the given pattern was selected; the numbers inside the parentheses show how many times the variables in the pattern are detected in the model, as a main effect or in some interaction. The final column shows the total number of times (in 50 tests) that the algorithms selected patterns (variables for RF) that do not appear in our model.
\label{simglps3}

\par

\begin{center}
\begin{tabular}{c|ccccc}
 \hline
 Methods&$X_{100}$&$X_{200}$&$X_{300}X_{400}$&$X_{150}X_{450}X_{451}$&noise\\\hline
 pLPS3 &47 (50) &   50 (50) &  47 (50,50) &  47 (50,49,48)& 204\\
 Logic & 50 (50) &  50 (50) &  34 (43,44) &  30 (50,44,41)&151\\
 RF&NA (50)&NA (50)&NA (36,40) & NA (49,47,49)&(279)\\
 SPLR &50 (50)& 50 (50)& 45 (49,50) & 50 (50,50,50)&554\\\hline
\end{tabular}
\end{center}

\subsection*{Table 4 - Cancer data: Summary of results from five-fold cross validation}

``Total'' sums the number of selected genes, the number of non-zero coefficients in the model, and the highest order of interactions. AUC indicates the area under the ROC curve.
\label{tabcancersum}
\par

\begin{center}
\begin{tabular*}{0.85\textwidth}{@{\extracolsep{\fill}}c|ccccc}
 \hline
Methods&$\#$ Gene & $\#$ Para& $q$ & Total &AUC\\
\hline
pLPS &9.2&6.6&{\bf 2.0}&{\bf 17.8}&{\bf 0.982}\\
pLPS3&{\bf 8.4}&6.4&3.0&{\bf 17.8}&0.945\\
Logic&14.0&{\bf 5.2}&5.0&24.2&0.956\\
SPLR&17.2&20.6&5.6&43.4&0.962\\
\hline
\end{tabular*}
\end{center}

\subsection*{Table 5 - Breast cancer survival data: Summary of results from five-fold cross validation}

``Total'' sums the number of selected genes, the number of non-zero coefficients, and the highest order of interactions. AUC indicates the area under the ROC curve.
\label{tabbreastsum}
\par
\begin{center}
\begin{tabular*}{0.85\textwidth}{@{\extracolsep{\fill}}c|ccccc}
 \hline
Methods&$\#$ Gene & $\#$ Para& $q$ & Total & AUC\\
\hline

pLPS&10.0&6.8&{\bf 2.0}&18.8&{\bf 0.824}\\
pLPS3&10.2&6.6&3.0&19.8&0.780\\
Logic&{\bf 4.4}&{\bf 2.6}&3.8&{\bf 10.8}&0.721\\
SPLR&19.4& 20.6& 5.0& 45.0&0.793\\
\hline
\end{tabular*}
\end{center}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                               %%
%% Additional Files              %%
%%                               %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



}
\end{bmcformat}
\end{document}







