\hlavicka{Tue+Thu}{Feb 2+4}{5}{Direct interconnection networks I+II}

\font\bfsl=cmbxsl10 scaled 2488 %scaled 2074
\def\em{\bfsl}
\huge

\vglue 3cm
\bcen
\btab{|c|}
\hline
{\bf Direct interconnection network\/} (IN)\\[5mm]
{$=$}\\[5mm]
{\bf connected graph}\\
{\bf vertices  = processing nodes}\\
{\bf edges = communication links} \\
\hline
\multicolumn{1}{c}{\relax}\\[1cm]
\hline
{\bf processing node}\\[5mm]
{$=$}\\[5mm]
one or more processors + local memory +communication router 
\\\hline
\etab
\ecen

\newpage
\nadpis{Basic notions and terminology}

\nadpis{Alphabets and strings}
\bdesc\itemsep=5mm
\item[{\em $d$-ary alphabet}]:\quad $\Zs_d=\{0,1,\twodots,d-1\}$ 
\item[{\em $n$-letter $d$-ary strings}]:\quad  $\Zs_d^n=\{x_{n-1}\ldots x_0; x_i\in\Zs_d\}$, $n\geq 1$ \\[5mm]
$\len(x)$ = the length of string $x$\\[5mm]
$\varepsilon$ = the {\em empty\/} string\\[5mm]
$x^i$ = the $i$-{\em fold concatenation} of $x$ 
\item[{\em binary alphabet}] $\Bs$\\[5mm]
$\overline{b_i}=1-b_1$ = the {\em inversion} of bit $b_i$\\[5mm]
$\non_i(b)=b_{n-1}\twodots b_{i+1}\overline{b_i}b_{i-1}\twodots b_0$
\edesc

\newpage
\nadpis{Graph theory}

\bdesc\itemsep=5mm
\item[{\em vertex and edge set}] of graph  $G$ :\quad  $V(G)$, $E(G)$, $N=|V(G)|$ 
\item[{\em adjacent vertices}] $u$ and $v$ = {\em edge} $\la u,v\ra$ 
\item[{\em $H$ = subgraph}] of $G$, $H\subset G$ :\quad $V(H)\subset V(G)$ and $E(H)\subset E(G)$
\item[{\em $H$ = induced subgraph}] of $G$ :\quad {\em maximal\/} subgraph with $V(H)$
\item[{\em $H$ = spanning subgraph}]of $G$ :\quad $V(H)=V(G)$
\item[{\em cartesian product}] $G=G_1\times G_2$ :\\[2mm]
$V(G)=\{(x,y);  x\in V(G_1), y\in V(G_2)\}$\\[2mm]
$E(G)=\{\la (x_1,y),(x_2,y)\ra;  \la x_1,x_2\ra\in E(G_1)\}$\\
\hglue 25mm $\cup \{\la (x,y_1),(x,y_2)\ra;  \la y_1,y_2\ra\in E(G_2)\}$\\[15mm]
\cobrx{160mm}{cartesian}
\newpage
\item[{\em degree of vertex\/}] $u$, $\deg_G(u)$ :\quad \# of neighbors of $u$
\item[{\em degree set of graph\/}] $G$, $\deg(G)$ :\quad  $\{\deg_G(u);  u\in V(G)\}$
\item[{\em maximum degree\/}] of $G$ :\quad $\triangle(G)=\max(\deg(G))$
\item[{\em minimum degree\/}] of $G$ :\quad $\delta(G)=\min(\deg(G))$
\item[{\em $k$-regular graph\/}] $G$ :\quad $\triangle(G)=\delta(G)=k$
\item[{\em path length\/}]\ $\len(P(u,v))$ :\quad  \# of edges in $P(u,v)$
\item[{\em distance\/}] $\dist_G(u,v)$ :\quad the length of a shortest $P(u,v)$
\item[{\em average distance\/}] in $N$-vertex $G$ : \quad $\Sigma_{u,v}\dist_G(u,v)/(N(N-1))$
\item[{\em diameter\/}] of $G$ :\quad $\diam(G)=\max_{u,v}\dist_G(u,v)$
\item[{\em vertex-disjoint paths}]:\quad $V(P_1(u,v))\cap V(P_2(u,v))=\{u,v\}$ 
\item[{\em edge-disjoint paths}]:\quad  $E(P_1(u,v))\cap E(P_2(u,v))=\emptyset$ 
\newpage
\item[{\em (vertex) connectivity\/}] of $G$, $\kappa(G)$ :\\
\hglue 1cm the {\em minimum\/} \# of vertices whose removal disconnects $G$
\item[{\em edge connectivity\/}] of $G$, $\lambda(G)$ :\\
\hglue 1cm the {\em minimum\/} \# of edges whose removal disconnects $G$
\[
\kappa(G)\leq \lambda(G)\leq\delta(G)
\]

\hbox{\obrx{120mm}{connectivity}\quad $\kappa(G)=1$, $\lambda(G)=\delta(G)=2$}

\item[{\em $k$-connected graph}]: \quad $\kappa(G)=k$  
\item[$k$-{\em edge-connected graph}]: \quad $\lambda(G)=k$
\item[{\em optimal connectivity}]:\quad $\kappa(G)=\lambda(G)=\delta(G)$
\item[{\em Menger's theorem}]: {\it Between any 2 vertices of $G$, $\exists$ at least $\kappa(G)$ vertex-disjoint and at least $\lambda(G)$ edge-disjoint paths.}
\item[{\em fault distance\/}] between $u$ and $v$ : the {\em maximum\/} over lengths of all shortest vertex-disjoint paths between $u$ and $v$.
{\itemsep=-2pt
\item{\em fault diameter\/} : maximum over fault distances.}
\newpage
\item[{\em bipartite and balanced bipartite graph}]:\\[5mm]
\cobrx{140mm}{2coloring}
\item[{\em hamiltonian path/cycle\/}] : \\
(closed) path over all vertices (vertex permutation)
\\[1mm] {\it A bipartite graph can have a hamiltonian cycle only if balanced.}
\item[{\em (edge) bisection width\/}] of $G$, $\bw_{\rm e}(G)$ : \\
the smallest \# of edges removal of which splits $G$ into two halves
\item[{\em vertex bisection width\/}] of $G$, $\bw_{\rm v}(G)$ :\\
the smallest \# of vertices removal of which splits $G$ into two halves\\
(of size  at most $\lceil|V|/2\rceil$ vertices)\\[5mm]
\cobrx{140mm}{bisection}
\newpage
\item[{\em isomorphism}]:\hbox{\hglue 4cm\obrx{100mm}{isomorphism}}
\item[{\em automorphism\/}] of $G$ :\quad any isomorphic mapping of $G$ to itself
\item[{\em vertex-symmetric, edge-symmetric graphs}]:\\[5mm] 
\cobrx{160mm}{symmetry}
\item[{\em digraphs = oriented graphs}]:
\bitem
\item {\em arc set\/} $A(G)$ 
\item arc $\la u{\to}v\ra$ is {\em incident from}  $u$ {\em to} $v$  
\item $\indeg_G(u)$ and $\outdeg_G(u)$ :\quad {\em in-degree} and {\em out-degree}
\item dipaths
\item oriented diameters
\item strong connectedness
\eitem
\edesc

\newpage
\nadpis{Requirements on interconnection networks}

\bdesc
\item[{\em small diameter and small average distance}]:
  \bitem
    \item decreases {\it communication latency\/} for 
      \bitem
        \item both {\it distance sensitive\/} routing ({\it store-and-forward}) 
        \item and {\it distance insensitive\/} routing ({\it wormhole routing})
      \eitem
  \eitem

\item[{\em small and fixed vertex degree}]:\\
 low-cost and universal routers $\times$ low connectivity and large distances
\bcen
{\it The diameter of an $N$-node fixed-degree network is $\Omega(\log N)$.} 
\ecen
{\bf Proof:} 
 Given $N$-vertex graph with  $\triangle(G)\leq k$, $k$ is constant,\\[5mm]
 $\Rightarrow$ the \# of vertices reachable in $G$ within $i$ steps is $O(k^i)$\\[5mm]
 $\Rightarrow$ $N=O(k^{\diam(G)})$ \\[5mm]
 $\Rightarrow$ $\diam(G)=\Omega(\log N)$.
\newpage
\item[{\em large bisection width}]: 
  \bitem 
    \item parallel {\it binary divide-and-conquer\/} algorithms need {\it high bandwidth\/} between both halves
      \bitem
        \item split the input into two halves
        \item solve them recursively on both halves of the IN in parallel
        \item merge the results from both halves into the final result
      \eitem
    \item VLSI design: large bisection width $\Rightarrow$ {\it many interchip wires}
  \eitem
\item[{\em high connectivity and fault tolerance}]:
  \bitem
    \item {\it redundant\/} short paths  in case of link or node {\it faults\/} 
    \item {\it bypassing congested parts or hotspots} 
    \item {\it spliting large packets\/} into smaller chunks sent along {\it parallel disjoint\/} paths 
  \eitem
\item[{\em small fault average distance and diameter}]\ 

\newpage

\item[{\em hamiltoniaty}]:\quad labeling processors with numbers $1,\ldots,p$ (sorting)

\item[{\em hierarchical recursivity}]: 
  \bitem
     \item  {\em topology\/} = \\
          \hglue 1cm set of equally defined graphs of different {\em dimensions}      
     \item {\em hierarchically recursive topology} :\\\hglue 1cm
            lower-dim. instances are {\it subgraphs\/} of higher-dim. instances
     \item makes the design and manufacturing of INs easier
     \item allows {\it inductive design\/} and mapping of parallel algorithms
   \eitem

\item[{\em incremental extendability and incremental scalability}]:
   \bitem
     \item {\em extendable\/} topology:\ defined for specific values of $N$
     \item {\em incrementally extendable\/} topology:\ defined for any $N$
     \item {\em incrementally scalable\/} topology:\\ 
     \hglue 1mm $N$-vertex instance $\Longrightarrow$ $(N+k)$-vertex instance needs $O(k)$ work 
   \eitem
\cobrx{110mm}{scalability}

\newpage

\item[{\em symmetry}]: makes parallel algorithms design easier
   \bitem
      \item doesn't matter where the computation starts
      \item doesn't matter in which directions communication evolves
      \item VLSI design
   \eitem 
\item[{\em support for routing and collective communication}]: 
  \bitem 
    \item shortest-path one-to-one routing
    \item permutation routing 
    \item one-to-all communication operations 
    \item all-to-all communication operations
  \eitem

\item[{\em embeddability of and into other topologies}]: 
  \bitem
    \item efficient  mapping of the process graph into the topology
    \item ability to simulate efficiently other topologies
  \eitem

\item[{\em simple VLSI or 3-D layout}]

\edesc

\newpage
\nadpis{Mesh-based (strictly orthogonal) topologies}

\bcen
hypercubes, meshes, and tori
\ecen

\nadpis{Binary hypercube of dimension $n$, $Q_n$}
\vglue 5mm
\begin{center}
\hglue 1mm\raise 2.5cm\hbox{$Q_4$:}\hglue 1cm\obrx{160mm}{cube4}\\[5mm]
\begin{tabular}{lp{1cm}l}
$V(Q_n)=\Bs^n$ & & $|V(Q_n)|=2^n$\\
$E(Q_n)=\{\la x,\non_i(x)\ra;  x\in V(Q_n),0\leq i<n\}$ & & $|E(Q_n)|=n2^{n-1}$\\
$\diam(Q_n)=n$ & & $\deg(Q_n)=\{n\}$\\
$\bw_{\rm e}(Q_n)=2^{n-1}$ & &
\end{tabular}
\end{center}
\newpage
\nadpis{Basic properties of the hypercube}
\bitem
\item regular, balanced bipartite, hamiltonian
\item {\it Hamming distance $\varrho$}
\item the average distance in $Q_n$ is $n/2$
\item logarithmic degree $\Rightarrow$  {\em hypercubic networks}
\item the {\it largest\/} possible bisection width
\item $\lambda(Q_n)=\kappa(Q_n)=n$
\item if $\varrho(u,v)=k$, then $\exists$ $n$ vertex-disjoint paths $P(u,v)$ among which
   \bitem
     \item[$\circ$]  $k$ are of length  $k$ 
     \item[$\circ$]  $n-k$ are of length $k+2$
   \eitem
\item the fault diameter of $Q_n$ is $n$
\item $\exists$ $k!$ different shortest paths between two vertices in distance $k$
\item hierarchical recursivity
\[
Q_n=Q_p\times Q_{n-p} = Q_p\times Q_q\times Q_{n-p-q} = Q^n_1 
\]
\item {\em canonical decomposition} : $Q_n=Q_{n-1}[j=0]\times Q_{n-1}[j=1]$ 
\item {\em subcubes} : $s_{n-1}\ldots s_1s_0$, where $s_i\in\{0,1,{*}\}$, $*$  = don't care \\
( subcubes = terms in boolean algebra) 
\item only extendable $\Rightarrow$ {\em incomplete hypercubes}
\item vertex- and edge-symmetric: $2^n\times n!$ automorphisms
   \bitem
      \item {\em translation} $u\to v$: mapping $\forall x\in V(Q_n)\ (x\mapsto (u\XOR v))$ 
      \item {\em rotation}: a permutation (renaming) of dimensions
   \eitem
\item $e$-{\em cube\/} routing
\item {\it optimal\/} algorithms for collective communication in almost all communication models
\item {\it simulates efficiently almost any other topology}
\eitem

\centerline{\hglue 1mm\raise 3cm\hbox{$Q_6=Q_3\times Q_3$:}\hglue 1cm\obrx{90mm}{q6isq3xq3}}

\newpage
\nadpis{Remarks to the hypercube}
\bitem
\item {\em subcube allocation problem\/} in multiuser hypercube machines:
  \bitem 
     \item users declare numbers of required nodes for their tasks
     \item OS allocates, if possible, subcubes of appropriate sizes
     \item OS releases subcubes upon task completion
     \item {\em fragmentation\/}  and {\em compaction} 
  \eitem
\item testbed for parallel feasibility of problems on distributed memory architectures
\item main drawbacks = logarithmic degree and lack of scalability
\item commercial hypercube MPPs: nCUBE 1 and 2, Intel's iPSC/2 and iPSC/860, TMC's CM-2, SGI Origin
\eitem
\newpage
\nadpis{$n$-dimensional mesh of dimensions $z_1,z_2,\twodots,z_n$, $M(z_1,z_2,\twodots,z_n)$}
\label{meshes}

\begin{center}
\begin{tabular}{l}
$V(M(\ldots))=\{(a_1,a_2,\twodots,a_n) ;  0\leq a_i\leq z_i-1\,\forall i\in\{1,\twodots,n\}\}$\\ 
$E(M(\ldots))=\{\la (\twodots,a_i,\twodots),(\twodots,a_i+1,\twodots)\ra;  0\leq a_i\leq z_i-2\}$\\[2mm]
$|V(M(\ldots))|=\Pi_{i=1}^n z_i$ \qquad $|E(M(\ldots))|=\Sigma_{i=1}^n(z_i-1)\Pi_{j=1\atop j\not=i}^n z_j$\\[2mm]
$\diam(M(\ldots))=\Sigma_{i=1}^n(z_i-1)=\Omega(\sqrt[n]{|V(M(\ldots))|})$\\[3mm]
$\deg(M(\ldots))=\{n,\twodots,n+j\}$, $j=|\{z_i;  z_i>2\}|$ \\[3mm]
$\bw_{\rm e}(M(\ldots))=\cases{(\Pi_{i=1}^n z_i)/\max_iz_i &if $\max_iz_i$ is even,\cr
                           \Omega((\Pi_{i=1}^n z_i)/\max_iz_i) &otherwise.\cr}$\\[10mm]
\raise 3cm\hbox{Mesh $M(3,3,4)$:}\quad \obrx{95mm}{mesh334}
\end{tabular}
\end{center}

\newpage
\nadpis{Basic properties of the mesh}
\bitem
\item we assume that the mesh dimension $n$ is {\it fixed}
\item the most practical meshes are {\it 2-D\/} and {\it 3-D\/}
\item 1-D meshes = {\em linear arrays\/} (incrementally scalable)
\item $M(k,k,\twodots,k)$ = $k$-{\em ary} $n$-{\em cube}
\item bipartite (not necessarily balanced)
\item hamiltonian if at least one side has {\it even\/} length
\item a hamiltonian path always exists
\item not  regular $\Rightarrow$  not vertex symmetric
\item large diameter for small $n$
\item the {\it exact\/} value of bisection width is open problem for $n\geq 4$
\item easy for 2- or 3-D meshes (very low = perimeter problem) \\
\hglue 5cm$\bw_{\rm e}(M(11,9,6))=64$
\item optimal connectivity
\item optimal fault diameter
\item fault distances = by at most 4 larger than nonfaulty ones
\newpage
\item hierarchical recursiveness\\
$M(z_1,z_2,\twodots,z_k)=M(z_1,z_2,\allowbreak\twodots,\allowbreak z_{k-1})\times M(z_k)=$\\
\phantom{$M(z_1,z_2,\twodots,z_k)=$}$M(z_1,\twodots,z_i)\times M(z_{i+1},\twodots,z_j)\times M(z_{j+1},\twodots,z_k)=$\\
\phantom{$M(z_1,z_2,\twodots,z_k)=$}$M(z_1)\times M(z_2)\times\cdots\times M(z_n)$. 
\item {\em submeshes} = strings with don't-care symbols
\item incremental extendability, but not  scalability 
\item the basic shortest-path routing = {\em dimension ordered\/} routing\\
2- and 3-D meshes : {\em XY\/} and {\em XYZ routing}
\item $\exists$ many communication and parallel algorithms {\it matching\/} the mesh {\it lower bounds} (large diameter, low bisection width)
\item modifications: {\em reconfigurable meshes, meshes of buses}
\eitem

\nadpis{Remarks on meshes}
\bitem
\item commercial mesh-based parallel computers:  Intel's Paragon (2-D mesh), MIT J-Machine (3-D mesh), transputers (2-D mesh) 
\item {\it submesh\/} allocation, fragmentation and  compaction problem
\eitem


\newpage
\nadpis{$n$-dimensional torus of dimensions $z_1,z_2,\twodots,z_n$, $T(z_1,z_2,\twodots,z_n)$}

\begin{center}
\begin{tabular}{l}
\multicolumn{1}{c}{torus = {\em toroidal\/} or {\em wrapped mesh\/}}\\
$V(T(\ldots))=V(M(\ldots))$\\[1mm]
$E(T(\ldots))=\{\la (\twodots,a_i,\twodots),(\twodots,a_i\oplus_{z_i}1,\twodots)\ra;  0\leq a_i<z_i\}$\\[1mm]
$|E(T(\ldots))|=n\times \Pi_{i=1}^n z_i$\\[1mm]
$\diam(T(\ldots))=\Sigma_{i=1}^n\left\lfloor {z_i/2}\right\rfloor$\\[1mm]
$\deg(T(\ldots))=\{2n\}$\\[1mm]
$\bw_{\rm e}(T(\ldots))=2\bw_{\rm e}(M(\ldots))$ \\[1cm]
\raise 3cm\hbox{Torus $T(3,3,4)$:}\quad \obrx{110mm}{torus334}
\end{tabular}
\end{center}


\newpage
\nadpis{Basic properties of the torus}
\bitem
\item a 1-dimensional torus = {\em cycle\/} or {\em ring}
\item $T(k,k,\twodots,k)$ = $k$-{\em ary} $n$-{\em torus}
\item bipartite iff all side lengths are even (bipartite $\Rightarrow$ balanced)
\item hamiltonian
\item regular and vertex symmetric (automorphisms = translations) 
\item $k$-ary $n$-tori are edge symmetric (automorphisms = rotations)
\item diameter/average distance drop to 1/2  w.r.t. equal mesh
\item connectivity/bisection width double w.r.t. equal mesh
\item hierarchically recursive, up to cartesian product of cycles
\item scalability properties are even slightly worse comapred to meshes
\item {\it dimension ordered\/} shortest-path routing (cycles make it more complicated)
\item topologically optimal algorithms exist for many fundamental problems on tori
\item commercial MPPs:  Cray/SGI T3D and T3E, Convex Exemplar (3-D), Intel/CMU iWarp (2-D), KSR (1-D)
\eitem

\newpage
\nadpis{Comparisons of hypercubes, meshes, and tori}

\bitem
\item $M(2,2,\twodots,2)$ is isomorphic $Q_n$
\item $n$-dimensional meshes and tori are generalizations of $Q_n$
\item For some $k$ and $n$, $k<n$, a $k$-dimensional mesh/torus can be a subgraph of $Q_n$.
\eitem

\centerline{\hglue 1mm\raise 4cm\hbox{$T(4,4)= Q_4$:}\quad \obrx{80mm}{torus4x4}}

\newpage

\nadpis{Tradeoff between diameter, number of edges, and bisection width} 
\vglue 1cm
$M(8,8,4)\subset T(8,8,4)\subset Q_8$ and for all of them, $|V()|=256$.
\vglue 2cm
\bcen
\tabcolsep=1.5mm
\begin{tabular}{|c|c|c|c|}
\cline{2-4}
\multicolumn{1}{c|}{} & $M(8,8,4)$ & $T(8,8,4)$ & $Q_8$\\
\hline
$\diam()$ & 17 & 10 & 8 \\
\hline
$|E()|$ & 640 & 768 & 1024 \\
\hline
$\bw_{\rm e}()$ & 32 & 64 & 128 \\
\hline
\end{tabular}
\ecen

\newpage
\nadpis{Hypercubic topologies}

\bitem
\item logarithmic-diameter constant-degree derivatives of the hypercube
\item worse extendability: the \# of vertices is $n2^n$ or similar
\item bisection width $\Omega(N/\log N)$
\item two main representants: {\it cube-connected cycles} and {\it butterflies}
\eitem

\newpage
\nadpis{Cube-connected cycles of dimension $n$, $\CCC_n$}


\begin{center}
\begin{tabular}{l}
$V(\CCC_n)=\{(i,x) ;  0\leq i<n \wedge x\in\Bs^n\}$\\
$E(\CCC_n)=\{\la(i,x),(i\oplus_n1,x)\ra,\la(i,x),(i,\non_i(x))\ra;  (i,x)\in V(\CCC_n)\}$\\
 $|V(\CCC_n)|=n2^n$\\
 $|E(\CCC_n)|=n2^{n-1}+n2^n$\\
$\diam(\CCC_n)=(2n-2)+\lfB n/2\rfB$ for $n>3$, $\diam(\CCC_3)=6$\\
$\deg(\CCC_n)=\{3\}$\\
$\bw_{\rm e}(\CCC_n)=2^{n-1}$\\
\end{tabular}
\end{center}

\centerline{\hglue 1mm\raise 4cm\hbox{Cube Connected Cycles $\CCC_3$:}\obry{100mm}{ccc3}}


\newpage
\nadpis{Basic properties of $\CCC$}
\bitem
\item $\CCC_n$ is balanced bipartite iff $n$ is even
\item regular, vertex-symmetric, hamiltonian
\item not edge-symmetric ({\em hypercube\/} and {\em cycle\/} edges)
\item optimal connectivity 3 (between two  vertices, $\exists$ at most two shortest paths)
\item shortest-path routing : based on $e$-cube routing
\item not hierarchically recursive
\eitem

\nadpis{Another view of $\CCC$}

\cobrx{.9\textwidth}{flatccc3}


\newpage
\nadpis{Wrapped butterfly of dimension $n$, $\wBF_n$}

\begin{center}
\begin{tabular}{l}
$V(\wBF_n)=\{(i,x);  0\leq i<n \wedge x\in\Bs^n\}$\\
$E(\wBF_n)=\{\la (i,x),(i\oplus_n1,x)\ra, \la(i,x),(i\oplus_n1,\non_i(x))\ra\mid(i,x)\in V(\wBF_n)\}$\\
 $|V(\wBF_n)|=n2^n$\\
$|E(\wBF_n)|=n2^{n+1}$\\
$\diam(\wBF_n)=n+\lfB n\over 2\rfB$\\
 $\deg(\wBF_n)=\{4\}$\\
$\bw_{\rm e}(\wBF_n)=2^n$
\end{tabular}
\end{center}

\cobrx{.9\textwidth}{flatbf3}

\noindent Basically the same properties as $\CCC$, except that:  more edges, larger bisection width, and lower diameter.

\newpage
\nadpis{Ordinary butterfly of dimension $n$, $\oBF_n$}

\begin{center}
\begin{tabular}{l}
$V(\oBF_n)=\{(i,x);  0\leq i\leq n \wedge x\in\Bs^n\}$\\
$E(\oBF_n)=\{\la(i,x),(i+1,x)\ra, \la(i,x),(i+1,\non_i(x))\ra\mid i<n\}$\\
$|V(\oBF_n)|=(n+1)2^n$\\
$|E(\oBF_n)|=n2^{n+1}$\\
$\diam(\oBF_n)=2n$\\
$\deg(\oBF_n)=\{2,4\}$\\
$\bw_{\rm e}(\oBF_n)=2^n$\\
\end{tabular}
\end{center}


\centerline{\hglue 3cm\raise 3cm\hbox{$\oBF_3$:}\hglue 1cm\obrx{140mm}{bf3}}

\newpage
\nadpis{Basic properties of the ordinary butterfly}
\bitem
\item $\oBF_n$ is organized into {\em columns} ({\em stages}) $0\leq i\leq n$ and {\em rows} $0\leq x\leq 2^n-1$
\item two kinds of edges: {\em straight\/} and {\em cross\/} ({\em hypercube}) edges 
\item not vertex symmetric and not regular
\item not hamiltonian
\item hierarchically recursive: $\oBF_n$ contains two $\oBF_{n-1}$ as subgraphs
\item optimal connectivity 2
\item $\exists$ only one shortest path between $(0,x)$ and $(n,y)$ (= $e$-cube routing)
\eitem

\newpage
\nadpis{Tree-based topologies}

\bitem
\item {\em fat tree}
\item {\em pyramidal machines}
\item {\em  mesh of trees} 
\eitem

\newpage
\nadpis{2-D mesh of trees of height $n$, $\MT_n$}

For integer $n\geq 1$, let $N=2^n$.

\begin{tabular}{ll}
\multicolumn{2}{l}{$V(\MT_n)=(\Bs^n\times\cup_{i=0}^n\Bs^i)\cup(\cup_{i=0}^n\Bs^i\times\Bs^n)$}\\[1mm]
\multicolumn{2}{l}{$E(\MT_n)=\{\la(x,y),(xa,y)\ra;  \len(x){<} n\wedge a\in\Bs\}$}\\[1mm]
\multicolumn{2}{l}{\phantom{$E(\MT_n)=$}$\cup \{\la(x,y),(x,ya)\ra;  \len(y)< n\wedge a\in\Bs\}$}\\[1mm]
$|V(\MT_n)|=3N^2-2N$ & $|E(\MT_n)|=4N(N-1)$\\[1mm]
$\deg(\MT_n)=\{2,3\}$ & $\diam(\MT_n)=4\log N=4n$\\[1mm]
$\bw_{\rm e}(\MT_n)=N=\Theta(\sqrt{|V(\MT_n)|})$ & 
\end{tabular}
\vfill
\centerline{\hglue 1mm\raise 4cm\hbox{Mesh-of-trees $\MT_2$:}\hglue 1cm\obrx{120mm}{meshtree2}}

\newpage
$\MT$ can be defined for any 2-D mesh $M(2^k,2^l)$ and for higher dimensions. We will discuss only the 2-D square mesh case.
\vglue 5mm
\nadpis{Basic properties of the mesh-of-trees topology}

\bitem
\item $\MT_n$ is a hybrid topology: it is a 2-D mesh $M(N,N)$ whose linear rows and columns are replaced with {\em row\/} and {\em column\/} complete binary {\em trees} of height $n$, $\CBT_n$ 
\item $M(N,N)$ has {\it large diameter\/} $O(N)$. $\CBT_n$ has extremely small {\it connectivity\/} and {\it bisection width} equal to 1. $\MT_n$  has ($\log N$)-diameter of $\CBT_n$ and $\sqrt{N}$-bisection width of $M(N,N)$. 
\item mesh vertices = {\it leaves\/} in intersections of row and column trees
\item the additional vertices = the internal vertices of the trees  
\item $\MT_n\subset\CBT_n\times \CBT_n$
\item bipartite, but {\it not\/} balanced, therefore it is {\it not\/} hamiltonian
\item hierarchically recursive (it forms a cluster of disjoint submeshes-of-trees  connected via row- or column-tree roots) 
\item neither regular neither vertex symmetric
\item poor extendability
\newpage
\item simple shortest-path routing, it alternates routing in 2-D meshes and trees. Assume source vertex $u$ and destination vertex $v$.
 \benum
    \item If $u$ is in a {\it row\/} tree and $v$ in a {\it column\/} tree, the shortest path from $u$ to $v$ is uniquely defined via the intersection leaf of both trees.
    \item If $u$ and $v$ are in two {\it different row\/} trees $T_u$ and $T_v$, 
      \benum
        \item compare the level of $u$ and $v$ in their trees. 
        \item if $u$ is lower than $v$, then (see the figure bellow) 
          \benum
            \item go down in $T_u$ to any leaf in the subtree
            \item  use the corresponding column tree to get to $T_v$, 
            \item use  tree routing in $T_v$ to get to $v$ by passing through the root of the 
                   smallest common subtree of $v$ and of the homothetic image 
                   $\overline{u}$ of $u$ in $T_v$
          \eenum
        \item If $u$ is higher than $v$, then 
          \benum
            \item go first to the image of $v$ in $T_u$
            \item via any leaf get to $T_v$ then (see the figure with vertices $u$ and $v$ exchanged).
          \eenum
       \eenum
           The number of different shortest paths depends on the heights of the vertices.
    \item The routing between two internal nodes of column trees is similar.
  \eenum
\item $\MT$ is optimal for parallel solving of many problems similarly to the hypercube.
\item No commercial machine has been  built upon this topology yet. 
\eitem

\cobrx{.95\textwidth}{mtrouting}

\newpage
\nadpis{Shuffle-based topologies}

\bitem
\item {\it orthogonal\/} and {\it hypercubic\/} topologies: the adjacency  is based on {\it incrementing\/} and/or {\it inverting\/} coordinates. 
\item {\it shuffle\/} topologies:  the adjacency is based on {\em shifting\/} the whole strings. {\it Rich\/} and {\it dence\/}, but also rather {\it complicated\/} topologies. 
\eitem

\nadpis{Common properties of shuffle-based topologies}

\bitem
\item {\it logarithmic\/} diameter and {\it constant\/} vertex degree 
\item {\it optimal\/} or nearly optimal connectivity
\item fault diameter {\it nearly\/} the same as non faulty one, similarly for fault average distances
\item {\it large\/} bisection width of order $N/\log N$
\item {\it simple\/} shortest-path routing
\item they are {\it neither\/} symmetric {\it nor\/} hierarchically recursive
\item they are rich of {\it cycles\/} of various lengths, since the rotation induces equivalence classes of vertices called {\em necklaces}
\item at the same time, they are rich of {\it subtrees\/} and spanning trees, since the general shift operation induces subtrees 
\eitem

\newpage
\nadpis{Binary shuffle-exchange}

\bitem 
\item {\em shuffle\/} of binary string $x$, $\sigma(x)$ = {\em left rotation by one bit}. 
\item {\em exchange\/} operation on $x$ = {\em inversion\/} of $LSB(x)$ 
\eitem

\begin{center}
\begin{tabular}{l}
$V(\SE_n)=\Bs^n$\\[1mm]
$A(\SE_n)=\{\la x{\rightarrow}\non_0(x)\ra;  x\in\Bs^n\}\cup\{\la x{\rightarrow}\sigma(x)\ra;  x\in\Bs^n\}$\\[1mm]
$|V(\SE_n)|=2^n$\\[1mm]
$|A(\SE_n)|=2^{n+1}$\\[1mm]
$\indeg(\SE_n)=\outdeg(\SE_n)=\{2\}$\\[1mm]
$\diam(\SE_n)=2n-1$\\[1mm]
$\bw_{\rm e}(\SE_n)=\Theta(\frac{2^n}{n})$
\end{tabular}
\end{center}
\vfill
\centerline{\hglue 1mm\raise 2cm\hbox{Digraph $\SE_3$:}\hglue 1cm\obrx{160mm}{SE3}}


\newpage
\nadpis{Basic properties of binary shuffle-exchange topology}
\bitem
\item  $\la x{\rightarrow}\sigma(x)\ra$ = {\em shuffle arc}, $\la x{\rightarrow}\non_0(x)\ra$ = {\em exchange arc}.
\item $d_{\SE}(0^n,1^n)=2n-1=\diam(\SE_n)$
\item $d_{\SE}(u,v)$: (1) find the longest substring $\alpha$ s.t. $u=u'\alpha$ and $v=\alpha v'$, (2) convert prefix $u'$ into postfix $v'$ by shuffling and/or exchanging
\item 2-regular digraph
\item Necklace with vertex $u$ has length $i$ =  {\it periodicity\/} of $u$.\\
For example, if $|\mbox{necklace}(111010)|=6$ and $|\mbox{necklace}(101101)|=3$.
\item due to necklaces, $\SE_n$ does not contain as subgraphs any $\SE_i$, $i<n$. 
\item Also {\it not\/} vertex symmetric
\item The exact value of $\bw_{\rm e}(\SE_n)$ is  known up to a small constant multiplicative factor.
\item Undirected $\SE$ is similar: Strings can be rotated to the right by {\em unshuffle operation\/} $\sigma^{-1}(x)$. The diameter, bisection width, and necklaces remain the same. Average distance drops. For example, $u=1011101010$ and $v=1000010111$, the unoriented distance is only 8 instead of 13.
\eitem

\newpage
\nadpis{De Bruijn digraph of degree $d$ and dimension $n$, $\dB_{d,n}$}

\bitem
\item adjacency operation = general {\em left shift} by 1 position:\\
 $\lambda(x,a)=x_{n-2}\twodots x_0a$
\eitem


\begin{center}
\begin{tabular}{lp{1cm}l}
$V(\dB_{d,n})=\Zs_d^n$ & & $|V(\dB_{d,n})|=d^n$\\
$E(\dB_{d,n})=\{\la x{\rightarrow}\lambda(x,a)\ra;  x\in\Zs_d^n,a\in\Zs_d\}$ & & $|E(\dB_{d,n})|=d^{n+1}$\\
$\indeg(\dB_{d,n})=\outdeg(\dB_{d,n})=\{d\}$ & & $\diam(\dB_{d,n})=n$\\
$\bw_{\rm e}(\dB_{d,n})=\Theta(d^n/n)$ & &
\end{tabular}
\end{center}
\vfill
\cobrx{250mm}{dB23}


\newpage
\nadpis{Basic properties of de Bruijn topology}
\bitem
\item vertex $u$ is adjacent to $v$ if the {\it first\/} $n-1$ letters of $v$ equal to the {\it last\/} $n-1$ letters of $u$ 
\item $d$-regular
\item $\dB_{2,n}$ is the {\it state diagram\/} of a shift register
\item {\it extremely low\/} diameter: Any vertex can be obtained from another one by at most $n$ left shifts, hence the diameter. $\dB_{2,n}$ has the same diameter as $Q_n$ with 4 arcs per vertex instead of $\log N$
\item BUT: the {\em average distance\/} is very {\it close\/} to the diameter (in contrast to mesh-based topologies) 
\item the structure of necklaces depends on periodicity of strings, similarly to shuffle-exchange topology
\item no recursivity. BUT: $\dB_{d,n}$ is decomposable into {\it isomorphic universal building blocks\/} (project Galileo at JPL, $\dB_{2,16}$ machine)
\item no hierarchical recursivity. BUT: $\dB_{d,n}$ is the {\em line digraph\/} of $\dB_{d,n-1}$, i.e., if we replace all arcs in $\dB_{d,n-1}$ with vertices and preserve adjacency, we get $\dB_{d,n}$.  It can be used for optimal {\it simulations\/} of large de Bruijn networks on small ones 
\item the connectivity is {\em nearly\/} optimal: $\kappa(\dB_{d,n})=\lambda(\dB_{d,n})=d-1$ (loops at vertices $a^n$)
\item the fault diameter of $\dB_{d,n}$ is only $n+1$
\item the exact value of $\bw_{\rm e}(\dB_{d,n})$ is known up to a small constant multiplicative factor
\item no incremental extendability or scalability. BUT:  there exist incrementally extendable {\em generalized\/} de Bruijn networks and incrementally scalable {\em partial line digraphs\/}
\eitem