\hlavicka{Tue+Thu}{Feb 2+4}{5}{Direct interconnection networks I+II}

A {\em direct interconnection network\/} (IN) of a multiprocessor system is represented by a {\em connected graph\/} whose vertices represent {\em processing nodes\/} and edges represent {\em communication links}. A processing node (PN) usually consists of one or more processors, local memory, and communication router. This section is devoted to the description and analysis of topologies and properties of important INs.

\section{Basic notions and terminology}

\subsection{Alphabets and strings}

\bdesc
\item[{\em $d$-ary alphabet}] is denoted by $\Zs_d=\{0,1,\twodots,d-1\}$. The operation $+$ modulo $n$ is denoted by $\oplus_n$.
\item[{\em $n$-letter $d$-ary strings}:] $\Zs_d^n=\{x_{n-1}\ldots x_0; x_i\in\Zs_d\}$, $n\geq 1$. The length of string $x$ is denoted by $\len(x)$. The {\em empty\/} string (i.e., of length 0) is denoted by $\varepsilon$. The $i$-{\em fold concatenation} of string $x$ is denoted by $x^i$.
\item[{\em Binary alphabet}] is denoted by $\Bs$. The {\em inversion} of bit $b_i$ is $\overline{b_i}=1-b_i$. If $b=b_{n-1}\twodots b_{i+1}b_ib_{i-1}\twodots b_0\in \Bs^n$, then $\non_i(b)=b_{n-1}\twodots b_{i+1}\overline{b_i}b_{i-1}\twodots b_0$.
\edesc

\subsection{Graph theory}

\bdesc
\item[{\em Vertex and edge set}] of graph  $G$ is denoted by $V(G)$ and $E(G)$, respectively. Two {\em adjacent\/} vertices $u$ and $v$ form edge $\la u,v\ra$. They are {\em incident\/} with the edge. Edges $\la u,v\ra$ and $\la v,w\ra$ are {\em adjacent}.
\item[{\em $H$ = subgraph}] of $G$, $H\subset G$, if $V(H)\subset V(G)$ and $E(H)\subset E(G)$.
\item[{\em $H$ = induced subgraph}] of $G$ if it is a {\em maximal\/} subgraph of $G$ with vertices $V(H)$.
\item[{\em $H$ = spanning subgraph}]of $G$ if $V(H)=V(G)$.
\item[{\em Union}] of two disjoint graphs $G_1$ and $G_2$, $G_1\cup G_2$, is a graph with vertices $V(G_1)\cup V(G_2)$ and edges $E(G_1)\cup E(G_2)$.
\item[{\em Cartesian product}] of $G_1$ and $G_2$ is graph $G=G_1\times G_2$ with $V(G)=\{(x,y);  x\in V(G_1), y\in V(G_2)\}$ and $E(G)=\{\la (x_1,y),(x_2,y)\ra;  \la x_1,x_2\ra\in E(G_1)\}\cup \{\la (x,y_1),(x,y_2)\ra;  \la y_1,y_2\ra\in E(G_2)\}$. See an example below.
\begin{figure}[ht]
\cobrx{60mm}{cartesian}
\caption{An example of a cartesian product}
\end{figure}
\item[{\em Degree of vertex}] $u$, $\deg_G(u)$, is the number of neighbors of $u$.
\item[{\em Degree set of graph}] $G$, $\deg(G)$, is the set $\{\deg_G(u);  u\in V(G)\}$.
\item[{\em Maximum degree}] of $G$ is $\triangle(G)=\max(\deg(G))$.
\item[{\em Minimum degree}] of $G$ is $\delta(G)=\min(\deg(G))$.
\item[{\em $k$-regular graph}] $G$ has $\triangle(G)=\delta(G)=k$.
\item[{\em Connected graph}:] Every pair $u$, $v$ of vertices is joined by a {\em path} $P(u,v)$, a sequence of adjacent edges.
\item[{\em Path length}] $\len(P(u,v))$, is the number of edges of $P(u,v)$.
\item[{\em Distance}] between vertices $u$ and $v$, $\dist_G(u,v)$, is the length of a shortest path joining $u$ and $v$.
\item[{\em Average distance}] in $G$, $\overline{\dist}(G)$, is $\sum_{u,v}\dist_G(u,v)/(N(N-1))$, where $N=|V(G)|$.
\item[{\em Diameter}] of $G$, $\diam(G)$, is the maximum distance between any two vertices of $G$.
\item[{\em Cycle}] is a closed path.
\item[{\em Vertex-disjoint paths}] $P_1(u,v)$ and $P_2(u,v)$ have no vertices in common except for $u$ and $v$.
\item[{\em Edge-disjoint paths}] $P_1(u,v)$ and $P_2(u,v)$ have no edges in common.
\item[{\em Connectivity}:] {\em (Vertex) connectivity\/} of $G$, $\kappa(G)$, is the {\em minimum\/} number of vertices whose removal results in a disconnected graph. {\em Edge connectivity\/} of $G$, $\lambda(G)$, is the {\em minimum\/} number of edges whose removal results in a disconnected graph. It follows that
\[
\kappa(G)\leq \lambda(G)\leq\delta(G).
\]
\begin{figure}[ht]
\cobrx{60mm}{connectivity}
\caption{Graph $G$ with $\kappa(G)=1$ and $\lambda(G)=\delta(G)=2$.}
\label{connectivity}
\end{figure}
\item[{\em $k$-connected graph}] has $\kappa(G)=k$. Similarly, $k$-{\em edge-connected\/} graph has $\lambda(G)=k$.
\item[{\em Optimal connectivity}:] $\kappa(G)=\lambda(G)=\delta(G)$.
\item[{\em Menger's theorem}:] {\it Between any two distinct vertices of $G$, there are at least $\kappa(G)$ vertex-disjoint paths and at least $\lambda(G)$ edge-disjoint paths}.
\item[{\em Fault diameter}] of a connected graph $G$ is the maximum over the lengths of all the shortest vertex-disjoint paths between any two vertices in $G$. Similarly, we can define the {\em fault distance\/} between any two vertices.
\item[{\em Bipartite graph}:] There exists a {\em bipartition\/} (or 2-{\em coloring\/}) of its vertex set $V$, which is a partitioning of $V$ into two disjoint subsets $V_1$ and $V_2$ such that each edge of $G$ has one vertex in $V_1$ and the other vertex in $V_2$.
\item[{\em Balanced bipartite graph}] has a bipartition $(V_1,V_2)$ with $|V_1|=|V_2|$.\\[2mm]
\begin{figure}[ht]
\cobrx{80mm}{2coloring}
\caption{Examples of bipartite graphs, the first one is not balanced}
\end{figure}
\item[{\em Hamiltonian path/cycle}] of a connected $G$ is a path/cycle joining all vertices of $G$. A graph having a hamiltonian cycle is {\em hamiltonian}. {\it A bipartite graph can have a hamiltonian cycle only if it is balanced}.
\item[{\em (Edge) bisection width}] of $G$, $\bw_{\rm e}(G)$, is the smallest number of edges removal of which divides $G$ into two parts of equal size (up to one vertex).
\item[{\em Vertex bisection width}] of $G$, $\bw_{\rm v}(G)$, is the smallest number of vertices removal of which divides $G$ into two parts having at most $\lceil|V|/2\rceil$ vertices each. To construct a minimal bisection is an NP-complete problem.\\[5mm]
\begin{figure}[ht]
\cobrx{80mm}{bisection}
\caption{An example of a graph with both edge and vertex bisection width 3}
\end{figure}
\item[{\em Isomorphism}:] Two graphs are isomorphic if they can be made identical by relabeling vertices.
\item[{\em Automorphism}] of $G$ is any isomorphic mapping of $G$ to itself.
\item[{\em Vertex-symmetric}] graph looks the same independently of from which {\em vertex\/} you look at it, i.e., for any two distinct vertices $u$ and $v$, there is an automorphism of the graph sending $u$ to $v$.
\item[{\em Edge-symmetric}] graph looks the same independently of from which {\em edge\/} you look at it, i.e., for any two distinct edges $e_1$ and $e_2$ in $E(G)$, there is an automorphism of $G$ sending $e_1$ to $e_2$.

Graph $G$ on Figure \ref{connectivity} is edge symmetric, but it is not vertex symmetric. It becomes vertex symmetric if the vertices 1 and 3 are merged. A vertex symmetric graph must be {\em regular}.
\item[{\em Digraphs = Oriented graphs}:] A digraph $G$ has {\em arc set\/}  $A(G)$. Arc $\la u{\to}v\ra$ is {\em incident from}  $u$ {\em to} $v$ $u,v\in V(G)$. Vertex $u$ ($v$) is {\em incident from\/} ({\em to\/}) arc $\la u{\to}v\ra$, respectively. Arcs $\la u{\to}v\ra$ and $\la v{\to}w\ra$ are {\em adjacent}. The {\em in-degree} and {\em out-degree} of $u\in V(G)$, denoted by $\indeg_G(u)$ and $\outdeg_G(u)$, is the number of arcs incident to and incident from $u$, respectively. The other notions are derived similarly, digraphs have oriented paths (dipaths), oriented diameters, strong connectedness, and strong connectivity.
\edesc

While describing INs, we will often use terms {\em nodes\/} and {\em links} (or {\em channels}) instead of vertices and edges,  terms {\em input\/} ({\em output\/}) links or channels, respectively, instead of arcs incident from (incident to) a vertex. We will often use the terms vertices/nodes or edges/links interchangeably.

All logarithms are {\em binary\/} if not stated otherwise.

\section{Requirements on interconnection networks}

An IN  should transfer a {\em maximum\/} number of messages in the {\em shortest time\/} with {\em minimum cost\/} and {\em maximal reliability}. Clearly, any design of an IN is a tradeoff of various {\em contradictory requirements\/}. The most important  requirements are the following:

\bdesc
\item[{\em Small diameter and small average distance}:] Small average distance allows small communication {\em latency}, especially for {\em distance sensitive\/} routing, such as {\em store-and-forward}. But it is also crucial for {\em distance insensitive\/} routing, such as {\em wormhole routing}, since short distances imply less used links and buffers, and therefore less communication {\em contention}.

\item[{\em Small and fixed vertex degree}:] Small and constant vertex degree allows simple and low-cost universal (i.e., independent of the network size) routers, which amortizes the design costs. On the other hand, it implies fewer links and lower connectivity and larger distances. Ideally, we would like to get {\em low constant vertex degree\/} and at most {\em logarithmic diameter\/} simultaneously. Given an $N$-vertex graph such that  $\triangle(G)\leq k$ for some constant $k$, the number of vertices reachable in $G$ within $i$ steps from any vertex is $O(k^i)$. Hence, $N=O(k^{\diam(G)})$, which is equivalent to $\diam(G)=\Omega(\log N)$. {\it Fixed-degree networks cannot have better than logarithmic diameter}.

\item[{\em Large bisection width}:] Many problems can be solved in parallel using {\em binary divide-and-conquer\/}: split the input data set into two halves and solve them recursively on both halves of the IN in parallel, and then merge the results in both halves into the final result. Small bisection width implies {\em low bandwidth\/} between both halves and it can slowdown the final merging phase. On the other hand, a large bisection width is undesirable for a VLSI design of the IN, since it implies a lot of {\em extra chip wires}.

\item[{\em High connectivity and fault tolerance}:] The network should provide {\em alternative paths\/} for delivering messages in case of link or node {\em faults\/} or {\em communication congestions}. {\em Large packets\/} can be delivered faster if they can be {\em split\/} into smaller chunks sent along disjoint paths. 

\item[{\em Small fault average distance and diameter}:] To have these alternative or parallel disjoint paths as short as possible, small fault average distances and small fault diameter are naturally desirable.

\item[{\em Hamiltonicity}:] The existence of at least a hamiltonian path is not the most important requirement, but it is useful whenever we need to label processors with numbers $1,\ldots,p$ so that adjacent processors get successive numbers (for example in sorting algorithms).

\item[{\em Hierarchical recursivity}:] INs are usually defined using some independent parameters, called {\em dimensions}. The set of all graphs of different dimensions based on a given definition forms a {\em topology\/}. A topology is {\em hierarchically recursive\/} if a higher-dimensional graph contains lower-dimensional graphs with the same topology as subgraphs. Many INs used in parallel systems are hierarchically recursive, topologies based on cartesian product are one example. Recursiveness makes the design and technology of manufacturing INs easier. Large scale problems that can be solved in parallel by recursive decomposition into smaller subproblems can be easily and efficiently mapped on the hierarchically recursive topologies using induction.

\item[{\em Incremental extendability and incremental scalability}:] If the definition of the topology allows graphs of any size, the topology is said to be {\em incrementally extendable}. There are incrementally extendable topologies, and some are at least partially extendable, since they allow size granularity only greater than 1. Some hierarchically recursive topologies allow graphs of specific discrete sizes, such as powers of two. If a topology is incrementally extendable, a very important question is how the structure of an instance of size $n$ differs from the structure of an instance of size $n+k$ for some integer constant $k\geq1$. An $(n+k)$-vertex instance can be obtained from an $n$-vertex one by removing $r(k)$ edges (to get a subgraph of the larger instance) and by adding additional vertices and corresponding edges to this subgraph. If $r(k)=O(k)$, the topology is said to be {\em incrementally scalable}. Very few topologies are incrementally scalable. For example, 2-D meshes are incrementally extendable, but not scalable.

\item[{\em Symmetry}:] This is a very important requirement. Many IN topologies are vertex or edge symmetric. Intuitively, a symmetric network is easy to understand and the design of parallel and communication algorithms is very much easier, since it is irrelevant where the computation and/or communication starts or in which directions it will evolve. Also, the symmetry is helpful for solving issues related to VLSI design.

\item[{\em Support for routing and collective communication}:] This is a crucially important property and we will devote several lectures to it. The network topology should enable a simple shortest path routing, so that the basic routing algorithm could be implemented in hardware. Equally important parameters of an IN are complexities of {\em permutation routing} and {\em one-to-all\/} and {\em all-to-all\/} communication operations. The design of efficient communication algorithms is greatly simplified if the  topology is symmetric and it also depends on the communication technology and router architecture.

\item[{\em Embeddability of and into other topologies}:] The efficiency of a parallel algorithm running on a parallel machine depends on the similarity between the process graph and the underlying physical IN topology. We need a suitable mapping or {\em embedding\/} of the process graph into the topology. Desirable are those topologies which are able to simulate efficiently other topologies.

\item[{\em Simple VLSI or 3-D layout}:] Any VLSI implementation of an IN implies VLSI-related requirements, such as easy mapping to rectangular grid, decomposition of a large IN into building blocks so that the lengths and numbers of inter-chip wires are minimal (recall the bisection width above). Except for the bisection width, the discussion of VLSI design issues is beyond the scope of this course.

\edesc

In the rest of the section, we give a survey of definitions and properties of several well-known topologies for INs. Some topologies are understood better than some others so that the description of properties will vary for various topologies.

\section{Mesh-based topologies}

This family of topologies, also called {\em strictly orthogonal\/}, includes hypercubes, meshes, and tori.
These are the most important interconnection topologies due to their simplicity and they are also the best understood topologies.

\subsection{Binary hypercube of dimension $n$, $Q_n$}

The $n$-{\em dimensional binary hypercube\/} (or $n$-{\em cube\/}) is a graph with $2^n$ vertices labeled by $n$-bit binary strings, with edges joining two vertices whenever their labels differ in a single bit (see Figure \ref{q4}).

\begin{figure}[ht]
\cobrx{80mm}{cube4}
\caption{Hypercube $Q_4$}
\label{q4}
\end{figure}

\begin{center}
\begin{tabular}{lp{1cm}l}
$V(Q_n)=\Bs^n$ & & $|V(Q_n)|=2^n$\\
$E(Q_n)=\{\la x,\non_i(x)\ra;  x\in V(Q_n),0\leq i<n\}$ & & $|E(Q_n)|=n2^{n-1}$\\
$\diam(Q_n)=n$ & & $\deg(Q_n)=\{n\}$\\
$\bw_{\rm e}(Q_n)=2^{n-1}$ & &
\end{tabular}
\end{center}

\bitem
\item \vglue 5mm The hypercube is a {\it regular balanced bipartite hamiltonian\/} graph.
\item The distance between two vertices $u$ and $v$ in $Q_n$  is the {\it Hamming distance}.
\item It has {\it logarithmic\/} diameter. The average distance in $Q_n$ is $n/2$.
\item It is {\it not\/} a constant-degree topology, the vertex degree grows logarithmically with the number of vertices. If the number of nodes of a parallel computer doubles, the degree of each vertex increases by one. A router with $k$ communication ports can be used for building a machine with at most $2^k$ nodes. This problem is solved by constant-degree and logarithmic-diameter derivatives of the hypercube, called {\em hypercubic networks}, see Page \pageref{hypercubic}.
\item The hypercube has the {\it largest possible bisection width}. Each vertex in one half is adjacent to its {\em image\/} in the other half.
\item The connectivity of $Q_n$ is $n$, which is {\it optimal}. If $k\leq n$ is the distance between vertices $u$ and $v$, then there are $n$ vertex-disjoint (therefore edge-disjoint) paths between $u$ and $v$ such that $k$ of these paths have length $k$ and $n-k$ paths have length $k+2$.
\item The {\it fault diameter\/} of $Q_n$ is $n$. If we remove $n-1$ edges from $Q_n$, there still exists a path of length at most $n$ between any two vertices.
\item Fault tolerance is also excellent, there are $k!$ different shortest paths between two vertices in distance $k$, since $k!$ is the number of permutations of $k$ coordinates in which the addresses differ.
\item The hypercube is {\it hierarchically recursive}. $Q_n$ is a cartesian product of {\em subcubes}:
\[
Q_n=Q_k\times Q_{n-k},\qquad k\in\{1,\ldots,n-1\}.
\]
There are $n\choose{k}$ ways how to make this decomposition. In particular, for any $j\in\Ns$ there exists a {\em canonical decomposition\/} of $Q_n$ into two $(n-1)$-cubes $Q_{n-1}[j=0]$ and $Q_{n-1}[j=1]$ (This gives the optimal bisection width.) For example, since $Q_6=Q_3\times Q_3$, we can view a $Q_6$ as a 3-cube, whose vertices are 3-cubes (Figure \ref{q3timesq3} shows a simplified picture of that). But also $Q_6=Q_2\times Q_4=Q_2\times Q_2\times Q_2$, and so on. Each $k$-dimensional subcube $S$ of $Q_n$ is uniquely specified by its {\em address} $s_{n-1}\ldots s_1s_0$, where $s_i\in\{0,1,{*}\}$, with exactly $k$ appearances of the {\em don't care\/} symbol $*$. $0$-dimensional subcubes are vertices and subcubes are generalized ``internally structured" vertices. Vertices correspond to minterms and subcubes to terms in Boolean algebra.
\item The  hypercube topology is {\it not\/} incrementally extendable, it is defined only for powers of two. However, many incrementally extendable generalizations have been proposed, such as {\em incomplete hypercubes}.
\item The hypercube is both {\it vertex-\/} and {\it edge-symmetric graph}. The automorphism group of $Q_n$ consists of $2^n\times n!$ translations and rotations. A translation is a systematic renaming of vertices. Let $\XOR$ denote the bitwise XOR operation. {\em Translation\/} from $u$ to $v$ is a mapping $\forall x\in V(Q_n)\ (x\mapsto (u\XOR v))$. A {\em rotation\/} is a permutation (renaming) of dimensions.
\item The basic routing algorithm is {\it trivial\/} and {\it optimal}, usually hardwired, and called $e$-{\em cube\/} routing: \label{standhypercuberoute}The bits in which the sender and receiver differ are compared in a fixed order, usually left to right.
\item There exist {\it optimal\/} algorithms for collective communication in almost all communication models.
\item The hypercube can also {\it simulate efficiently almost any other topology}.
\eitem

\begin{figure}[ht]
\cobrx{50mm}{q6isq3xq3}
\caption{$Q_6=Q_3\times Q_3$.}
\label{q3timesq3}
\end{figure}

The notion of subcubes is extremely important for using a hypercube multiprocessor as a multiuser multitasking machine. Users declare the numbers of required nodes for their tasks and the operating system allocates, if possible, subcubes of the appropriate size, which are released upon completion of the tasks. This is called the {\em subcube allocation problem\/} and there exist many algorithms for it which must maintain lists of allocated and free subcubes. Dynamic allocation and releasing of subcubes lead to the {\em fragmentation\/} problem in the hypercube similar to segment fragmentation of dynamic or virtual memory.

The hypercube is the best topology for implementing most parallel and communication algorithms. It serves as a testbed for parallel feasibility of problems on distributed memory architectures. Its only drawback is the logarithmic vertex degree and lack of scalability.

Several families of older massively parallel multiprocessors were based on the hypercube: nCUBE series 1 and 2, Intel's iPSC/2 and iPSC/860, TMC's CM-2, and several others. Currently, SGI Origin's interconnection network uses hypercube topology.

\subsection{$n$-dimensional mesh of dimensions $z_1,z_2,\twodots,z_n$, $M(z_1,z_2,\twodots,z_n)$}
\label{meshes}

Given integers $z_i\geq 2$, $1\leq i\leq n$, the $n$-dimensional mesh with side lengths $z_i$, $M(z_1,z_2,\twodots,z_n)$, is defined as follows (Figure \ref{334}(a) shows mesh $M(3,3,4)$).

\begin{center}
\begin{tabular}{ll}
$V(M(\ldots))=\{(a_1,a_2,\twodots,a_n) ;  0\leq a_i\leq z_i-1\,\forall i\in\{1,\twodots,n\}\}$ & $|V(M(\ldots))|=\prod_{i=1}^n z_i$\\
$E(M(\ldots))=\{\la (\twodots,a_i,\twodots),(\twodots,a_i+1,\twodots)\ra;  0\leq a_i\leq z_i-2\}$ & $|E(M(\ldots))|=\sum_{i=1}^n(z_i-1)\prod_{j=1\atop j\not=i}^n z_j$\\
$\diam(M(\ldots))=\sum_{i=1}^n(z_i-1)=\Omega(\root n \of {|V(M(\ldots))|})$ & \\
$\deg(M(\ldots))=\{n,\twodots,n+j\}$, $j=|\{z_i;  z_i>2\}|$ & \\
\multicolumn{2}{l}{$\bw_{\rm e}(M(\ldots))=\cases{(\prod_{i=1}^n z_i)/\max_iz_i &if $\max_iz_i$ is even,\cr
                           \Omega((\prod_{i=1}^n z_i)/\max_iz_i) &otherwise.\cr}$}
\end{tabular}
\end{center}

\bitem
\item When dealing with a mesh, we usually assume that its dimension $n$ is {\it fixed}. If we want to change its size, we change the side lengths. All the properties explained in what follows are stated under the assumption of fixed mesh dimension.
\item The most practical meshes are, of course, {\it 2-D\/} and {\it 3-D\/} ones: they have trivial VLSI or 3-D design (2-D meshes are planar) and allow simple and natural mapping of 2-D  and 3-D space problems with locality of interprocessor interactions (computer graphics and virtual reality, fluid mechanics, modeling of 3-D systems in earth sciences or astronomy, etc). They are also natural, even though  not optimal, topologies for linear algebra problems (parallel solvers of linear or differential equations). 1-D meshes are called {\em linear arrays\/} and they are incrementally scalable.
\item The mesh $M(k,k,\twodots,k)$ is called $k$-{\em ary} $n$-{\em cube}.
\item A mesh is a {\it bipartite\/} (not necessarily balanced) graph. If at least one side has even length, the mesh has a {\it hamiltonian cycle}. A hamiltonian path exists always.
\item Meshes are {\it not regular}, but the degree of any vertex is {\it bounded\/} by $2n$. Of course, the degree of a corner vertex is less than the degree of an internal vertex.
\item Therefore, meshes  are {\it not vertex symmetric}. This complicates the design of some parallel and communication algorithms, but on the other hand, there are problems taking advantage of the asymmetry between boundary and internal vertices, e.g., parallel finite difference schemes for solving differential equations in 2-D  and 3-D space.
\item The distance between two vertices is the sum of differences in corresponding coordinates.
\item The diameter is $n$-{\it th root\/} of the size of graph. It is large for lower-dimensional meshes, and therefore, they are not optimal for algorithms with global interactions, e.g., parallel sorting, or problems where global broadcasting or permutations are required, such as in parallel solvers of linear equations. So, 2-D  or 3-D mesh are simple extendable INs with small fixed vertex degree.
\item The exact value of bisection width is known only if $\max_iz_i$ is even. Then removing the middle edges of linear paths in the longest-side direction results in a bisection of minimal size. However, if $\max_iz_i$ is odd, the exact value of the bisection width is not known for $n\geq 4$. However, it is easy to calculate it for 2- or 3-D meshes. For example, $\bw_{\rm e}(M(11,9,6))=64$. (Can you show that?) Lower-dimensional meshes have very low bisection width, which creates a bottleneck for many parallel mesh-based algorithms.
\item The connectivity is {\it optimal}. The proof is similar to that for hypercubes. Also, the shortest path redundancy is optimal.
\item The fault diameter is {\it optimal}, equal to the nonfaulty diameter. Fault distances are by at most 4 larger than  nonfaulty ones.
\item Meshes are also {\it hierarchically recursive}. They can be decomposed into cartesian products of lower-dimensional meshes, up to the {\em canonical decomposition\/} into a cartesian product of linear arrays:  $M(z_1,z_2,\twodots,z_n)=M(z_1,z_2,\allowbreak\twodots,\allowbreak z_{n-1})\times M(z_n)=M(z_1,\twodots,z_i)\times M(z_{i+1},\twodots,z_j)\times M(z_{j+1},\twodots,z_n)=M(z_1)\times M(z_2)\times\cdots\times M(z_n)$. Submeshes are also denoted by strings with don't-care symbols.
\item Meshes are {\it incrementally extendable}, but {\it not\/} incrementally scalable. You can define a mesh of any size, but the amount of edges that must be removed from a smaller one to construct a larger one, can be very large.
\item The basic shortest-path routing in general meshes is also trivial. It is called {\em dimension ordered\/} routing.  Again, addresses of the sender and receiver are inspected in a prespecified order and the message is sent via the first port corresponding to a coordinate in which the addresses differ. In 2- and 3-D meshes, this is called $XY$ and $XYZ$ {\em routing}.
\item Many optimal communication and parallel algorithms have been developed for meshes (optimal in the sense of matching the mesh lower bounds due to large diameter, low bisection width, etc), namely for 2-D and 3-D meshes. Many of them will be treated in detail in this course.
\item Besides standard meshes, various modifications  have been proposed, such as reconfigurable meshes, meshes of buses, and so on. We will not deal with these variants here.
\eitem

The most important mesh-based parallel computers are Intel's Paragon (2-D mesh) and MIT J-Machine (3-D mesh). Also transputers used 2-D mesh interconnection. Processors in mesh-based machines are allocated by submeshes and the {\it submesh allocation\/} strategy must handle possible {\it dynamic fragmentation\/} and {\it compaction\/} of the global mesh network, similarly to hypercube machines.

\begin{figure}[ht]
\rule{0pt}{0pt} \hfill \obrx{50mm}{mesh334} \hfill \obrx{50mm}{torus334}\hfill\rule{0pt}{0pt}
\caption{Mesh $M(3,3,4)$ and torus $T(3,3,4)$.}
\label{334}
\end{figure}

\subsection{$n$-dimensional torus of dimensions $z_1,z_2,\twodots,z_n$, $T(z_1,z_2,\twodots,z_n)$}

Given integers $z_i\geq 2$, $1\leq i\leq n$, the $n$-dimensional torus $T(z_1,z_2,\twodots,z_n)$, also called {\em toroidal\/} or {\em wrapped mesh\/}, with side lengths $z_1,z_2,\twodots,z_n$, is defined as follows.


\begin{center}
\begin{tabular}{ll}
$V(T(\ldots))=V(M(\ldots))$ & $|V(T(\ldots))|=\prod_{i=1}^n z_i$\\
$E(T(\ldots))=\{\la (\twodots,a_i,\twodots),(\twodots,a_i\oplus_{z_i}1,\twodots)\ra;  0\leq a_i<z_i\}$ & $|E(T(\ldots))|=n\times \prod_{i=1}^n z_i$\\
$\diam(T(\ldots))=\sum_{i=1}^n\left\lfloor {z_i\over 2}\right\rfloor$ & $\deg(T(\ldots))=\{2n\}$\\
$\bw_{\rm e}(T(\ldots))=2\bw_{\rm e}(M(\ldots))$ &
\end{tabular}
\end{center}

Figure \ref{334}(b) shows a 3-dimensional torus $T(3,3,4)$. Adding wrap-around edges changes dramatically many structural properties, even though many features remain similar to meshes. We will highlight some of them.
\bitem
\item A 1-dimensional torus is simply a {\it cycle} or {\it ring}.
\item The torus $T(k,k,\twodots,k)$ is called $k$-{\em ary} $n$-{\em torus}.
\item Tori are {\it bipartite\/} iff all side lengths are even. But then, they are {\it balanced}.
\item Any torus has a {\it hamiltonian cycle}.
\item Tori are {\it regular} and {\it vertex symmetric}. The automorphism group consists of translations. $k$-ary $n$-tori are also {\it edge symmetric\/} and have rotations as automorphisms.
\item The diameter and average distance of a torus drops to nearly {\it one half\/} compared to a mesh with the same dimensionality, and the connectivity and the bisection width {\it doubles}. Hence, the connectivity of $n$-dimensional torus, $2n$, is also {\it optimal}.
\item They are also {\it hierarchically recursive}, again thanks to cartesian product structure. Each torus can be canonically decomposed into a cartesian product of cycles.
\item Scalability properties are even slightly worse here than at meshes.
\item Cyclic structure of tori makes shortest path routing algorithms more complicated, but the basic approach remains {\it dimension ordered\/} routing.
\item Similarly to meshes, many fundamental problems  are known to have topologically optimal solutions on tori and we will demonstrate many of them here.
\eitem

\subsection{Comparisons of hypercubes, meshes, and tori}

\bitem
\item $M(2,2,\twodots,2)$ is isomorphic to $Q_n$
\item $n$-dimensional meshes and tori are generalizations of $Q_n$
\item For some $k$ and $n$, $k<n$, a $k$-dimensional mesh/torus can be a subgraph of $Q_n$.
\eitem
%See Chapter \ref{EmbeddingsChapter} for more details.
For example, Figure \ref{torus44} illustrates that torus $T(4,4)$ is isomorphic to $Q_4$.
\begin{figure}[ht]
\cobrx{30mm}{torus4x4}
\caption{$T(4,4)= Q_4$.}
\label{torus44}
\end{figure}
Table \ref{comp-of-meshes} shows the tradeoff between the diameter, number of edges, and bisection width of $M(8,8,4)$, $T(8,8,4)$, and $Q_8$. All these graphs have the same size $|V()|=256$ and $M(8,8,4)\subset T(8,8,4)\subset Q_8$.

\begin{table}[ht]
\label{comp-of-meshes}
\tabcolsep=1.5mm
\begin{center}
\begin{tabular}{|c|c|c|c|}
\cline{2-4}
\multicolumn{1}{c|}{} & $M(8,8,4)$ & $T(8,8,4)$ & $Q_8$\\
\hline
$\diam()$ & 17 & 10 & 8 \\
\hline
$|E()|$ & 640 & 768 & 1024 \\
\hline
$\bw_{\rm e}()$ & 32 & 64 & 128 \\
\hline
\end{tabular}
\end{center}
\caption{Comparison of several characteristics of meshes, tori, and hypercubes.}
\label{comparison}
\end{table}

The tori are likely to become widely used interconnection networks. For example, Cray/SGI T3D and T3E and Convex Exemplar are based on 3-D torus, Intel/CMU iWarp uses 2-D torus, and several commercial machines use rings as a primary interconnect.

\section{Hypercubic topologies}
\label{hypercubic}

{\em Hypercubic networks\/} are logarithmic-diameter and constant-degree derivatives of the hypercube. The price to be paid for the constant vertex degree is worse extendability: the size of instances becomes $n2^n$ or similar. Therefore, they grow even faster with increasing dimension than the hypercube. They share many properties, such as bisection width $\Omega(N/\log N)$. Let us shortly describe two representatives: {\em cube-connected cycles} and {\em butterflies}.

\subsection{Cube-connected cycles of dimension $n$, $\CCC_n$}


\begin{figure}[ht]
\cobrx{55mm}{ccc3}
\caption{Cube Connected Cycles $\CCC_3$.}
\label{ccc3}
\end{figure}

\begin{center}
\begin{tabular}{ll}
$V(\CCC_n)=\{(i,x) ;  0\leq i<n \wedge x\in\Bs^n\}$ & $|V(\CCC_n)|=n2^n$\\
$E(\CCC_n)=\{\la(i,x),(i\oplus_n1,x)\ra,\la(i,x),(i,\non_i(x))\ra;  (i,x)\in V(\CCC_n)\}$ & $|E(\CCC_n)|=n2^{n-1}+n2^n$\\
$\diam(\CCC_n)=(2n-2)+\lfB n/2\rfB$ for $n>3$, $\diam(\CCC_3)=6$ & $\deg(\CCC_n)=\{3\}$\\
$\bw_{\rm e}(\CCC_n)=2^{n-1}$ &
\end{tabular}
\end{center}

$\CCC_n$ is constructed from $Q_n$ by replacing each hypercube vertex with a cycle of $n$ vertices so that each vertex takes care of just one hypercube direction. See Figure \ref{ccc3} where $(*,x)$ stands for $\{(0,x), (1,x), (2,x)\}$. Figure \ref{anotherccc3} shows other possible ways how to view $CCC$ topology. Part (b) is a useful simplification, $\CCC$ is viewed as a pancake made of all cycles, all the cycles look alike, and all hypercube edges are projected into vertex loops.

\bitem
\item $\CCC_n$ is {\it balanced bipartite\/} iff $n$ is even.
\item Any $\CCC_n$ has a {\it hamiltonian cycle}.
\item $CCC$ is a {\it regular\/} and {\it vertex-symmetric\/} topology. Figure \ref{anotherccc3}(b) shows it clearly. It is {\it not\/} an edge-symmetric graph, there are two kinds of edges: {\em hypercube\/} edges and {\em cycle\/} edges.
\item It has {\it optimal\/} connectivity 3. Between two arbitrary vertices, there are at most two shortest paths.
\item Shortest path routing is based on $e$-cube routing and Figure \ref{anotherccc3}(b) is helpful in understanding it.
\item In contrast to the hypercube, $\CCC$ is not hierarchically recursive. We can split a $\CCC_n$ into two halves which are very similar to $\CCC_{n-1}$, but they have one extra vertex in each cycle.
\eitem

\begin{figure}[ht]
\cobrx{.9\textwidth}{flatccc3}
\caption{Another view of $\CCC$.}
\label{anotherccc3}
\end{figure}


\subsection{Butterfly of dimension $n$}

Actually, there are two kinds of butterfly networks, wrapped and ordinary.
The {\em wrapped butterfly}, $\wBF_n$, is defined as follows:

\begin{center}
\begin{tabular}{ll}
$V(\wBF_n)=\{(i,x);  0\leq i<n \wedge x\in\Bs^n\}$ & $|V(\wBF_n)|=n2^n$\\
$E(\wBF_n)=\{\la (i,x),(i\oplus_n1,x)\ra,\la(i,x),(i\oplus_n1,\non_i(x))\ra\mid(i,x)\in V(\wBF_n)\}$ & $|E(\wBF_n)|=n2^{n+1}$\\
$\diam(\wBF_n)=n+\lfB n/2\rfB$ & $\deg(\wBF_n)=\{4\}$\\
$\bw_{\rm e}(\wBF_n)=2^n$ &
\end{tabular}
\end{center}

If each cycle of $\wBF_n$ shrinks to a single vertex and redundant edges are removed, we get $Q_n$ (see Figure \ref{wrappedbutterfly}). Wrapped butterfly topology has basically the same properties as $\CCC$, except that it has more edges, and therefore larger bisection width and lower diameter.
\begin{figure}[ht]
\cobrx{.9\textwidth}{flatbf3}
\caption{Wrapped butterflies}
\label{wrappedbutterfly}
\end{figure}

An $n$-dimensional {\em ordinary butterfly}, $\oBF_n$, can be made from $\wBF_n$ by replacing $n$-vertex cycles with $(n+1)$-vertex paths (or in other words, an ordinary butterfly is a hypercube with each vertex unfolded into a path).

\begin{figure}[ht]
\cobrx{90mm}{bf3}
\caption{Ordinary butterfly $\oBF_3$.}
\label{ordinarybutterfly}
\end{figure}

\begin{center}
\begin{tabular}{ll}
$V(\oBF_n)=\{(i,x);  0\leq i\leq n \wedge x\in\Bs^n\}$ & $|V(\oBF_n)|=(n+1)2^n$\\
$E(\oBF_n)=\{\la(i,x),(i+1,x)\ra, \la(i,x),(i+1,\non_i(x))\ra\mid(i,x)\in V(\oBF_n),i<n\}$ & $|E(\oBF_n)|=n2^{n+1}$\\
$\diam(\oBF_n)=2n$ & $\deg(\oBF_n)=\{2,4\}$\\
$\bw_{\rm e}(\oBF_n)=2^n$ &
\end{tabular}
\end{center}

\bitem
\item The vertices of $\oBF_n$ are organized into {\em columns} ({\em stages}) $i$, $0\leq i\leq n$, and {\em rows} $x$, $0\leq x\leq 2^n-1$. There are also two kinds of edges in the ordinary butterfly: {\em straight\/} edges and {\em cross\/} ({\em hypercube}) edges (see Figure \ref{ordinarybutterfly}). Edges linking vertices of stages $i$ and $i+1$ are said to be {\em of stage} $i$.
\item The ordinary butterfly is {\it not\/} vertex symmetric, it is {\it not\/} even regular.
\item It is not {\it hamiltonian}.
\item On the other hand, it is {\it hierarchically recursive}: $\oBF_n$ contains two $\oBF_{n-1}$ as subgraphs.
\item The connectivity is 2, which is {\it optimal}. 
\item There exists only one shortest path between any pair of vertices $(0,x)$ and $(n,y)$. The path traverses each stage exactly once using a cross edge of stage $i$ iff the row numbers $x$ and $y$ differ in the $i$-th bit. It follows that the basic shortest-path routing is basically the e-cube routing from hypercubes, see Page \pageref{standhypercuberoute}.
\eitem

\section{Tree-based topologies}

Process graphs often have a tree-like structure. However, the bottleneck at the root and the poor connectivity and robustness disqualify any simple complete tree as a potential general-purpose IN topology. Fortunately, tree-like graphs can be efficiently mapped on most usual INs. The only commercially important tree-based parallel machines have been TMC's CM-5 with its famous {\em fat tree\/} and some special-purpose {\em pyramidal machines}. In this course, we will describe an interesting hybrid of meshes and trees.

\subsection{2-D mesh of trees of height $n$, $\MT_n$}

2-D  and 3-D meshes suffer from {\it large diameter\/}, whereas trees suffer from {\it small connectivity\/} and {\it bisection width}. 2-D mesh of trees is a hybrid topology based on the 2-D mesh and complete binary tree, which has the diameter of the tree topology (i.e., logarithmic) and the bisection width of the 2-D mesh topology (i.e., square root of the size). For integer $n\geq 1$, let $N=2^n$. Then $\MT_n$ is defined as follows:

\begin{figure}[ht]
\cobrx{70mm}{meshtree2}
\caption{Mesh of trees $\MT_2$.}
\label{mt3}
\end{figure}

\begin{center}
\begin{tabular}{ll}
$V(\MT_n)=(\Bs^n\times\cup_{i=0}^n\Bs^i)\cup(\cup_{i=0}^n\Bs^i\times\Bs^n)$ & $|V(\MT_n)|=3N^2-2N$\\
$E(\MT_n)=\{\la(x,y),(xa,y)\ra;  \len(x){<} n\wedge a\in\Bs\}$ & $|E(\MT_n)|=4N(N-1)$\\
\phantom{$E(\MT_n)=$}$\cup \{\la(x,y),(x,ya)\ra;  \len(y)< n\wedge a\in\Bs\}$ & $\bw_{\rm e}(\MT_n)=N=\Theta(\sqrt{|V(\MT_n)|})$\\
$\deg(\MT_n)=\{2,3\}$ & $\diam(\MT_n)=4\log N=4n$
\end{tabular}
\end{center}


\bitem
\item The mesh-of-trees topology can be defined for any mesh $M(2^k,2^l)$. It can be even defined for any $n$-dimensional mesh with side lengths equal to powers of two. We will discuss only the 2-D square case for simplicity.
\item Informally, a mesh-of-trees is a 2-D mesh whose linear rows and columns are replaced with {\em row\/} and {\em column\/} complete binary {\em trees} of height $n$, $\CBT_n$. The original mesh vertices are {\it leaves\/} in the intersections of  the row and column trees and the additional vertices are exactly the internal vertices of the trees. It follows that $\MT_n\subset\CBT_n\times \CBT_n$.
\item An $N$-vertex mesh-of-trees is a constant-vertex-degree, $O(\log N)$-diameter, and $\Omega(\sqrt{N})$-bisection-width topology.
\item The mesh-of-trees topology  is {\it bipartite}, but {\it not\/} balanced, therefore it is {\it not\/} hamiltonian.
\item It is {\it hierarchically recursive}. By removing row- or column-tree roots, it decomposes into disjoint submeshes-of-trees. Low-level submeshes-of-trees can be used for local computations and the root nodes can be used for global communication and coordination.
\item It is {\it neither\/} regular {\it nor\/} vertex symmetric.
\item Its extendability is rather poor. 
\item Shortest-path routing is simple, it alternates routing in 2-D meshes and trees. Assume source vertex $u$ and destination vertex $v$.
 \bitem
    \item If $u$ is in a {\it row\/} tree and $v$ in a {\it column\/} tree, the shortest path from $u$ to $v$ is uniquely defined via the intersection leaf of both trees.
    \item If $u$ and $v$ are in two {\it different row\/} trees $T_u$ and $T_v$, compare the level of $u$ and $v$ in their trees. If $u$ is lower than $v$, then go down in $T_u$ to any leaf in the subtree, use the corresponding column tree to get to $T_v$, and use the tree routing in $T_v$ to get to $v$ (hence pass through the root of the smallest common subtree of $v$ and of the homothetic image of $u$ in $T_v$). If $u$ is higher than $v$, then go first to the image of $v$ in $T_u$ and via any leaf get to $T_v$ then. The number of different shortest paths depends on the heights of the vertices.
    \item The routing between two internal nodes of column trees is similar.
  \eitem
\eitem

No commercial machine has been  built upon this topology yet. 

\section{Shuffle-based topologies}

The adjacency in orthogonal and hypercubic topologies is based on incrementing and/or inverting coordinates. In shuffle topologies, the adjacency is based on {\em shifting\/} whole strings; the typical operations are the left shift and the left rotation by one position, which enforces an orientation on edges. Since most of the notions and results apply equally to both directed and undirected versions of these topologies, and digraphs are regular in contrast to undirected graphs, we will concentrate on digraphs only. 

Shuffle-based topologies have very rich, but also rather complicated structure. In general, they are more difficult to understand compared to previous topologies. They have the following properties:
\bitem
\item Logarithmic diameter and constant vertex degree.
\item Optimal or nearly optimal connectivity.
\item Fault diameter nearly the same as the non-faulty one, and similarly for fault average distances.
\item Large bisection width of order $N/\log N$.
\item Simple shortest-path routing.
\item They are neither symmetric nor hierarchically recursive.
\item They are rich in cycles of various lengths, since the rotation induces equivalence classes of vertices called {\em necklaces}.
\item At the same time, they are rich in subtrees and spanning trees, since the general shift operation induces subtrees. 
\eitem
The shuffle-based family includes three main topologies, shuffle-exchange, de Bruijn, and Kautz, and their variations. We will describe only the first two main ones.

\subsection{Shuffle-exchange networks}

Even though the shuffle-exchange topology can be defined for any alphabet, the focus of computer science has always been primarily on the binary shuffle-exchange topology. The {\em shuffle\/} of a binary string $x$, $\sigma(x)$, is its {\em left rotation by one bit}. The {\em inversion of the least significant bit\/} of $x$ is called the {\em exchange\/} operation. The {\em shuffle-exchange digraph of dimension} $n$, $\SE_{n}$, is defined as follows:
\begin{figure}[ht]
\cobrx{70mm}{SE3}
\caption{Digraph $\SE_3$.}
\label{SE3}
\end{figure}

\begin{center}
\begin{tabular}{ll}
$V(\SE_n)=\Bs^n$ & $|V(\SE_n)|=2^n$\\
$A(\SE_n)=\{\la x{\rightarrow}\non_0(x)\ra;  x\in\Bs^n\}\cup\{\la x{\rightarrow}\sigma(x)\ra;  x\in\Bs^n\}$ & $|A(\SE_n)|=2^{n+1}$\\
$\indeg(\SE_n)=\outdeg(\SE_n)=\{2\}$ & $\diam(\SE_n)=2n-1$\\
$\bw_{\rm e}(\SE_n)=\Theta(\frac{2^n}{n})$ &
\end{tabular}
\end{center}

\bitem
\item  $\la x{\rightarrow}\sigma(x)\ra$ is called a {\em shuffle arc} and $\la x{\rightarrow}\non_0(x)\ra$ is called an {\em exchange arc}.
\item The distance between vertices $0^n$ and $1^n$ is $2n-1$: we need $n$ exchange and $n-1$ shuffle operations. The shortest dipath from vertex $u$ to vertex $v$ is constructed by finding the longest substring $\alpha$ such that $u=u'\alpha$ and $v=\alpha v'$ and then by traversing the shuffle and/or exchange arcs so that prefix $u'$ is converted into suffix $v'$. We need {\em at most} $2n-1$ steps, which gives the diameter of $\SE_n$.
\item $\SE_n$ is a 2-regular digraph.
\item The length of necklaces depends on the {\it periodicity\/} of strings. For example, if $n=6$, then the necklace containing vertex $111010$ has length $6$, whereas the necklace containing vertex $101101$ has length 3. In general, the necklaces can have length $i$ where $i$ divides $n$. Therefore, the neighborhood of a vertex in a SE digraph depends on its periodicity and the SE topology is {\it not\/} vertex symmetric. 
\item Due to this necklace structure, digraph $\SE_n$ does not contain as subgraphs any $\SE_i$, $i<n$. Fortunately, there exists so called ``approximate'' hierarchical recursivity of shuffle-exchange topology, as we will explain in further sections.
\item The exact value of $\bw_{\rm e}(\SE_n)$ is  known up to a small constant multiplicative factor.
\item The undirected shuffle-exchange topology is similar. Strings can also be rotated to the right, which is the so-called {\em unshuffle operation\/} $\sigma^{-1}(x)$. The average distance drops, since the unshuffle operation may provide shorter paths. For example, if $n=10$, $u=1011101010$, and $v=1000010111$, the shortest directed path from $u$ to $v$ has length 13, whereas the shortest undirected path has length 8. The diameter, bisection width, and the number and lengths of necklaces remain the same. The shuffle-exchange graph is not regular. 
\eitem

\subsection{De Bruijn digraph of degree $d$ and dimension $n$, $\dB_{d,n}$}

The adjacency is based on only one general operation of {\em left shift} by 1 position $\lambda$: If $x=x_{n-1}\twodots x_0\in\Zs_d^n$ and $a\in\Zs_d$, then \label{lambda} $\lambda(x,a)=x_{n-2}\twodots x_0a$.

\begin{center}
\begin{tabular}{lp{1cm}l}
$V(\dB_{d,n})=\Zs_d^n$ & & $|V(\dB_{d,n})|=d^n$\\
$E(\dB_{d,n})=\{\la x{\rightarrow}\lambda(x,a)\ra;  x\in\Zs_d^n,a\in\Zs_d\}$ & & $|E(\dB_{d,n})|=d^{n+1}$\\
$\indeg(\dB_{d,n})=\outdeg(\dB_{d,n})=\{d\}$ & & $\diam(\dB_{d,n})=n$\\
$\bw_{\rm e}(\dB_{d,n})=\Theta(d^n/n)$ & &
\end{tabular}
\end{center}

\bitem
\item Vertex $u$ of $\dB_{d,n}$ is adjacent to vertex $v$ if the first $n-1$ letters of $v$ equal the last $n-1$ letters of $u$. 
\item Any vertex can be obtained from any other one by at most $n$ left shifts, hence the diameter. The fact that de Bruijn topologies have an {\em extremely low\/} diameter is offset by the fact that the {\em average distance\/} is very close to the diameter. This is a disadvantage compared to mesh-based topologies. 
\item $\dB_{d,n}$ is $d$-regular: each vertex has $d$ outgoing and $d$ incoming arcs. 
\item The structure of necklaces depends on periodicity of strings, similarly to shuffle-exchange topology, which destroys both the symmetry and the recursivity. 
\item Even though the topology is not recursive, the problem of how to decompose $\dB_{2,n}$ into isomorphic universal building blocks has been partially solved within project Galileo at JPL in Pasadena (they have built a parallel machine based on $\dB_{2,16}$).
\item Instead of hierarchical recursivity, de Bruijn topology has a different interesting property. $\dB_{d,n}$ is the {\em line digraph\/} of $\dB_{d,n-1}$, i.e., if we replace all arcs in $\dB_{d,n-1}$ with vertices and preserve adjacency, we get $\dB_{d,n}$. This property is used for optimal simulations of large de Bruijn networks on small ones. 
\item The connectivity is {\em nearly\/} optimal: $\kappa(\dB_{d,n})=\lambda(\dB_{d,n})=d-1$. It is only $d-1$ due to the loops at vertices $a^n$, $a\in\Zs_d$ (see for example Figure \ref{db2+3}). 
\item The fault diameter of $\dB_{d,n}$ is only $n+1$: The lengths of all $d-1$ vertex-disjoint paths joining two arbitrary vertices $u$ and $v$ are at most $n+1$. 
\item The exact value of $\bw_{\rm e}(\dB_{d,n})$ is known up to a small constant multiplicative factor.
\item The binary de Bruijn digraph $\dB_{2,n}$ is the state diagram of a shift register. It needs only 4 links per node to achieve the same diameter as an equally sized hypercube.
\item Besides digraphs, there exist also undirected graph versions. Most properties of digraphs apply to graphs, including the complexity of communication algorithms.
\item The problem of incremental extendability and scalability was also partially solved. There exist modified de Bruijn topologies, called {\em generalized\/} de Bruijn networks, which are incrementally extendable, and so-called {\em partial line digraphs\/}, which are even incrementally scalable. However, the topological complexity of these variations is even higher than that of the standard ones.
\eitem

\begin{figure}[ht]
\cobrx{140mm}{dB23}
\caption{De Bruijn digraphs $\dB_{2,3}$ and $\dB_{3,2}$.}
\label{db2+3}
\end{figure}

\end{document}
