%%%%%%%%%%%%%%%%%%%%%%% file template.tex %%%%%%%%%%%%%%%%%%%%%%%%%
%
% This is a general template file for the LaTeX package SVJour3
% for Springer journals. Springer Heidelberg 2006/03/15
%
% Copy it to a new file with a new name and use it as the basis
% for your article. Delete % signs as needed.
%
% This template includes a few options for different layouts and
% content for various journals. Please consult a previous issue of
% your journal as needed.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% First comes an example EPS file -- just ignore it and
% proceed on the \documentclass line
% your LaTeX will extract the file if required
\begin{filecontents*}{example.eps}
%!PS-Adobe-3.0 EPSF-3.0
%%BoundingBox: 19 19 221 221
%%CreationDate: Mon Sep 29 1997
%%Creator: programmed by hand (JK)
%%EndComments
gsave
newpath
20 20 moveto
20 220 lineto
220 220 lineto
220 20 lineto
closepath
2 setlinewidth
gsave
.4 setgray fill
grestore
stroke
grestore
\end{filecontents*}
%
\documentclass{svjour3}
%% My own preambles starts
\usepackage{amsmath}
\usepackage{amssymb}
%% My own preambles ends
% onecolumn (standard format)
%\documentclass[smallextended]{svjour3} % onecolumn (second format)
%\documentclass[twocolumn]{svjour3} % twocolumn
%
\smartqed % flush right qed marks, e.g. at end of proof
%
\usepackage{graphicx}
%
% \usepackage{mathptmx} % use Times fonts if available on your TeX system
%
% insert here the call for the packages your document requires
%\usepackage{latexsym}
% etc.
%
% please place your own definitions here and don't use \def but
% \newcommand{}{}
%
% Insert the name of "your journal" with
\journalname{Numerical Algorithms}
%
%preamble
\providecommand{\newoperator}[3]{%
\newcommand*{#1}{\mathop{#2}#3}}
\providecommand{\renewoperator}[3]{%
\renewcommand*{#1}{\mathop{#2}#3}}
% Some new operators
\newoperator{\res}{\mathrm{res}}{\nolimits}
\newcommand{\Matlab}{\textsc{Matlab}\ }
\newcommand{\Maple}{\textsc{Maple}\ }
\newcommand{\interpolational}{interpolational\ } % RMC: anticipating being convinced by John to change it back.
\newcommand{\IJeh}{\ensuremath{[I\!J]}}
\newcommand{\HB}{HB}
\newcommand{\e}{\ensuremath{\varepsilon}}
\newcommand{\V}{\ensuremath{\mathbf{V}}}
\newcommand{\Mt}{\ensuremath{\widetilde{\mathbf{M}}}}
\renewcommand{\qed}{\begin{flushright}\vbox{\ensuremath{\natural}}\end{flushright}}
%\usepackage{showlabels}
\begin{document}
\title{Polynomial Algebra
for Birkhoff Interpolants % \thanks{Grants or other notes
%about the article that should go on the front page should be
%placed here. General acknowledgments should be placed at the end of the article.}
}
\subtitle{
%Do you have a subtitle?\\ If so, write it here
}
%\titlerunning{Short form of title} % if too long for running head
\author{John C. Butcher \and Robert M. Corless \and Laureano Gonzalez-Vega \and Azar Shakoori
%Second Author %etc.
}
%\authorrunning{Short form of author list} % if too long for running head
\institute{R. M. Corless \at
Department of Applied Mathematics University of Western Ontario \\
Tel.: %+123-45-678910
\\
Fax: %+123-45-678910
\\
\email{rcorless@uwo.ca} %
\\
% \emph{Present address:} of F. Author % if needed
%\and
%A. Shakoori \at
% second address
}
\date{Received: date / Accepted: date}
% The correct dates will be entered by the editor
\maketitle
% Commentaries to be eventually deleted
%%\def\tick{\marginpar{\raisebox{-1mm}{$\sqrt{}$}}}
%%\def\NB{\marginpar{\raisebox{-1mm}{\LARGE$\ast$}}}
%%
%%\vspace{-5mm}
%%\section*{How things stand: 3 September 2009}
%%
%%\vspace{-2mm}
%%\subsection*{\it Present state of manuscript}
%%\begin{enumerate}
%%\item[Abstract] written
%%\item[1] The introduction is written
%%\item[] visit this again when the rest of the paper is written.
%%\item[2] Lalo has undertaken to write this section
%%\item[3] Written
%%\item[3.2] Number some of the equations between the present (19) and (20) because some of these will be referred to later.
%%\item[4.1] Written
%%\item[4.2] The Azar and John version is written and Rob's earlier version is included for comparison.definite proposal.
%%\item[] If Rob wants a new version synthesising the existing two versions, he should make a
%%\item[4.3 etc] Azar and John want to combine \tick 4.3 and 4.4 into a short subsection based mainly on examples. The subsection at present numbered 4.5 could be deleted,
%%The first draft of the section on root finding is the responsibility of Azar and Rob.
%%\item[5] Rob and Lalo are to take responsibility for the first draft of this section.
%%\item[6] Delay until paper substantially complete
%%\item[] Bibliography: Lalo has undertaken to propose the first draft of this
%%\end{enumerate}
%%
%%
%%\vspace{-2mm}
%%\subsection*{\it Plans for completing the paper}
%%
%%\vspace{-2mm}
%%\begin{enumerate}
%%\item John wants to look again at the notation~$a$ versus $\widehat a $. He will do this at a later date.
%%\item John want to understand why HB interpolation, the way we are doing it, doesn't lead to a rational formula
%%for the interpolant. In particular why is the expression at present numbered (36) a polynomial?
%%\item 3 September: As from this date, Azar takes over management of the manuscript.
%%
%%\item Cleaning up.
%%
%%\begin{enumerate}
%%\item remove \verb'\usepackage(showlabels)'
%%\item convert equation to equation* for all unlabelled equations
%%\item Remove ticks etc from paper
%%\item remove this commentary stuff from the beginning of the paper
%%\end{enumerate}
%%
%%\item Rob wants to include a section on rational interpolation. John doesn't understand why this is appropriate in this paper because it falls outside the core aims and will delay completion.
%%
%%\end{enumerate}
%%
%%
%%
\begin{abstract}
We introduce a unifying formulation of a number of related problems which can all be solved using a contour integral formula. Each of these problems requires finding a non-trivial linear combination of some of the values of a function~$f$, and its first and higher derivatives, at a number of data points. This linear combination is required to have zero value when~$f$ is a polynomial of up to a specific degree~$p$. Examples of this type of problem include Lagrange, Hermite and Hermite-Birkhoff interpolation; fixed-denominator rational interpolation; and various numerical quadrature and differentiation formulae. Other applications include the estimation of missing data and root-finding.
\keywords{Lagrange, Hermite, and Hermite-Birkhoff interpolation \and Contour integrals \and Barycentric form\and Fixed-denominator Rational Interpolation \and %Missing data
Root-finding}
% \PACS{PACS code1 \and PACS code2 \and more}
\subclass{41A05 \and 65D05 \and 65D25 \and 65D30}
\end{abstract}
\section{Introduction}
The purpose of this paper is to present a number of approximation formulae using a single standard formulation.
These include formulae for Lagrange, Hermite and Hermite-Birkhoff interpolation (hereafter \HB\ interpolation), divided difference formulae, numerical quadrature and numerical differentiation.
Each of these approximation formulae can be written as a non-trivial linear combination
\begin{equation}\label{eq:prebirk}
\sum_{i=1}^n\sum_{j\in S_i} \widehat a_{ij} f^{(j)} (\tau_i) =0,
\end{equation}
where~$f$ is a polynomial of degree not exceeding~$p$, and~$S_i$ is a
% proper
subset of $\{0,1,\dots, s_i-1\}$ such that $s_i-1\in S_i$. The positive integers $s_i$ are sometimes referred to as `confluencies'.
The value of~$p$ is $-2+\sum_{i=1}^n | S_i|$, where $ | S_i|$ is the cardinality of~$S_i$.
If $S_i=\{0,1,\dots,s_i-1\}$, for each $i=1,2,\dots,n$, we will rewrite \eqref{eq:prebirk} in the form
\begin{equation}
\sum_{i=1}^n\sum_{j = 0}^{s_i-1} a_{ij} f^{(j)} (\tau_i) =0.
\end{equation}
The common feature of these formulations is their relation to contour integrals of the form
\begin{equation}
\frac1{2\pi i} \oint_C R(z) f(z) dz,
\end{equation}
where~$R$ is a rational function related to the problem. By using the Cauchy integral formula we will find the approximation we require.
The paper is organized as follows: a collection of pointers to known literature on \HB\ interpolation and related problems will be presented in Section~\ref{sec:review}. This is followed by Section~\ref{sec:contour}, where the central result is given and by Section~\ref{sec:appl} where diverse applications are considered. Results and discussion on the existence and other properties of these approximations are presented in Section~\ref{sec:posed}.
\section{Selections from the literature}\label{sec:review}
The literature relevant to this present paper is substantial, and we do not present a comprehensive survey. The references of the papers we cite should be consulted for further pointers.
\HB\ interpolation problems occur in several applications. For example, the numerical solution of initial-value problems for ordinary differential equations often needs interpolants for event location, error control, or simply for graphical output. Derivative data is often more readily available than data on the value of the solution; see e.g.~\cite{Higham:RKDC:1991} and, more recently, \cite{Tsitouras:RKI:2007}. Birkhoff's original paper~\cite{Birkhoff:GMV:1906} was concerned with ``mechanical differentiation and quadrature'', and these remain important. Other, newer, applications occur in, for instance, Computer-Aided Geometric Design, and in any application that has to deal with missing or unsampled data.
Several algorithms exist in the literature for solving \HB\ interpolation problems. The simplest is presented in one line in~\cite[p.~12]{Lorentz:BI:1983}, but is not a practical algorithm for even moderate-sized problems, being just Cramer's rule. Existing more practical algorithms include~\cite{Fiala:AHB:1973} and~\cite{Muehlbach:AHB:1983}, but we claim that the algorithm presented in this paper is more numerically stable and in some cases much faster, especially if the number of missing data points is small compared to the overall number of data points.
The method of this paper is also useful in the derivation of other formulae, including compact finite differences~\cite{Lele199216}, which themselves find applications in
several fields (see e.g.~\cite{Zhao:IDE:2006,Zhao:CFD:2007}).
The main technique of this paper is contour integration, which has a long and intimate connection with interpolation and with finite differences; see, e.g.~\cite[p.~11]{MilneThomson:1933}.
We will cite such references as we need in the course of the exposition. This technique was first used for \HB\ interpolation in~\cite{Butcher:MGRKM:1967}, and we develop it
further here.
\HB\ interpolation is also an old subject, dating to 1906 with Birkhoff's first paper, and comprehensively reviewed till 1983 in~\cite{Lorentz:BI:1983}. We do not attempt a comprehensive
review of the literature since then. Indeed, our paper is concerned with an algorithm for solving specific \HB\ interpolation problems, and not with finding conditions for unique
solvability of such problems, which represents a significant focus of modern research in the area. \HB\ interpolation problems have several interesting properties. The first of these is that \HB\ interpolation problems do not always have unique solutions. The continuity properties of \HB\ interpolation problems are also of interest~\cite{Dyn:CBI:1982}. This will be discussed
further in Section~\ref{sec:posed}.
In this paper we rely on a particular solution of the Hermite interpolation problem, which can be regarded as a special case of the \HB\ interpolation problem. It is well-known, see e.g.~\cite{Higham:2002:ASN},
that Hermite interpolation (via divided differences for example) can be used as a fast method to solve confluent Vandermonde matrices. Some of the present authors maintain in
another paper that methods for solving Hermite interpolation
problems that avoid using Newton or monomial bases may be more numerically stable~\cite{Corless:LBB:2004}, and that in particular algorithms that treat each point equally, and do not depend on
an imposed ordering, may result in better-conditioned problems. In particular, following~\cite{Berrut:BLI:2004} and~\cite{Higham:NSB:2004} and~\cite{Corless:BHIE:2008}, we use
the \textsl{barycentric forms}: If the (Hermite) data $f^{(j)}(\tau_i)/j!$ is known for $1 \le i \le n$ and $0 \le j \le s_i - 1$ (as in the previous section), then the \interpolational
polynomial $f(t)$ may be represented (in a numerically stable fashion, if the confluencies $s_i$ are not too large) as
\begin{equation}
f(t) = w(t) \sum_{i=1}^n \sum_{j=0}^{s_i-1}\sum_{k=0}^j \gamma_{ij}f^{(k)}(\tau_i)(t-\tau_i)^{k-j-1}/k!\>,
\end{equation}
where $w(t)$ is given by~\eqref{eq:w} below, or $f(t)$ can be written in the `second barycentric form'
\begin{equation}
f(t) = \frac{\sum_{i=1}^n \sum_{j=0}^{s_i-1}\sum_{k=0}^j \gamma_{ij}f^{(k)}(\tau_i)(t-\tau_i)^{k-j-1}/k!}{\sum_{i=1}^n \sum_{j=0}^{s_i-1} \gamma_{ij}(t-\tau_i)^{-j-1}}\>.
\end{equation}
Evaluating $f(t)$ or its derivatives exactly (to floating-point precision) at the nodes $t=\tau_i$ requires special treatment, but this turns out not to be difficult.
The coefficients $\gamma_{ij}$ appearing in those formulae were computed via divided differences in~\cite{Schneider:BCH:1991}. Unfortunately, the numerical stability of that
computation depends on the ordering of the nodes $\tau_i$; the method we present below appears to have superior stability, in compensation for being a constant factor slower
than the algorithm of~\cite{Schneider:BCH:1991}.
Finally, our paper makes heavy use of partial fractions: specifically, computation of partial fraction expansions in the case when the
denominator is completely factored into (possibly complex) linear factors. See, e.g.~\cite{Henrici:ACCA:1986}, although the algorithms we use go back at least to Euler (1738).
The algorithms appropriate to this case are simpler than when the factors of the denominator are not known (see e.g.~\cite{MahoneySivazlian:PFD:1983}) or when computation over
other fields than the complex numbers are needed (see e.g.~\cite{Bronstein:FPF:1993,Gathen:MCA:1999,Geddes:ACA:1992,Stoutemyer:MPFE:2008}).
It seems remarkable that the connection between partial fractions
and Hermite interpolation, which we learned from~\cite{Schneider:BCH:1991} but was `in the literature' much before that---even Turnbull~\cite{Turnbull:NPF:1927}
states that he believes the result is not new, possibly going back to Jacobi in 1841---is not better known. Recent papers apparently rediscovering the
connection include~\cite{Hou:Inversionconf:2002} and~\cite{LutherRost:ICV:2004}, which last uses it and the \interpolational definition of matrix functions~\cite{Higham:FM:2008} to evaluate
the matrix exponential in an efficient and robust way.
Asymptotically fast (though numerically unstable)
algorithms for computing the partial fraction decomposition in $O(n\log n\log \log n )$ operations date to~\cite{chin:554,kung:582}, and the FFT is
again used in~\cite{Berriochoa2009}. We do not use asymptotically fast methods, because we believe they are numerically unstable. The algorithm we use is of quadratic
cost, not quasilinear.
\section{Solution by contour integrals}\label{sec:contour}
\subsection{Statement of the problem}
We are concerned with a function~$f$ and various approximations related to it. The approximations will have order~$p$, in the sense that if~$f$ is replaced by a polynomial of degree~$p$, then the approximation will be exact. The approximations will all have the form
\begin{equation}\label{eq:hermite}
\sum_{i=1}^n \sum_{j=0}^{s_i-1} a_{ij} f^{(j)} (\tau_i) =0, \quad \deg(f) \le p.
\end{equation}
Because of its relation to Hermite interpolation, we will refer to the construction of approximations satisfying \eqref{eq:hermite} as the ``Hermite case''.
In Subsection \ref{sub:hermite} we will obtain formulae for the coefficients~$a_{ij}$.
A more general problem, which we will refer to as the ``Birkhoff case'', is to assume that there is missing data. In other words, we will assume that for each~$i$, $f^{(j)}(\tau_i)$ is given only for a subset~$S_i$ of $\{0,1,\dots, s_i-1\}$. We will assume that such a subset always contains $s_i-1$.
As a matter of parsimony, this will enable us to avoid the situation that $s_i$ could have been replaced by a lower integer.\footnote{Our \Maple and \Matlab codes make no such restriction, though---they simply do more work than strictly necessary and give an answer equivalent to the most parsimonious approach, although containing extra factors in numerator and denominator that cancel out.}
With this more general case, \eqref{eq:hermite} will be replaced by
\begin{equation}\label{eq:birkhoff}
\sum_{i=1}^n \sum_{j\in S_i} \widehat a_{ij} f^{(j)} (\tau_i) =0, \quad \deg(f) \le p.
\end{equation}
By counting the number of constraints, we would hope to be able to solve \eqref{eq:hermite} with $p=-2+\sum_{i=1}^n s_i$.
% It will be shown by construction that this is always possible, when the problem is poised SAVE FOR LATER
In the Birkhoff case, again by counting constraints, we see that it is appropriate to aim for an order
\begin{equation}
p=-2+\sum_{i=1}^n | S_i|.
\end{equation}
%\fbox{Comment on where we discuss existence etc}
Conditions under which \eqref{eq:birkhoff} is guaranteed to have a solution will be discussed in Section~\ref{sec:posed}.
\subsection{Solution using contour integrals in the Hermite case}\label{sub:hermite}
Given an approximation problem in the Hermite case, we define $w(z)$ by
\begin{equation}\label{eq:w}
w(z) = \prod_{i=1}^n (z-\tau_i)^{s_i},
\end{equation}
where the $\tau_i$ and $s_i$ are as in \eqref{eq:hermite}.
We have the following lemma.
\begin{lemma}\label{lem:contour}
Let~$f$ be a polynomial of degree not exceeding $p=-2+\sum_{i=1}^n s_i$. Then
\begin{equation}\label{eq:contour}
\frac1{2\pi i} \oint_C \frac{f(z)}{w(z)} dz=0,
\end{equation}
where $w(z)$ is defined by \eqref{eq:w} and the contour~$C$ is a circle with centre~$0$ and radius greater than $\max_{i=1}^n |\tau_i|$.
\end{lemma}
\begin{proof}
The proof is a classic result in complex analysis and is an easy consequence of (for example)~\cite[Thm.~8.1, p.~233]{Levinson:COMP:1970}.
%For~$R$ sufficiently large, we have the estimates
%\begin{equation}
%|f(z)| < c_1 R^p, \qquad |w(z)| > c_2 R^{p+2},
%\end{equation}
%for $|z|=R$ and constants $c_1$ and $c_2$, which depend on the coefficients in~$f$ and~$w$ respectively.
%Hence
%\begin{equation}
%\left|\frac1{2\pi i} \oint_C \frac{f(z)}{w(z)} dz\right| \le \frac{c_1}{c_2 R},
%\end{equation}
%which can be made arbitrarily small by choosing~$R$ large enough. Hence the integral is zero.
\qed
\end{proof}
We can now write $w(z)^{-1}$ in \eqref{eq:contour} in partial fractions
\begin{equation}\label{eq:parf}
\frac1{w(z)} = \sum_{i=1}^n \sum_{j=0}^{s_i-1} \frac{\gamma_{ij} }{(z-\tau_i)^{j+1}},
\end{equation}
so that substituting \eqref{eq:parf} into \eqref{eq:contour} and evaluating term by term, we find the result
\begin{equation}
\sum_{i=1}^n \sum_{j=0}^{s_i-1} \frac{\gamma_{ij}}{j!} f^{(j)} (\tau_i) = 0.
\end{equation}
Hence, we can solve \eqref{eq:hermite} using
\begin{equation}\label{eq:aijgamma}
a_{ij} = \frac{\gamma_{ij}}{j!}.
\end{equation}
The $\gamma_{ij}$ in \eqref{eq:aijgamma} are known as the generalized
barycentric weights in the Hermite interpolation problem~\cite{Schneider:BCH:1991}.
To evaluate the $a_{ij}$ in a convenient form, we use the following result, which is
equivalent to the local series method for computing the partial fraction expansion of $1/w(z)$.
Many sources contain this algorithm, for example~\cite[v.~1, p.~555]{Henrici:ACCA:1986}.
Faster algorithms are known, see e.g.~\cite{Schneider:BCH:1991}, but they can be
significantly less stable numerically. We include a description of the algorithm here,
for convenience; our \Matlab and \Maple implementations of this algorithm use the
recurrence relation~\eqref{eq:recursion} below.
\begin{lemma}\label{lem:gamma}
Let~$K$ denote a finite set of integers, and define $\chi_1, \chi_2,\ldots$ by the formula
\begin{equation}\label{eq:Kai}
\chi_0+\chi_1 t+\chi_2 t^2+\ldots=\chi_0\prod_{k\in K}^{}(1-t\theta_k)^{-s_k}\,,
\end{equation}
where $\chi_0$ is given.
Then, $\chi_1, \chi_2,\ldots$ satisfy the recurrence relation
\begin{equation}\label{eq:recursion}
\chi_j=\dfrac{1}{j}\displaystyle \sum_{\ell=0}^{j-1}\chi_{\ell}\phi_{j-\ell}\,,
\end{equation}
where $\phi_j=\displaystyle \sum_{k\in K}^{}s_k\theta_k^j$\,.
\end{lemma}
\begin{proof}
From the logarithmic series we have
\begin{equation}\label{eq:logseries}
\ln{(1-\theta_k t)}=-\sum_{j=1}^{\infty}\dfrac{t^j}{j}\theta_k^j\,,
\end{equation}
For $|t|$ sufficiently small, it follows by multiplying by $-s_k$ and summing over $k\in K$, that
\begin{equation}\label{eq:logproduct}
\ln{ \Big( \prod_{k\in K}^{}(1-t\theta_k)^{-s_k}\Big)}=\sum_{j=1}^{\infty}\dfrac{t^j}{j}\phi_{j}\,,
\end{equation}
and therefore
\begin{equation}\label{eq:Kaiexp}
\chi_0+\chi_1 t+\chi_2 t^2+\cdots=\chi_0\exp{\bigg( \displaystyle \sum_{j=1}^{\infty}\dfrac{t^j}{j}\phi_j\bigg)}\,.
\end{equation}
Differentiate \eqref{eq:Kaiexp} with respect to~$t$ and it follows that
\begin{equation}\label{eq:diffexp}
\chi_1+2\chi_2 t+ 3\chi_3 t^2+\cdots=(\phi_1+\phi_2 t+\cdots)( \chi_0+\chi_1 t+\chi_2 t^2+\cdots )\,.
\end{equation}
Equate the coefficients of $t^{j-1}$ and \eqref{eq:recursion} follows.
\qed
\end{proof}
In our application of this lemma, let
\begin{equation*}
K=\{1,2,\dots,n\} \setminus\{i\},\hspace{2mm}\theta_k=\frac1{\tau_k-\tau_i},\hspace{2mm} \chi_0= \prod_{k\in K} (\tau_i-\tau_k)^{-s_k},
\hspace{2mm} t=z-\tau_i.
\end{equation*}
For a given $i\in \{1,2,\dots,n\}$, evaluate the Laurent expansion about $z=\tau_i$ as follows:
\begin{equation}\label{eq:Laurentexp}
\begin{aligned}
\frac1{w(z)} &=\frac1{(z-\tau_i)^{s_i}} \prod_{k\in K} (z-\tau_k)^{-s_k}\\
&= \frac1{(z-\tau_i)^{s_i}} \prod_{k\in K} \Big((\tau_i-\tau_k)\Big(1-\frac{z-\tau_i}{\tau_k-\tau_i}\Big)\Big)^{-s_k}\\
&= \frac1{(z-\tau_i)^{s_i}} \chi_0 \prod_{k\in K} (1- t \theta_k)^{-s_k}\\
&= \frac1{(z-\tau_i)^{s_i}}(\chi_0 + \chi_1 (z-\tau_i) + \chi_2 (z-\tau_i)^2 + \cdots )
\end{aligned}
\end{equation}
and hence
\begin{equation}\label{eq:alphaij}
\gamma_{ij} = \chi_{s_i-j-1}, \quad a_{ij} =\frac{ \chi_{s_i-j-1}}{j!}\>.
\end{equation}
\begin{remark}
It costs $n-2$ multiplications and $n-2$ powers to compute each $\chi_{i,0}$, of which there are $n$, making $O(n^2)$ work. It
costs $s_i(s_i+1)/2$ multiplications and additions to compute each $\chi_{i,j}$, $1 \le j \le s_i-1$. Thus the total cost is
$O(n^2) + \sum_{i=1}^n s_i(s_i+1)/2 $; examining the ways in which the degree $p$ may be partitioned into blocks of length $s_i$,
we find that the maximum of the sum occurs when one $s_i = p/3$ and all the other $s_j=1$, when the total cost is bounded by $O(p^2)$.
\end{remark}
Having found the coefficients in \eqref{eq:hermite}, we generalize to the Birkhoff case.
\subsection{Solution in the Birkhoff case}\label{sub:birkhoff}
To allow for missing data,
replace \eqref{eq:contour} by
\begin{equation}\label{eq:contourB}
\frac1{2\pi i} \oint_C \frac{B(z)f(z)}{w(z)} dz=0,
\end{equation}
where
\begin{equation}\label{eq:birkfactor}
B(z) = b_0 + b_1 z + \cdots + b_m z^m
\end{equation}
and~$m$ is the number of missing data items. The integral is still zero if~$f$ is now restricted to a polynomial of degree
\begin{equation}
p=-2-m+ \sum_{i=1}^n s_i = -2 + \sum_{i=1}^n |S_i|.
\end{equation}
The coefficients in $B(z)$, suitably normalized, are to be chosen so that, in the partial fraction expansion of
$B(z)/w(z)$, which we will write as
\begin{equation}\label{eq:parfracbw}
\frac{B(z)}{w(z)} = \sum_{i=1}^n \sum_{j=0}^{s_i-1} \frac{\widehat\gamma_{ij} }{(z-\tau_i)^{j+1}},
\end{equation}
the value of $\widehat \gamma_{ij}$ will vanish whenever $j\not\in S_i$. To solve \eqref{eq:birkhoff}, it is then possible to use a slightly modified form of Lemma \ref{lem:contour} to give the result
\begin{equation}\label{eq:gammahat}
\widehat a_{ij} = \frac{\widehat \gamma_{ij}}{j!},\quad j\in S_i.
\end{equation}
We compute the coefficients $\widehat\gamma_{ij}$ by multiplying the local series for $B(z)$ with the
partial fraction decomposition of $1/w(z)$. The coefficients are computed with Cauchy convolution,
which we write in matrix notation in the Theorem below.
%
%\framebox{Complete the remark}
%\begin{remark}
%The partial fraction expansion of \eqref{eq:parfracbw} can be represented in a matrix notation. To obtain the corresponding matrix notation we recall that in the Hermite case if the local series in \eqref{}:
%\begin{equation}
%B(z)=\beta_{i,0}+\beta_{i,1}(z-\tau_i)+\ldots+\beta_{i,s_i-1}(z-\tau_i)^{s_i-1}+O((z-\tau_i)^{s_i})\,,
%\end{equation}
%We have
%\begin{equation}
%\dfrac{B(z)}{w(z)}=\displaystyle\sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\sum_{k=0}^{j}\gamma_{ij}\beta_{ik}(z-\tau_i)^{k-j-1}
%\end{equation}
%interchanging orders of summation as in Lemma~\ref{lem:rational}
%\begin{equation}
%=\displaystyle\sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\widehat{\gamma}_{ij}(z-\tau_i)^{(-j-1)}\,,
%\end{equation}
%where
%\begin{equation}
%\widehat{\gamma}=\sum_{k=0}^{s_i-1-j}\gamma_{i,j+k}\beta_{ik}\,.
%\end{equation}
%Interpreting this in matrix form, we have
%\begin{equation}
%\mathbf{\Gamma}_i\mathbf{T}_i\mathbf{b}=
%\left[
%\begin{array}{c}
%\widehat{\gamma}_{i,0} \\
%\widehat{\gamma}_{i,1}\\
%\vdots \\
%\widehat{\gamma}_{i,s_i-1} \\
%\end{array}
%\right]
%\end{equation}
%where
%\begin{equation}
%\mathbf{\Gamma}_{i}=\left[
%\begin{array}{ccccc}
%\gamma_{i,0} & \gamma_{i,1} & \ldots & \gamma_{i,s_i-2} &\gamma_{i,s_i-1} \\
%\gamma_{i,1} & \gamma_{i,2} & \ldots &\gamma_{i,s_i-1} & 0 \\
%\vdots & & & & \vdots \\
% \gamma_{i,s_i-1}& 0 & \ldots & 0 & 0\\
%\end{array}
%\right]\,,
%\end{equation}
%Note that for $0 \leqslant j \leqslant s_i-1$ and $0 \leqslant k\leqslant s_i-1$, the $(j,k)$ entry (indexing from~$0$) of $\mathbf{\Gamma}_i$ is
%\begin{equation}
%\mathbf{\Gamma}_{i}(j,k)=\left\{
%\begin{array}{rl}
%\gamma_{i,k+j} &\quad \text{if}~k+j\leqslant s_i-1\\
%0 & \quad \text{otherwise } \\
%\end{array} \right.
%\end{equation}
%and
%\begin{equation}
%\mathbf{T}_i=\left[
%\begin{array}{ccccc}
%1 & \tau_i & \tau^2_i & \ldots & \tau_i^m \\
% & 1 & 2\tau_i & \ldots & m\tau^{m-1}_i\\
% & & 1 & \ldots & \dfrac{m(m-1)}{2!}\tau^{m-2}_i\\
% & & & \ddots & \vdots\\
% & & & & 1\\
% \end{array}
%\right]\,,
%\end{equation}
%
%Similarly,
%\begin{equation}
%\mathbf{T}_{i}(j,k)=\binom{k}{j}t^{k-j}_i
%\end{equation}
%(Recall that $\binom{k}{j}=0$, if $j>k$).
%\end{remark}
%%
%\framebox{Use Rob's handwritten notes to clarify the following theorem a bit more $\sqrt{} $}
\begin{theorem}\label{th:matrix}
For $i=1,2,\dots, n$, let $\mathbf{M}_i$ denote the $s_i \times (m+1)$ matrix
\begin{equation}\label{eq:gammaT}
\mathbf{M}_i = \mathbf{\Gamma}_i \mathbf{T}_i,
\end{equation}
where $\mathbf{\Gamma}_i$ is the $s_i \times s_i$ matrix
\begin{equation}
\mathbf{\Gamma}_{i}=\left[
\begin{array}{ccccc}
\gamma_{i,0} & \gamma_{i,1} & \ldots & \gamma_{i,s_i-2} &\gamma_{i,s_i-1} \\
\gamma_{i,1} & \gamma_{i,2} & \ldots &\gamma_{i,s_i-1} & 0 \\
\vdots & \vdots & \rotatebox{70}{$\ddots$} & \rotatebox{70}{$\ddots$} & \vdots \\
\gamma_{i,s_i-2} & \gamma_{i,s_i-1} & 0& &\\
\gamma_{i,s_i-1}& 0 & 0 & \cdots& 0\\
\end{array}
\right]\,,
\end{equation}
%\begin{equation}
%\mathbf{\Gamma}_{i}=\left[
%\begin{array}{ccccc}
%\gamma_{i,0} & \gamma_{i,1} & \ldots & \gamma_{i,s_i-2} &\gamma_{i,s_i-1} \\
%\gamma_{i,1} & \gamma_{i,2} & \ldots &\gamma_{i,s_i-1} & 0 \\
%\vdots & & & & \vdots \\
% \gamma_{i,s_i-1}& 0 & \ldots & 0 & 0\\
%\end{array}
%\right]\,,
%\end{equation}
where for $0 \leqslant j \leqslant s_i-1$ and $0 \leqslant k\leqslant s_i-1$, the $(j,k)$ entry (indexing from~$0$) of $\mathbf{\Gamma}_i$ is
\begin{equation}
\mathbf{\Gamma}_{i}(j,k)=\left\{
\begin{array}{rl}
\gamma_{i,k+j} &\quad \text{if}~k+j\leqslant s_i-1\\
0 & \quad \text{otherwise } \\
\end{array} \right.
\end{equation}
%
%with $(j,k)$ element $\Gamma_{jk} =\gamma_{i,j+k-2}$
%(or~$0$ if $j+k\ge 2+s_i$);
and $\mathbf{T}_i$ is the $s_i \times(m+1)$ matrix
\begin{equation}
\mathbf{T}_i=\left[
\begin{array}{ccccccccc}
1 & & \tau_i & & \tau^2_i & & \ldots & & \tau_i^m \\
& & 1 & & 2\tau_i & & \ldots & & m\tau^{m-1}_i\\
& & & & 1 & & \ldots & & \dfrac{m(m-1)}{2!}\tau^{m-2}_i\\
& & & & & & \ddots & & \vdots\\
\end{array}
\right]\,,
\end{equation}
with the $(j,k)$ element
\begin{equation}
\mathbf{T}_{i}(j,k)=\binom{k}{j}\tau^{k-j}_i\>,
\end{equation}
and the last row allows computation of the $(s_i-1)$th derivative of~$B$ evaluated at $\tau_i$.
[Recall that $\binom{k}{j}=0$, if $j>k$.]
%, with $(j,k)$ element $T_{jk}= \binom{k-1}{j-1}\tau_i^{k-j}$ (or~$0$ if $j>k$)
Let $\Mt$ denote the $m\times(m+1)$ matrix
\begin{equation}
\Mt=
\left[
\begin{array}{c}
\Mt_1 \\
\Mt_2 \\
\vdots\\
\Mt_n
\end{array}
\right],
\end{equation}
where
$\Mt_i$ consists of rows of $\mathbf{M}_i$ such that row number~$j$ is included if and only if $j\not\in S_i$.
Then, if $\Mt$ has rank~$m$, in which case we say that the \HB\ problem is \textsl{R-regular}, for ``residue regular'',
the coefficients in \eqref{eq:birkfactor} are a non-zero solution of
\begin{equation}
\Mt {\mathbf b} =0\,, \quad \mbox{where} \quad {\mathbf b}=
\left[
\begin{array}{cccc}
b_0&
b_1&
\dots &
b_m
\end{array}
\right]^T.
\end{equation}
\end{theorem}
\begin{proof}
The proof requires finding~$m$ homogeneous linear equations in the elements of~$\mathbf b$.
This, in turn, requires finding the values of $\widehat \gamma_{ij}$ in \eqref{eq:gammahat}, in terms of~$\mathbf b$.
The first step is to write the Taylor coefficients about $\tau_i$ in terms of~$\mathbf b$. This is given by the formula
\begin{equation}\label{eq:Tib}
\left[
\begin{array}{c}
B(\tau_i)\\
B'(\tau_i)\\
\frac1{2!} B''(\tau_i)\\
\vdots\\
\frac1{(s_i-1)!} B^{(s_i-1)}(\tau_i)
\end{array}
\right]= \mathbf{T}_i {\mathbf b}.
\end{equation}
The values of $\widehat \gamma_{ij}$, for $j=0,1,\dots,s_i-1$, are found by multiplying \eqref{eq:parf} by
\begin{equation}
B(z)=B(\tau_i) + B'(\tau_i) (z-\tau_i) + \cdots + \frac1{(s_i-1)!} B^{(s_i-1)}(\tau_i) (z-\tau_i)^{s_i-1} + O((z-\tau_i)^{s_i}).
\end{equation}
[Note that some of those derivatives will be zero if $s_i > m$.]
This gives
\begin{equation}
\widehat \gamma_{ij} = \sum_{\ell=0}^{\min(j,m)} \gamma_{i\ell} \frac{B^{(\ell)}(\tau_i)}{\ell!},
\end{equation}
which can be written as
\begin{equation}
\left[
\begin{array}{c}
\widehat \gamma_{i0}\\
\widehat \gamma_{i1}\\
\vdots\\
\widehat \gamma_{i,s_i-1}\\
\end{array}
\right]=
\mathbf{\Gamma}_i \left[
\begin{array}{c}
B(\tau_i)\\
B'(\tau_i)\\
\frac1{2!} B''(\tau_i)\\
\vdots\\
\frac1{(s_i-1)!} B^{(s_i-1)}(\tau_i)
\end{array}
\right].
\end{equation}
Substitute from \eqref{eq:Tib} and use \eqref{eq:gammaT}; we find that
\begin{equation}
\left[
\begin{array}{c}
\widehat \gamma_{i0}\\
\widehat \gamma_{i1}\\
\vdots\\
\widehat \gamma_{i,s_i-1}\\
\end{array}
\right]=\mathbf{\Gamma}_i (\mathbf{T}_i {\mathbf b})
=\mathbf{M}_i {\mathbf b}
\end{equation}
and because $\widehat \gamma_{ij}=0$ for $j\not\in S_i$, it follows that $\Mt_i {\mathbf b}=0$.
Combine this result for all $i=1,2,\dots,n$ and the theorem follows.
\qed
\end{proof}
As in Subsection~\ref{sub:hermite}, we conclude this investigation of the Birkhoff case by finding the coefficients in~\eqref{eq:birkhoff}. These are
\begin{equation}
\widehat a_{ij} = \frac{\widehat \gamma_{ij}}{j!}, \qquad i\in \{1,2,\dots, n\}, j\in S_i.
\end{equation}
\section{Specific applications}\label{sec:appl}
\subsection{Interpolation}
Given data points as in Section \ref{sec:contour}, we can obtain a formula for the \interpolational polynomial by adding to the set $\{\tau_1, \tau_2, \dots, \tau_n\}$ the additional point $\tau_0=t$, with $s_0=1$. This means, in the Hermite case, that we make use of the contour integral \eqref{eq:contour} in which $w(z)$ is replaced by
\begin{equation}\label{eq:winterp}
w(z) = \prod_{i=0}^n (z-\tau_i)^{s_i} = (z-t) \prod_{i=1}^n (z-\tau_i)^{s_i}
\end{equation}
Following through with the construction of Subsection \ref{sub:hermite}, we find the modified form of \eqref{eq:hermite} as follows:
\begin{equation}
a_{00}(t) f(t) + \sum_{i=1}^n \sum_{j=0}^{s_i-1} a_{ij}(t) f^{(j)} (\tau_i) =0, \quad \deg(f) \le -1+\sum_{i=1}^n s_i.
\end{equation}
Assuming that $a_{00}(t)\ne 0$, this gives the interpolation formula
\begin{equation}
f(t) = \sum_{i=1}^n \sum_{j=0}^{s_i-1} \frac{-a_{ij}(t)}{a_{00}(t) } f^{(j)} (\tau_i), \quad \deg(f) \le -1+\sum_{i=1}^n s_i.
\end{equation}
\textsl{A priori} it is not clear that the expression for $f(t)$ is polynomial in~$t$; we shall discuss later, in Theorem~\ref{thm:reallypolynomial}, just why it is indeed polynomial.
%By Theorem~\ref{thm:reallypolynomial} the expression for $f(t)$ is polynomial in~$t$
In the Birkhoff case, we can do the construction described in Subsection \ref{sub:birkhoff}, again with $w(z)$ replaced by
\eqref{eq:winterp}. This gives a formula
\begin{equation}
f(t) = \sum_{i=1}^n \sum_{j\in S_i} \frac{-\widehat a_{ij}(t)}{\widehat a_{00}(t) } f^{(j)} (\tau_i), \quad \deg(f) \le -1+\sum_{i=1}^n |S_i|,
\end{equation}
again assuming that $\widehat a_{00}(t) \ne 0$. Again this is polynomial in~$t$.
If we have a particular numerical value of~$t$, or only a few such values, for which we need to evaluate~$f(t)$, then perhaps it makes sense to construct
these coefficients $\widehat a_{00}(t)$ and $\widehat a_{ij}(t)$ anew for each~$t$. If, however, we have a great many values of~$t$ for which we need to evaluate~$f$, then
perhaps it makes sense to do the construction ``once and for all''. This is indeed possible. The linear system defining the unknown coefficients of $B(z)$ then
contains a matrix and right-hand-side that depend polynomially on the symbol~$t$.
Solving this system is then a \textsl{symbolic computation}, and therefore more expensive, but still feasible in many applications. An alternative approach
that may be cheaper, filling in missing data, is discussed in Section~\ref{subsec:fillingdata}.
\begin{example}\label{ex:one}
Let $\tau = [0,r,1]$ with data given as $f(0)$, $f'(r)$, and $f(1)$. We do not specify~$r$, but for example suppose that~$r$ is a real number between~$0$ and~$1$.
Finding a polynomial of degree~$2$ that fits this data is a \HB\ problem. We begin by defining $w(z) = z(z-r)^2(z-1)$, adding a new node $\tau_0 = t$
and making all necessary changes in our notation to make this example clear, and computing the partial fraction
decomposition of
\begin{equation}
\begin{aligned}
\frac{1}{(z-t)w(z)} &= \frac {1}{t \left( r-t \right) ^{2}
\left( t-1 \right) \mathbf{ \left( z-t \right)}} + {\frac {1}{{r}^{2}t\mathbf{z}}} \\
&{}+ {\frac {1}{r \left( r-1 \right) \left( r-t \right) \mathbf{\left( z-r
\right) ^{2}}}} + {\frac {2\,r-t+2\,rt-3\,{r}^{2}}{ {r}^{2}\left( r-1
\right) ^{2} \left( r-t \right) ^{2}\mathbf{\left( z-r \right)} }}\\
&{} -{\frac {1}{ \left( r-1 \right) ^{2} \left( t-1
\right) \mathbf{ \left( z-1 \right)} }}
\>.
\end{aligned}
\end{equation}
The matrices of Theorem~\ref{th:matrix} that we need are, therefore, simple: $\Mt_0$ is empty, as are $\Mt_1$ and $\Mt_3$, but we
must have one row present in $\Mt_2$, namely
\begin{equation}
\widehat{\gamma}_{2,0}(b_0(t) + b_1(t)r) + \widehat{\gamma}_{2,1}b_1(t) = 0\>. \label{eq:residuex1}
\end{equation}
Enforcing this will ensure that the residue multiplying the unknown value of~$f$ at~$r$ will be zero. Clearly, we have one constraint and two unknowns $b_0$ and $b_1$,
each of which depend on~$t$; it is convenient to add a normalization condition, namely that the residue at $z=t$, our new symbolic node, is~$-1$. This will allow
easy isolation of the value of $f(t)$ from the resulting zero sum of residues. This residue is
\begin{equation}
\frac{b_0(t) + b_1(t)t}{w(t)} = -1\>.\label{eq:residuex1n}
\end{equation}
The determinant of the $2 \times 2$ matrix of these equations (\ref{eq:residuex1},\ref{eq:residuex1n}) is
\begin{equation}
\mathrm{Det} = \frac{2r-1}{r^2(1-r)^2}\>,
\end{equation}
which clearly identifies the problematic cases $r=0$ and $r=1$ when our formulation is structurally discontinuous, and the case $r=1/2$ when the problem is not R-regular (see
Section~\ref{sec:posed}).
If~$r$ is not~$0$, $1/2$, or~$1$, the system can be solved for the unknowns and the resulting residues computed, giving the barycentric formula
\begin{equation}
f(t) = w \left( t \right) \left( {\frac { \left( t+1-2\,r \right) f
\left( 0 \right) }{t \left( 2\,r - 1 \right) \left( t-r \right) ^{2}
}}+{\frac { f' \left( r \right) }{ \left( 2\,r-1 \right) \left( t-r \right) ^{2}}}+{\frac { \left( 2\,r-t
\right) f \left( 1 \right) }{ \left( t-r \right) ^{2} \left( 2\,r - 1
\right) \left( t-1 \right) }} \right)
\end{equation}
Notice that $w(t)=t(t-r)^2(t-1)$ contains a factor that exactly cancels each of the~$t$, $t-1$, and $(t-r)^2$ factors in each term of the barycentric formula.
The end result is a polynomial in~$t$. One could, if one wanted, rewrite the polynomial and express it in the monomial basis. We stress that doing so
is, in general, a bad idea.
\begin{remark}
When we are missing~$m$ pieces of data, we need only solve an~$m$ by~$m$ linear system, symbolic in that it contains the symbol~$t$;
here we solved an $m+1$ by $m+1$ system (symbolic both in~$t$ and in the parameter~$r$) because we chose to normalize
by making the residue at $z=t$ to be $-1$, instead of taking $B(z)$ to be monic, $B(z) = b_0(t) + z$. This is a matter of taste, which makes
no practical difference in the method.
\end{remark}
\end{example}
We now show that the solution we obtain to the \HB\ \interpolational problem is indeed a polynomial. Modifying our
notation from~\eqref{eq:winterp} slightly so that $w(z)$ refers only to the factors containing $\tau_i$, not $t$,
and so that the polynomial $B(z)$ which implicitly depends on $t$ is rewritten with the dependence explicitly noted,
we have the following.
\begin{theorem}\label{thm:reallypolynomial}
If the \HB\ problem is R-regular, write the order $p=-1+\sum_{i=1}^n |S_i|$ Birkhoff interpolation formula in the form
\begin{equation}\label{eq:poly1}
f(t) = \sum_{i=1}^n \sum_{j\in S_i} \alpha_{ij}(t) f^{(j)}(\tau_i),
\end{equation}
where
\begin{equation*}
-\frac{B(t,z)w(t)}{B(t,t)(z-t)w(z)} = -\frac1{z-t} + \sum_{i=1}^n \sum_{j\in S_i}\frac{ \gamma_{ij}(t)}{(z-\tau_i)^{j+1}}
\end{equation*}
and $\alpha_{ij}(t) =\gamma_{ij}(t)/j!\>$;
then $\alpha_{ij}(t)$ is a polynomial in $t$.
\end{theorem}
\textbf{Proof} The proof is by induction on $m$, starting with $m=0$, so that $B(t,z)=1$. For given $i\in \{1,2,\dots,n\}$,
the values of $\gamma_{ij}(t)$, $j=0,1,\dots,s_i-1$ are equal to the coefficients of $(z-\tau_i)^{-j-1}$ in the Laurent expansion
of $-w(t)/(z-t)w(z)$ about $z=\tau_i$. This is equal to the product of the Taylor series of
$-w(t)/(z-t)$ and the Laurent series of $1/w(z)$. This product is $w(t) A L$, where $L$ is the Laurent expansion of $1/w(z)$ and
\begin{equation*}
A=\frac1{t-\tau_i} + \frac{z-\tau_i}{(t-\tau_i)^2} + \cdots + \frac{(z-\tau_i)^{s_i-1}}{(t-\tau_i)^{s_i}}
\end{equation*}
is the Taylor series for $1/(t-z)$. Note that in $A$, it is necessary to include only $s_i$ terms because only terms of negative
exponent in $w(t) A L$ are relevant. The proof for $m=0$ concludes by observing that all the terms in $A$ cancel with $w(t)$
and that the Laurent coefficients in $L$ are independent of $t$.
If $m>1$, there exists $I\in \{1,2,\dots,n\}$ and $J.
\end{equation}
For $f $ a polynomial of degree not exceeding~$2$ we now have
\begin{equation}
0=\frac1{2\pi i} \oint_C \frac{f(z)dz}{w(z)}= -\frac13f(1) + \frac14 f(2) - \frac12f'(2) + \frac1{12} f(4)
\end{equation}
which immediately gives
\begin{equation}
f(2) = \tfrac43 f(1) + 2f'(2)-\tfrac13 f(4)\>.
\end{equation}
\end{example}
Of course, this particular problem could have been solved by other approaches as well, for example by divided differences~\cite{MilneThomson:1933}.
The general approach is to add a single pair $(I,J)$ at a time to the set of known data. That is, we suppose we wish to identify $f^{(J)}(\tau_I)$, which is
not given by any $(i,j)$ in $1\le i \le n$, $j\in S_i$ (but $\tau_I$ is one of the \interpolational nodes, and
so we cannot use the approach of the previous section, which required that $t\ne \tau_i$ for each~$i$).
Now carry out the construction in Subsection~\ref{sub:birkhoff}, but
here we do not set to zero the row corresponding to the pair $(I,J)$; this gives us only the remaining $m-1$ conditions to satisfy.
Thus, we need a multiplier $B(z)$ of only degree $m-1$. For convenience, as in example~\ref{ex:one}, we may use a degree~$m$
multiplier and set the residue for $(I,J)$ to be $-1$, if this is possible.
The resulting sum of residues is (in general)
\begin{equation}\label{eq:fillin}
a_{I\!J} f^{(J)}(\tau_I)+ \sum_{i=1}^n \sum_{j\in S_i} a_{ij} f^{(j)}(\tau_i) =0
\end{equation}
If $a_{I\!J} \ne 0$, this will allow us to identify the (formerly) missing piece of data.
\begin{remark}
If we wish to identify \textsl{all} missing data, then we need to do this process $m$ times, once for each missing
data point. It is evident that the equations are the same for each time, with the only difference being that the
residues are, one at a time, being set to $-1$. Thus the matrix (which is $\Mt$, except that here it is $m$ by $m$ because
we take $B(z)$ to be degree $m-1$) is identical each time; only the right-hand side changes,
being a column of all zeros except one $-1$. That is, the coefficients of $B(z)$ for each missing point are given
by the columns of $-\Mt^{-1}$. Obviously, if the \HB\ problem is R-regular, this fill-in process succeeds.
Our implementations use this method, and always fill in all missing data.
\end{remark}
The connection between the \interpolational polynomial and the fill-in result given by~\eqref{eq:fillin} is as follows.
\begin{theorem}\label{th:equiv}
If $a_{I\!J}$ in \eqref{eq:fillin} is not zero, and $\varphi(t)$ is the unique \interpolational polynomial determined by the (assumed R-regular) \HB\
problem $f^{(j)}(\tau_i)$, $j\in S_i$, $i\in
\{1,2,\dots,n\}$, then
\begin{equation}\label{eq:equiv}
f^{(J)}(\tau_I) = \varphi^{(J)}(\tau_I).
\end{equation}
\end{theorem}
\begin{proof}
Since $\varphi(t)$ is a polynomial of degree not exceeding $-1+\sum_{i=1}^n|S_i|$, \eqref{eq:fillin} holds with~$f$ replaced by $\varphi$.
That is,
\begin{equation}
a_{I\!J} \varphi^{(J)}(\tau_I)+ \sum_{i=1}^n \sum_{j\in S_i} a_{ij} \varphi^{(j)}(\tau_i) =0.
\end{equation}
Because of the \interpolational property of $\varphi$, $\varphi^{(j)}(\tau_i)= f^{(j)}(\tau_i)$ for all $j\in S_i$, $i\in\{1,2,\dots,n\}$.
Hence,
\begin{equation}\label{eq:phieqf}
a_{I\!J} \varphi^{(J)}(\tau_I)+ \sum_{i=1}^n \sum_{j\in S_i} a_{ij} f^{(j)}(\tau_i) =0.
\end{equation}
Subtract \eqref{eq:phieqf} from \eqref{eq:fillin}, and the result follows.
\qed
\end{proof}
\begin{table}
\caption{\label{tab:birkex1} Relative error in a degree 23 \HB\ problem, solved in \Matlab by the fill-in method.}
\begin{verbatim}
tau = cos( (0:n)*pi/n ); % nodes
t = linspace( -1, 1, 20*n+1 );
f = @(x) sin(pi*x);
df = @(x) pi*cos(pi*x);
ddf = @(x) -pi^2*sin(pi*x);
rho = [f(tau); df(tau); ddf(tau)]; % data
rho(1,6)=NaN;
rho(1,7) = NaN;
rho(2,8)=NaN;
r = rho(:);
r = birkhoff_interp1(r,tau,3)';
[w,D] = genbarywts( tau, 3 ); % get barycentric weights
[y,yp] = hermiteval( r, t, tau, 3, w, D ); % evaluate interpolant
\end{verbatim}
\end{table}
\begin{figure}
\begin{center}
\fbox{
\scalebox{0.45}{
{
\includegraphics{sinpideg2326}
}}
}
%\includegraphics{geometriccontours.eps,height=5cm,width=5cm,angle=-90,clip=}
\end{center}
\caption{\label{fig:derzer} Relative error $y/\sin(\pi t)-1$ in a degree $23$ \HB\ interpolant (missing two function values and one derivative value).
For comparison, we plot on the same figure the relative error in the corresponding degree $26$ Hermite interpolant. One sees that the Hermite interpolant
matches all $9$ data points and their first and (less visibly) their second derivatives; one also sees in the graph which two function values
were dropped for this example, and, looking carefully, one can also see that which derivative value was dropped. The change induced by the missing data is
noticeable but not alarming. }
\end{figure}
\begin{example}
In Table~\ref{tab:birkex1} we see some \Matlab statements defining a degree~$23$ ($27 - 1$ minus $m=3$ missing data) \HB\ interpolation problem,
together with calls to \Matlab routines that we have written to solve such problems.
In Figure~\ref{fig:derzer} we see a plot of the relative error of the polynomial \textsl{evaluated as a barycentric Hermite polynomial using the recovered
(filled-in) data}. When we solved this problem using a symbolic evaluation point $t$ using only 14 decimal digits for the computation of the coefficients
and the solution of the mixed symbolic-numeric linear system, we found that the final formula contained significant numerical errors. In particular, there was a
spurious pole in the barycentric formula. Increasing the number of digits to 20 cured the problem in this instance, but indicates in general that one may expect
the `fill-in' numerical method to have greater numerical stability than the `construct the polynomial' approach. This perhaps counter-intuitive observation
will be discussed in greater detail later.
When the number of missing data is small, it is convenient as well as efficient to proceed in this manner. The \Matlab code \texttt{birkhoff\_interp1}
is available on request, and the other codes are available at \texttt{http://www.orcca.on.ca/\allowbreak TechReports/\allowbreak 2007/\allowbreak TR-07-05.html}.
\end{example}
%\subsection*{Filling in missing data (Rob's original draft)}
%If $f(t)$ is required at many points, it may make sense to `fill in' the missing data and thereby convert the Birkhoff problem to a pure hermite problem, for which the generalized barycentric formulae provide explicit answers. this turns out to be simple to do, as follows.
%
%If there is only one missing piece, say $f^{(J)}(\tau_I)/J!$ is not known, then we proceed as before,
%\begin{equation}
%\frac{1}{2\pi i}\oint_{c} \displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\gamma_{ij}\dfrac{f(z)}{(z-\tau_i)^{j+1}}dz=0\,,
%\end{equation}
%for all polynomials $f(z)$ of degree $\leqslant p=-2+\sum_{i=1}^{n}s_i$.
%Thus
%\begin{equation}
%\gamma_{\IJeh}\dfrac{f^{(J)}(\tau_I)}{J!}+\displaystyle \sum_{i=1}^{n}\sum_{j=0\atop{(i,j)\neq (I,J)}}^{s_i-1} a_{ij}f^{(j)}(\tau_i)=0\,,
%\end{equation}
%(where $a_{ij}=\gamma_{ij}/{j!}$ as before) giving, if $\gamma_{\IJeh}\neq 0$, a unique value for
%\begin{equation}
%f^{(J)}(\tau_I)=\dfrac{-1}{a_{\IJeh}}\displaystyle \sum_{i=1}^{n}\sum_{j=0\atop{(i,j)\neq (I,J)}}^{s_i-1}a_{ij}\,.
%f^{(j)}(\tau_i)
%\end{equation}
%If $\gamma_{\IJeh}=0$, then the problem is not poised (see section~\ref{sec:poised}).
%
%If there are two missing pieces of data, say $f^{(J)}(\tau_I)$ and $f^{(L)}(\tau_K)$, then we use, once for (I,J) and again for $(K,L)$, a $B(z)$ factor of degree~$1$ to suppress the residue at the other missing piece:
%\begin{equation}
%\dfrac{1}{2\pi i}\oint_{c}\dfrac{(z-b_0)f(z)}{w(z)}dz=0\,,
%\end{equation}
%for all polynomials of degree $\leqslant p=-3+\sum_{i=1}^{n}s_i$\,.
%We have therefore,
%\begin{equation}
%\displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}a_{ij}(b_0)f^{(j)}(\tau_i)=0\,,
%\end{equation}
%and we may hope to choose $b_0$ in order to set, first, $a_{\IJeh}(b^{*}_0)=0$ whence
%\begin{equation}
%f^{(L)}(\tau_k)=
%\dfrac{-1}{a_{KL}}
%\displaystyle \sum_{i=1}^{n}
%\sum_{j=0\atop{(i,j)\neq (I,J)\atop{(i,j)\neq (K,L)}}}^{s_i-1}a_{ij}(b^{*}_0) f^{(j)}(\tau_i)\,,
%\end{equation}
%and then $b_0=\widehat{b}_0$ making
%$a_{KL}(\widehat{b}_0)=0$, giving a similar formula for $f^{(J)}(\tau_I)$.
%
%
%Now that the idea is clear, we lay out the general case, where we have~$m$ missing pieces of data in sets $j\in S_i^{c}$, $1\leqslant i \leqslant n$, as before. For each missing piece $(I,J)$ we form
%\begin{equation}
%B^{\IJeh}(z)=b_0^{\IJeh}+b_1^{\IJeh}z+\ldots +b_{m-2}^{\IJeh}z^{m-1}+z^m\,,
%\end{equation}
%and choose the $m-1$ free coefficients to suppress the residues at each of the other $m-1$ missing data locations, leading to
%\begin{equation}
%f^{(J)}(\tau_I)\dfrac{-1}{a^{\IJeh}_{\IJeh}}\displaystyle \sum_{i=1}^{n}\sum_{j\in S_i} a_{ij}^{\IJeh} f^{(j)}(\tau_i)\,.
%\end{equation}
%
%Since the $b_k^{\IJeh}$ appears linearly in the residues, the equations for choosing the $b^{\IJeh}_{k}$ will also be linear.
%Suppose the partial fraction decomposition of
%\begin{equation}
%\dfrac{B^{\IJeh}(z)}{w(z)}=\displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\dfrac{\widehat{\gamma}_{ij}}{(z-\tau_i)^{j+1}}\,,
%\end{equation}
%then as before $a_{ij}^{\IJeh}=\dfrac{\widehat{\gamma}^{ij}}{j!}$. So, we need only compute the partial fraction decomposition.
%
%We do this by local Taylor series:
%\begin{equation}
%B(z)=B(\tau_i)+B'(\tau_i)(z-\tau_i)+\ldots+\dfrac{B^{(s_i-1)}}{(s_i-1)!}(\tau_i)(z-\tau_i)^{s_i-1}+\ldots
%\end{equation}
%whence
%
%\begin{equation}
%\begin{aligned}
%\dfrac{B(z)}{w(z)} & =\displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\sum_{k=0}^{j}\gamma_{ij}\beta_{ik}(z-\tau_i)^{k-j-1}\\ &= \displaystyle \sum_{i=1}^{n}\sum_{k=0}^{s_i-1}\sum_{j=k}^{s_i-1}\gamma_{ij}\beta_{ik}(z-\tau_i)^{k-j-1}\atop {\mu=j-k \atop{j=\mu+k}} \\ &=
%\displaystyle \sum_{i=1}^{n}\sum_{k=0}^{s_i-1}\sum_{\mu=0}^{s_i-1-k}\gamma_{i,\mu+k}
%\beta_{ik}(z-\tau_i)^{-\mu-1} \\ &=
%\displaystyle \sum_{i=1}^{n}\sum_{\mu=0}^{s_i-1}\left( \sum_{k=0}^{s_i-1-\mu}\gamma_{i,\mu+k}\beta_{ik}\right)(z-\tau_i)^{-\mu-1}
%\end{aligned}
%\end{equation}
%
%\begin{equation}
%\therefore \quad
%\widehat{\gamma}_{ij}=\sum_{k=0}^{s_i-1-j}\gamma_{i,\mu+k}\beta_{ik}
%\end{equation}
%and
%\begin{equation}
%\beta_{ik}=\dfrac{B^{(k)}(\tau_i)}{k!}=\sum_{j=k}^{m}\binom{m}{j}\tau_i^{j-k}.\,b_j
%\end{equation}
%\begin{equation}
%\therefore \quad
%\widehat{\gamma}_{ij}=\displaystyle \sum_{k=0}^{s_i-1-j}\sum_{k=0}^{m}\binom{m}{j}\gamma_{i,\mu+k}\tau_i^{j-k}b_j\,.
%\end{equation}
%
%\subsection{Numerical quadrature and differentiation}
\subsection{Numerical differentiation}
Let us begin with the Lagrange case, when all confluencies $s_i = 1$, so the degree $p=n-1$. To obtain a numerical differentiation formula
\begin{equation}\label{eq:diff}
f'(\tau_0) = \sum_{i=1}^n \delta_i f(\tau_i),
\end{equation}
which is to hold whenever $f$ is a polynomial of degree not exceeding $p$,
it is possible to use the Lagrange interpolation formula
\begin{equation}\label{eq:Linterp}
f(t) = \sum_{i=1}^n \alpha_i(t) f(\tau_i)
\end{equation}
and to find the coefficients in \eqref{eq:diff} from
\begin{equation*}
\delta_i = \alpha_i'(\tau_0).
\end{equation*}
It is also possible to derive the coefficients in \eqref{eq:diff} using contour integration.
If $\tau_0=\tau_1$, then we can use the integral
\begin{equation*}
\frac1{2\pi i} \oint \frac{f(z)dz}{(z-\tau_1)^2 \prod_{i=2}^n (z-\tau_i)},
\end{equation*}
leading to the Hermite formula
\begin{equation*}
\alpha_{10} f(\tau_1) +\alpha_{11} f'(\tau_1) + \sum_{i=2}^n \alpha_{i0} f(\tau_i)=0,
\end{equation*}
from which it follows that
\begin{equation*}
f'(\tau_1) = -\frac{\alpha_{10}}{\alpha_{11}}f(\tau_1) - \sum_{i=2}^n \frac{\alpha_{i0}}{\alpha_{11}} f(\tau_i)
\end{equation*}
Repeating this for each $\tau_i$, $1 \le i \le n$, gives us the well-known differentiation matrix for the Lagrange
basis (see e.g.~\cite[Ch.~6]{Trefethen:SMM:2000}). The paper~\cite{Corless:BHIE:2008} used brute force to
establish the differentiation matrix for the Hermite case; a similar analysis to that done here would have
simplified the calculations considerably.
Returning to the Lagrange case, if $\tau_0 \not\in \{\tau_1, \tau_2, \dots, \tau_n\}$, then we use the integral
\begin{equation*}
\frac1{2\pi i} \oint \frac{(b_0+b_1 z)f(z)dz}{(z-\tau_0)^2 \prod_{i=1}^n (z-\tau_i)},
\end{equation*}
with $b_0$ and $b_1$ chosen so that the residue at $z=\tau_0$ is zero.
It is found that, with $b_1=1$,
\begin{equation*}
b_0 = \Big(\sum_{i=1}^n \frac1{\tau_0-\tau_i}\Big)^{-1} - \tau_0.
\end{equation*}
A short calculation then gives
\begin{equation*}
\frac{b_0+\tau_0}{w(\tau_0)} f'(\tau_0) + \sum_{i=1}^n \left( \frac{b_0+\tau_i}{(\tau_i-\tau_0)^2\prod_{j\ne i}(\tau_i-\tau_j)}\right) f(\tau_i) = 0\>,
\end{equation*}
whence $f'(\tau_0)$ can be extracted.
\subsection{Numerical integration}
To find an integration formula
\begin{equation}\label{eq:Nint}
\int_a^b f(x) dx = \sum_{i=1}^n \lambda_i f(\tau_i),
\end{equation}
it is possible to start with \eqref{eq:Linterp}, and define the coefficients in \eqref{eq:Nint} from
\begin{equation*}
\lambda_i = \int_a^b \alpha_i(t)\, dt.
\end{equation*}
The connection to \HB\ interpolation is well-known; see e.g.~\cite{MR0318743}.
To use the contour integration approach, we can obtain the approximation
\begin{equation*}
f(b) - f(a) = \sum_{i=1}^n \gamma_i f\,'(\tau_i),
\end{equation*}
using
\begin{equation*}
\frac1{2\pi i} \oint \frac{B(z) f(z)dz}{(z-a)(z-b)P(z)^2}=\frac1{2\pi i} \oint \frac{B(z) f(z)dz}{(z-a)(z-b) \prod_{i=1}^n (z-\tau_i)^2},
\end{equation*}
with $B(z)$ a polynomial of degree $n$ chosen so that the residue of
$B(z)/(z-a)(z-b) P(z)^2$ at $z=\tau_i$ is zero for $i=1,2,\dots,n$.
In the special case $a=-1$, $b=1$, $B(z)=1$, we can ask if it is still possible for the residues at $z=\tau_i$ to be zero, for all $i=1,2,\dots, n$.
This would be exact for $f$ a polynomial of degree $2n$ and would correspond to Gaussian quadrature. We show how to characterise this property in terms of the choice of the $\tau_i$.
Choose one of the nodes and denote it by $\tau$. Also write $P(x) = (x-\tau)Q(x)$. It then follows that
\begin{align*}
P'(\tau) &= Q(\tau),\\
P''(\tau) &= 2 Q'(\tau).
\end{align*}
To calculate the residue at $z=\tau$ in $1/(z^2-1)P(z)^2$, substitute $z=\tau+t$ and we obtain
\begin{align*}
&\frac1{t^2(-1+\tau +t)(1+\tau+t)Q(\tau+t)^2} \\
&= \frac1{(\tau^2-1)Q(\tau)^2} \frac1{t^2} \Big(1+\frac{t}{1-\tau}\Big)
\Big(1+\frac{t}{-1-\tau}\Big)\Big(1-\frac{2t Q'(\tau)}{Q(\tau)}\Big)+ O(1).
\end{align*}
The coefficient of $t^{-1}$ will vanish if
\begin{equation*}
\frac1{1-\tau}+\frac1{-1-\tau}= \frac{2Q'(\tau)}{Q(\tau)};
\end{equation*}
that is
\begin{equation*}
\frac{2\tau}{\tau^2-1} + \frac{P''(\tau)}{P'(\tau)}=0
\end{equation*}
or
\begin{equation}\label{eq:Pdd}
(\tau^2-1) P''(\tau) + 2\tau P'(\tau)=0,
\end{equation}
which is to hold for all $\tau=\tau_i$, $i=1,2,\dots,n$.
The left hand side is a polynomial in $\tau$ of degree $n$ and is therefore a multiple of $P(\tau)$.
By matching the coefficient of $x^n$, $P$ satisfies the equation
\begin{equation*}
(x^2-1) P''(x) + 2xP'(x)-n(n+1) P(x)=0,
\end{equation*}
where the left-hand side is a polynomial of degree only $n-1$. Since this polynomial vanishes
at $n$ points, it is the zero polynomial.
Thus, to within a scaling factor, $P(x)$ is the Legendre polynomial $P_n(x)$.
\subsection{Root-finding}
%\framebox{Write the first draft $\sqrt{}$}\\
We may wish to compute the roots of a univariate polynomial that is specified by a \HB\ interpolation problem.
There are strong numerical and structural advantages in working with the given data directly without completing the interpolation
and converting to the familiar monomial basis~\cite{Corless:LBB:2004}.
Numerically stable sets of basis-preserving root-finding algorithms, in the Hermite and Lagrange cases, have been used in~\cite{Corless:BHIE:2008}.
In a similar fashion, if we have a \HB\ interpolation problem, we do not wish to first compute a monomial basis representation for the solution
before finding the desired roots. Instead, we suggest that the approach of Section~\ref{subsec:fillingdata} be used to first fill in the missing data. Having done this, without completing the interpolation, we have converted the original problem to the Hermite case. We can then use existing algorithms to compute all roots of the given polynomial by solving a generalized eigenvalue problem. This is done by constructing a pair of matrices, known as the `companion matrix pencil'.
The finite generalized eigenvalues of this matrix pencil are the roots of the original polynomial. We demonstrate this process with an example.
\begin{example}\label{ex:three}
As in example~\ref{ex:two}, let $\tau = [1,2,4]$. Suppose also that $f(1) = 1$, $f'(2) = 0$, and $f(4) = -1$.
The `fill-in' process gives
\begin{equation}
f(2) = \tfrac43 f(1) + 2f'(2)-\tfrac13 f(4) = \tfrac{5}{3}\>.
\end{equation}
The generalized companion matrix pencil of~\cite{Corless:BHIE:2008} is
\begin{equation}
C_0 =
\left[ \begin {array}{ccccc} 1& \,& & &\,\phantom{-}1\\
\noalign{\medskip} &\,\phantom{-}2& & &\,\phantom{-}\tfrac53\\
\noalign{\medskip} &\,\phantom{-}1&\phantom{i}2& & \,\phantom{-}0\\
\noalign{\medskip} & & &\,\phantom{-}4&\,-1\\
\noalign{\medskip}\tfrac13&\,-\!\tfrac{1}4&\phantom{i}\tfrac12&\,-\!\tfrac1{12}& \,\phantom{-}0\end {array} \right]
\end{equation}
and $C_1$ is, as usual, the $5 \times 5$ identity matrix with the $(5,5)$ corner being replaced by zero. Note that the generalized
barycentric weights $\gamma_{ij}$ appear in the bottom row, and the polynomial values appear in the final column. The nodes appear in transposed Jordan blocks along
the diagonal. It is easy to see that $\det\left(tC_1 - C_0\right) = f(t)$ for any~$t$.
We find the generalized eigenvalues numerically. There are three infinite eigenvalues (because~$f$ is degree~$2$ while the matrices are~$5$ by~$5$), and
two finite eigenvalues: Maple's call to LAPACK routines gets $3.58113883008418021$ and $0.418861169915851983$. [The exact values are $2\pm \sqrt{10}/2$.]
We remark that the roots were found \textsl{without} converting to a monomial basis at any time. Further, fast methods
can be stably used to find the eigenvalues~\cite{Gemignani:DQR:2008}.
\end{example}
\subsection{Rational interpolation}
The approach described in this paper can be generalized to the rational polynomial case, with fixed denominator.
This is interesting and useful because, as in~\cite{Corless:BHIE:2008}, one can choose the denominator with parameters
that can be \textsl{tuned} to ensure monotonicity, convexity, or other properties, as in~\cite{BrankinGladwell}.
In what follows, we assume $R(z)=f(z)/q(z)$ is the rational polynomial with $q(z)$ fixed and not zero at any node.
For compactness, we use superscripts for derivatives instead of the clumsier Leibniz notation:
\begin{equation}
R^{(\ell)}(\tau_i)=\dfrac{d^{\ell}}{dz^{\ell}}\left( \dfrac{f(z)}{q(z)}\right)\Big|_{z=\tau_{i}}\>.
\end{equation}
\begin{lemma} \textsl{Absorbing a denominator into the barycentric weights}.
Let $w(z)$ be defined as in \eqref{eq:w}. Suppose $\deg{q(z)}\leqslant -1+\sum_{i=1}^{n} s_i$. Put $\sigma_{ij}=\frac{q^{(j)}(\tau_i)}{j!}$ for brevity. Then
\begin{equation}
\frac{q(z)}{w(z)}=\displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\dfrac{\alpha_{ij}}{(z-\tau_i)^{j+1}}
\end{equation}
where
\begin{equation}
\alpha_{ij}=\sum_{k=0}^{s_i-1-j}\gamma_{i,j+k}\sigma_{ik}\,.
\end{equation}
Moreover, if $q(\tau_i)\neq 0$, and $\gamma_{i,s_i-1}$ is as defined in \eqref{eq:alphaij} then
\begin{equation}
\alpha_{i,s_i-1}=\gamma_{i,s_i-1} \sigma_{i,0}\neq 0.
\end{equation}
%(Recall $w(z)=\prod_{i=1}^{n}(z-\tau_i)^{s_i}$, $\tau_i\neq \tau_j$, if $i\neq j$, and $\gamma_{i,s_i-1}=\prod_{j\neq i}(\tau_i-\tau_j)^{-s_i}$).
\end{lemma}
\begin{proof}
By the Hermite interpolation formula,
\begin{equation}
\dfrac{q(z)}{w(z)}=\displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\sum_{k=0}^{j}\gamma_{ij}\sigma_{ik}(z-\tau_i)^{k-j-1}
\end{equation}
Interchanging the order of the second and third sums,
\begin{equation}
\dfrac{q(z)}{w(z)}=\displaystyle \sum_{i=1}^{n}\sum_{k=0}^{s_i-1}\sum_{j=k}^{s_i-1}\gamma_{ij}\sigma_{ik}(z-\tau_i)^{k-j-1}\,.
\end{equation}
Putting $\ell=j-k$,
\begin{equation}
\dfrac{q(z)}{w(z)}=\displaystyle \sum_{i=1}^{n}\sum_{k=0}^{s_i-1}\sum_{\ell=0}^{s_i-1-k}\gamma_{i,\ell+k}\sigma_{ik}(z-\tau_i)^{-\ell-1}
\end{equation}
Interchanging the second and third sums again and renaming the temporary variable~$\ell$ as~$j$ again, we have
\begin{equation}
\dfrac{q(z)}{w(z)}=\displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\left(\sum_{k=0}^{s_i-1-j}\gamma_{i,j+k}\sigma_{ik} \right)(z-\tau_i)^{-j-1}
\end{equation}
as claimed. Now,
\begin{equation}
\alpha_{i,s_i-1}=\sum_{k=0}^{0}\gamma_{i,s_i-1+k}\sigma_{i,k}=\gamma_{i,s_i-1} \sigma_{i,0}\neq 0\,,
\end{equation}
also as claimed, since $\sigma_{i,0}=q(\tau_i)\neq 0$.
\qed
\end{proof}
%Assume henceforth $q(z)$ is fixed.
\begin{lemma}\label{lem:qfixed}
If $\deg{q(z)}\leqslant -2-m+\sum_{i=1}^n s_i$,
and if the contour~$C$ is taken large enough to enclose all $\tau_i$,
then for all polynomials $B(z)$ of degree $\leqslant m$ and all polynomials $f(z)$ of degree $\leqslant -2-m+\sum_{i=1}^{n}s_i$, we have
\begin{equation}
0 = \dfrac{1}{2\pi i}\oint_{C}\dfrac{B(z)f(z)}{w(z)}dz
=\dfrac{1}{2\pi i}\oint_{C}\dfrac{B(z)q(z)}{w(z)}\cdot\dfrac{f(z)}{q(z)}dz\>.
\end{equation}
\end{lemma}
\begin{proof}
The proof follows by degree counting and Lemma~\ref{lem:contour}.
\qed
\end{proof}
\begin{remark}
The degree restriction on $q(z)$ above is unnecessary. One can have arbitrarily large degrees for $q$, and the residues at infinity
all turn out to be zero, leaving the same expansion.
\end{remark}
Theorem~\ref{th:equiv} can be generalized to the rational polynomial case.
\begin{theorem}
Let, as before, $R(z)=f(z)/q(z)$ be the rational polynomial with $q(z)$ fixed. Fix $(I,J)$ as in Theorem~\ref{th:equiv}, thus
focussing our attention on a single missing data piece.
Use superscripts to indicate that
$B(z)=B^{\IJeh}(z)$
depends on the particular index pair $(I,J)$.
If $B(z)$ can be chosen as
\begin{equation}
B(z) = B^{\IJeh}(z)=\sum_{k=0}^{m-1}b_{k}^{\IJeh}z^k
\end{equation}
with
\begin{equation}
\dfrac{B^{\IJeh}(z)q(z)}{w(z)}=\dfrac{-1}{(z-\tau_I)^{J+1}}+\displaystyle \sum_{i=1}^{n}\sum_{j\in S_i}\dfrac{\alpha^{\IJeh}_{ij}}{(z-\tau_i)^{j+1}}\,,
\end{equation}
%\framebox{Fix the notation, we have avoided using the notation $S^c_I$ before the following $\sqrt{}$}\\
where $J\not\in S_I$, and as before~$m$ is the number of missing data items,
%\sum_{i=1}^{n}|S^c_i|$,
then
\begin{equation}
\dfrac{R^{(J)}(\tau_I)}{J!}=\displaystyle \sum_{i=1}^{n}\sum_{j\in S_i}\alpha^{\IJeh}_{ij}\rho_{ij}
\end{equation}
for every polynomial $f(z)$ of degree $\leqslant -1-m+\sum_{i=1}^{n}s_i$ and with
\begin{equation}
\dfrac{R^{(j)}(\tau_{i})}{j!}=\rho_{ij}
\end{equation}
given for $1\leqslant i \leqslant n $, $j\in S_i$\,.
\end{theorem}
\begin{proof}
Replace~$m$ with $m-1$ in Lemma~\ref{lem:qfixed} (note that the residues at the zeros of $q(z)$ are zero), and apply the Cauchy Residue Theorem.
It follows that
\begin{equation}
\begin{aligned}
0 = &\dfrac{1}{2\pi i}\oint \dfrac{B(z)q(z)}{w(z)}\cdot \dfrac{f(z)}{q(z)}dz \\= &
\dfrac{1}{2\pi i}\oint \left(\dfrac{-1}{(z-\tau_I)^{(J+1)}}+\displaystyle \sum_{i=1}^{n}\sum_{j\in S_i}\dfrac{\alpha^{\IJeh}_{ij}}{(z-\tau_i)^{j+1}} \right) \dfrac{f(z)}{q(z)}dz
\\ =& \dfrac{-R^{(J)}(\tau_I)}{J!}+\displaystyle\sum_{i=1}^{n}\sum_{j\in S_i}\alpha^{\IJeh}_{ij}\rho_{ij}\,.
\end{aligned}
\end{equation}
\qed
\end{proof}
\begin{remark}
As in Section~\ref{subsec:fillingdata}, one may find all missing data at once by inverting a single matrix. Note also that this
matrix may be singular, depending on the denominator $q$, even if the polynomial \HB\ problem is R-regular.
\end{remark}
\section{Regularity and conditioning}\label{sec:posed}
The \HB\ interpolation problem is not always \textsl{regular}, by which is commonly meant (see e.g.~\cite[p.3]{Lorentz:BI:1983}) that the
problem has a unique solution for each given set of function data $\rho_{ij} := f^{(j)}(\tau_i)/j!$. If not, the problem is termed \textsl{singular}.
For example, if one specifies $f(0) = 0$,
$f(1) = 0$, and $f'(1/2)=0$, then \textsl{any} quadratic with zeros at $0$ and $1$ will have derivative zero half-way between; thus the solution
is not unique. Contrariwise, specifying instead $f'(0) = 1$ (say) makes the problem have no solution. In both cases the problem is singular.
Changing the node at $1/2$ to anything
different, even $1/(2+\e)$ for $\e$ small, makes the problem regular again. Thus in a certain sense the \HB\ interpolation
problem can be discontinuous, although in this instance the discontinuity is at least visible in the solution as $\e \to 0$:
\begin{equation}
f(t) = -{\frac { \left( t-1 \right) \left( 2\,t+t\e+\e \right)
f(0)}{\e}}-{\frac {t \left( t-1 \right) \left( 2+\e
\right) f'(1/(2+\e))}{\e}}+{\frac {t \left( t\e+2\,t-2
\right) f(1)}{\e}}
\end{equation}
and the solution visibly tends to infinity as $\e \to 0$. Continuity issues can be more complicated, however: see~\cite{Dyn:CBI:1982}.
The \HB\ interpolation problem can also be `structurally' singular; for example, specifying a function value at $0$ and a nonzero $2$nd derivative, but not a $1$st
derivative, does not allow a linear polynomial to fit the data.
We have shown that if an \HB\ problem is R-regular, it is regular.
It is clear that a given \HB\ interpolation problem is regular if and only if its associated matrix, obtained by deleting rows from the confluent Vandermonde matrix
(rows corresponding to missing data) and deleting the same number of final columns (corresponding to the highest degree monomials), is square and has nonzero determinant.
Numerically, one expects that \textsl{near}-singularity will be important. If the structured condition number of the associated matrix is large, one would expect
the resulting coefficients (in the monomial basis) to vary widely and erratically with perturbations in the data (and also, incidentally, with rounding errors in
the computation).
We remark that the associated matrix is not used in the algorithm of this paper, although we will use it in a proof, momentarily.
We do not express the \HB\ \interpolational polynomial in the monomial basis. Rather,
we usually express it in a barycentric Hermite form. This avoids the potential ill-conditioning of the associated matrix.
However, we do not avoid ill-conditioning altogether (of course). If the \HB\ interpolation problem is itself ill-conditioned, then the solution from our
algorithm will reflect that, and accuracy will suffer. More, the $(m-1)$ by $(m-1)$ matrix (or the $m$ by $m$ matrix) that we \textsl{do} use in our
algorithm may be ill-conditioned itself, possibly independently of the ill-conditioning of the underlying \HB\ problem. We have not proved any results
about the conditioning of this matrix, but remark that if $m \ll p$ then one would expect less difficulty by our method than by using the associated
matrix, which is $p$ by $p$.
We now discuss the relationship of the regularity of the problem with the success or failure of our algorithms, \textsl{i.e.} R-regularity.
\subsection{The case $m=1$}
Let $H_{ik}(x)$, $1\leqslant i\leqslant n$, $0\leqslant k\leqslant s_{i}-1$ be the Hermite \interpolational basis on the nodes $\tau_i$ with confluencies $s_i$. Then it is known that
\begin{equation}\label{eq:kronecker}
\dfrac{H^{(j)}_{ik}(\tau_{\ell})}{j!}=\delta_{i}^{\ell}\>\delta_{j}^{k}
\end{equation}
where $\delta_{i}^{\ell}$ is the Kronecker delta function (the factorials are a matter of convenience). Hence the solution to the Hermite interpolation problem is
\begin{equation}
f(t)=\displaystyle\sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\rho_{ij}H_{ij}(t)\,.
\end{equation}
Now, we can (by interchanging orders of summation), give an explicit formula for these $H_{ij}(t)$ (which is not new, but apparently not widely known either)
\begin{equation}
\begin{aligned}
\dfrac{f(t)}{w(t)} &= \displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1}\sum_{k=0}^{j}\gamma_{ij}\rho_{ik}(t-\tau_i)^{k-j-1}\\
&= \displaystyle \sum_{i=1}^{n}\sum_{k=0}^{s_i-1}\sum_{j=k}^{s_i-1}\gamma_{ij}\rho_{ik}(t-\tau_i)^{k-j-1}\\
&= \displaystyle \sum_{i=1}^{n}\sum_{k=0}^{s_i-1}\sum_{\ell=0}^{s_i-1-k}\gamma_{i,\ell+k}
\rho_{ik}(t-\tau_{i})^{-\ell-1}\\
&= \displaystyle \sum_{i=1}^{n}\sum_{k=0}^{s_i-1}\rho_{ik}\left( \sum_{\ell=0}^{s_i-1-k}\gamma_{i,\ell+k}(t-\tau_i)^{-\ell-1}\right)\>.
\end{aligned}
\end{equation}
Hence
\begin{lemma}\label{lemma:box}
\begin{equation}
H_{ik}(t)=w(t)\sum_{\ell=0}^{s_i-1-k}\gamma_{i,\ell+k}(t-\tau_i)^{-\ell-1}\>, \label{eq:hermitebasis}
\end{equation}
\end{lemma}
and
\begin{lemma}\label{lemma:boxbox}
\textsl{The leading coefficient of a basis element}
\begin{equation}
[t^p]H_{ik}(t)=\gamma_{i,k}
\end{equation}
from the $\ell=0$ term of~\eqref{eq:hermitebasis}.
\end{lemma}
\begin{remark}
A similar rearrangement, but replacing $w(t)/B(t)$ with its rational form, gives an explicit (second) barycentric expression for the \textsl{Hermite-Birkhoff} basis, as well:
\begin{equation}
B_{ik}(t)=\frac{\sum_{\ell=0}^{s_i-1-k}\widehat{\gamma}_{i,\ell+k}(t-\tau_i)^{-\ell-1}}%
{\sum_{i=1}^n \sum_{j \in S_i} \widehat{\gamma}_{i,j}(t-\tau_i)^{-j-1}}
\>. \label{eq:birkhoffbasis}
\end{equation}
Here $\widehat{\gamma}_{i,\ell+k} = 0$ if $\ell+k \not \in S_i$. This rational function is, in fact, polynomial in $t$ (ignoring rounding),
and satisfies $B_{ij}^{(\ell)}(\tau_k)/\ell! = \delta_{i}^k \delta_{j}^{\ell}$ for $1 \le i, k \le n$ and $j \in S_i$.
Because rounding errors actually do make a difference, we do not recommend forming the \HB\ basis explicitly in floating-point
arithmetic. We believe that this expression is new.
\end{remark}
\begin{lemma}
Let $\V$ be the confluent Vandermonde matrix on $\tau_i$ with confluencies $s_i$, $1\leqslant i\leqslant n$.
Then, since the $\tau_i$ are distinct and so $\V$ is nonsingular, if we put
\begin{equation}
\mathbf{A}=
\left[
\begin{array}{cc}
\V & \mathbf{R} \\
-\mathbf{T} & \boldsymbol{0} \\
\end{array}
\right]
\end{equation}
where
\begin{equation}
\mathbf{T}=\left[
\begin{array}{cccc}
1 \>& t \>& \ldots \>& t^{p} \\
\end{array}
\right]\,
\end{equation}
and
\begin{equation}
\mathbf{R}=\left[
\begin{array}{c}
\rho_{10}\\
\rho_{11}\\
\vdots\\
\rho_{n,s_n-1}\\
\end{array}
\right]\>,
\end{equation}
then we have
\begin{equation}
\det\left({\mathbf{A}}\right)=\det\left({\V}\right)\cdot\det\left(0+\mathbf{T}\V^{-1}\mathbf{R}\right)\,
\end{equation}
by the Schur complement~\cite[Ch.~4]{Hogben:HLA:2007}. Therefore,
\begin{equation}
\det\left({\mathbf{A}}\right)=\det{\left(\V\right)}f(t)
\end{equation}
because
\begin{equation}
\V^{-1}\mathbf{R}
\end{equation}
gives the coefficients of $f$ in the monomial basis.
\end{lemma}
\begin{theorem}
The \HB\ problem with a single missing datum (so $m=1$) at say $(I,J)$, is regular if and only if $\det \left(\V\right) \ne 0$ and
the coefficient of $(z-\tau_I)^{-J-1}$, that is $\gamma_{I\!J}$, is not $0$.
That is, an \HB\ problem with $m=1$ is regular if and only if it is R-regular.
\end{theorem}
\begin{proof}
Use Laplace expansion on $\det{(\mathbf{A})}$ beginning with the last column.
\begin{equation}\label{eq:laplace}
f(t)=\displaystyle \sum_{i=1}^{n}\sum_{j=0}^{s_i-1} \pm\rho_{ij}\det{\left( \V
\xleftarrow[(i,j)]{}
%\leftarrow{\atop{(i,j)}}
-\mathbf{T}
\right)}
\end{equation}
where the notation
\begin{equation}
\V
\xleftarrow[(i,j)]{}
%\leftarrow{\atop{(i,j)}}
-\mathbf{T}
\end{equation}
means replace the row of $\V$ corresponding to the $i$th node, $j$th derivative with the row of monomials indicated.
But this shows
\begin{equation}
H_{ij}(t)=
\pm\det{\left(\V
\xleftarrow[(i,j)]{}
%\leftarrow{\atop{(i,j)}}
-\mathbf{T}
\right)}\,.
\end{equation}
Now do the Laplace expansion of this $p\times p$ determinant to find the coefficient of $t^{p}$, which is $\pm \det{\left( \V_B \right)}$ where $\V_B$ is obtained from $\V$ by deleting the $(i,j)$ row and the final column. This is precisely the \HB\ $m=1$ \interpolational matrix; the problem is regular if and only if this determinant is not zero. Comparison gives
\begin{equation}
\pm \det{\left(\V_B\right)}=\det{\left(\V\right)}\gamma_{I\!J}
\end{equation}
and since $\det{\left(\V\right)}\neq 0$ the proof is complete.
\end{proof}
\begin{remark}
We now are assured that if $\gamma_{I\!J}=0$, the \HB\ problem missing only that datum is not regular.
Previously, all we had claimed was that if $\gamma_{I\!J}\neq0$ then the problem was R-regular, and hence regular.
Now we have proved that our algorithm succeeds for all $m=1$ regular \HB\ problems.
\end{remark}
\begin{example}
The $m=1$ case is only partially illuminating for the $m>1$ case.
Suppose $\tau = [0, r, s, 1]$, and that we are given $f(0)$, $f'(0)$, $f'(r)$, $f'(s)$, and $f(1)$. By taking determinants, we find that this problem is regular if
and only if
\begin{equation}
-4\,r+6\,rs+3-4\,s \ne 0\>. \label{eq:exrsreg}
\end{equation}
However, the partial fraction expansion of $1/w(z)$ has residue
\begin{equation}
{\frac {4\,r-5\,{r}^{2}-2\,s+3\,rs}{{r}^{3} \left( r-s \right) ^{3}
\left( r-1 \right) ^{2}}}
\end{equation}
at $z=r$. This coefficient can be zero, for example when $r=1/5$ and $s=3/7$, without the problem being singular. For example, specifying $f(0)=0$, $f'(0)=1$, $f'(1/5)=0$,
$f'(3/7)=0$, and $f(1)=0$ has the unique solution
\begin{equation}
{\frac {1}{105}}\,t \left( 105-346\,t+385\,{t}^{2} \right) \left( 1-t \right)\>.
\end{equation}
So we are left with the interesting observation that if we were missing only one piece of data, namely $f(1/5)$, then the \HB\ problem is singular and does
not have a unique solution; if we are \textsl{also} missing $f(3/7)$, then the problem is regular. This is, of course, because the \textsl{degrees} are different
in each case. There is no degree $5$ (at most) polynomial fitting the $6$ pieces of data $f(0)=0$, $f'(0)=1$, $f'(1/5)=0$, $f(3/7)=0$, $f'(3/7)=0$, $f(1)=0$,
but there is a degree $4$ polynomial fitting the $5$ pieces of data obtained by omitting the constraint $f(3/7)=0$.
\end{example}
\begin{remark}
Computations show that, in every case, $\det\left(\Mt\right) = \pm\det\left(\V_B\right)/\det\left({\V}\right)$. That is, the determinant of the matrix $\Mt$ arising in our algorithm
is zero precisely when the \HB\ problem is singular, so R-regular problems are regular.
The algorithm fails if our restriction $\tau_i = \tau_j \implies i = j$ is violated, so that the nodes become indistinct;
this prevents coalescence to a regular \HB\ problem, which can happen (for example, specifying $f(0)$, $f'(r)$, and $f(1)$ is a regular problem even if $r=0$,
but our formulation will break down because $\det\left(\V\right) = 0$ in that case).
\end{remark}
\section{Concluding remarks }
We have presented usually stable algorithms, of cost $O(m^3)+O(p^2)$ in the degree $p$ and number $m$ of missing data, for solving the \HB\ interpolation problem and related problems. We have provided \Matlab and \Maple code implementing our
algorithms. The method is not precisely new, in that contour
integrals have been used for interpolation problems before, as have partial fractions. We believe, however, that it is new enough, and robust and
efficient enough, to be potentially interesting. The flexibility of the algorithm (allowing solution of rational interpolation problems) is also attractive.
The detailed numerical stability properties of these algorithms remain open. We believe that the `fill-in' method is more often stable than the `semi-symbolic' method,
based on our examples. We expect that for low-confluency problems, with small $m$, our algorithms may be preferable to existing algorithms.
\begin{acknowledgements}
This work was partially supported by the Natural Sciences and Engineering Research Council of Canada. Special thanks go to the University of Newcastle, Australia, for an invitation
that allowed RMC to take a side trip to visit JCB in Auckland
at a time when AS was also there. We also thank Piers Lawrence for writing the \Matlab implementation of the fill-in algorithm. Dhavide Aruliah
was helpful in early discussions of this idea.
\end{acknowledgements}
\bibliographystyle{abbrv}
\bibliography{PABV,JNAIAM}
%
%\begin{thebibliography}{}
%
% and use \bibitem to create references. Consult the Instructions
% for authors for reference list style.
%
%\bibitem{LevinsonRedheffer}
%Complex Variables, Norman Levinson and Raymond M. Redheffer
%Author, Article title, Journal, Volume, page numbers (year)
%% Format for books
%\bibitem{RefB}
%Author, Book title, page numbers. Publisher, place (year)
%% etc
%\end{thebibliography}
\end{document}
% end of file template.tex
%
% Alternative version of Lemma 2: equivalent, and was useful to find typos.
%
\begin{lemma}
\label{lem:gamma1}
\textsl{Computation of the generalized barycentric weights.}\\
\par\noindent
Write
\begin{equation}
\frac{1}{w(z)} = \frac{1}{\prod_{j=1}^n (z-\tau_j)^{s_j}} = \frac{h_i(z)}{(z-\tau_i)^{s_i}}
\end{equation}
so that
\begin{equation}
h_i(z) = \prod_{{j=1}\atop{j\ne i}}^n (z-\tau_j)^{-s_j} \>.
\end{equation}
The local Taylor series for $h_i(z)$ about $z=\tau_i$ (where it is analytic)
gives the desired local Laurent series
coefficients of $1/w(z)$, which themselves provide the full
partial fraction expansion when we do this for each $i = 1$, $2$, $\ldots$, $n$.
Put, for $\ell = 0, 1, 2, \ldots$,
\begin{equation}
u_{i,\ell} = \sum_{{j=1} \atop{ j \ne i}}^n \frac{s_j}{(\tau_j - \tau_i)^{\ell + 1}}\>.
\end{equation}
Also put
\begin{equation}
\gamma_{i,s_i-1} = h_i(\tau_i) = \prod_{{j=1}\atop{j\ne i}}^n (\tau_i-\tau_j)^{-s_j}\>,
\end{equation}
and $\nu_{i,0} = 1$.
Then $\gamma_{i,s_i-j} = \gamma_{i,s_i-1}\nu_{i,j-1}$
where
\begin{equation}
\nu_{i,m+1} = \frac{1}{m+1}\sum_{\ell=0}^m u_{i,\ell} \nu_{i,m-\ell} \label{eq:recursion1}
\end{equation}
for $m = 0$, $1$, $\ldots$, $s_i-2$.
\end{lemma}
\begin{proof}
Taking logarithms,
\begin{equation}
\ln h_i(t) = \sum_{j\ne i} -s_j \ln(t - \tau_j )\>.
\end{equation}
Differentiating,
\begin{equation}
\begin{split}
\frac{h_i'(t)}{h_i(t)} &= \sum_{j\ne i} \frac{-s_j}{t-\tau_j} \\
&= \sum_{j\ne i} \frac{-s_j}{\tau_i-\tau_j + t - \tau_i} \\
&= \sum_{j\ne i} \frac{s_j}{(\tau_j-\tau_i)(1-\zeta)}\>,
\end{split}
\end{equation}
where $\zeta = (t-\tau_i)/(\tau_j-\tau_i)$. Using the geometric series,
\begin{equation}
\begin{split}
\phantom{\frac{h_i'(t)}{h_i(t)}} &= \sum_{j\ne i} \sum_{\ell \ge 0} \frac{s_j}{(\tau_j-\tau_i)^{\ell+1}} \left(t-\tau_i\right)^\ell \\
&= \sum_{\ell\ge 0}\left( \sum_{j \ne i} \frac{s_j}{(\tau_j-\tau_i)^{\ell+1}}\right) (t-\tau_i)^\ell \\
&= \sum_{\ell \ge 0} u_{i,\ell}(t-\tau_i)^\ell\>.
\end{split}
\end{equation}
Then
\begin{equation}
h_i'(t) = \left( \sum_{\ell \ge 0} u_{i,\ell}(t-\tau_i)^\ell \right) h_i(t)
\end{equation}
and we may use Cauchy convolution to find a recurrence relation for the
local series coefficients of $h_i(t)$, as is standard (see, e.g.~\cite{Henrici:ACCA:1986}).
In detail, if
\begin{equation}
h_i(t) = \sum_{m\ge 0} h_i(\tau_i)\nu_{i,m} (t-\tau_i)^m \>,
\end{equation}
then (as claimed) $\nu_{i,0} = 1$, and by comparing terms in
\begin{equation}
h_i'(t) = \sum_{m\ge 0} (m+1)h_i(\tau_i) \nu_{i,m+1} (t-\tau_i)^m
\end{equation}
and
\begin{equation}
\begin{split}
h_i'(t) &= \left( \sum_{\ell \ge 0} u_{i,\ell}(t-\tau_i)^\ell \right)\left( \sum_{m\ge 0} h_i(\tau_i)\nu_{i,m} (t-\tau_i)^m \right)\\
&= \sum_{m\ge 0} h_i(\tau_i)\left( \sum_{\ell=0}^m u_{i,\ell}\nu_{i,m-\ell}\right) (t-\tau_i)^m
\end{split}
\end{equation}
the recurrence relation~\eqref{eq:recursion1} is established.
\qed
\end{proof}