@article{Sapienza_Gallo_Bolibar_Pérez_Taylor_2025,
  author       = {Sapienza, Facundo and Gallo, Leandro C. and Bolibar, Jordi and Pérez, Fernando and Taylor, Jonathan},
  title        = {Spherical Path Regression Through Universal Differential Equations With Applications to Paleomagnetism},
  journal      = {Journal of Geophysical Research: Machine Learning and Computation},
  volume       = {2},
  number       = {4},
  year         = {2025},
  issn         = {2993-5210},
  doi          = {10.1029/2025jh000626},
  abstractnote = {Directional data analysis plays a central role in paleomagnetism, where observations lie on a spherical surface. Existing methods for analyzing directional data often fail to incorporate prior physical knowledge about plate geodynamics, significantly constraining their potential. To address this limitation, we developed a hybrid, physics-informed machine learning model that uses a neural network to learn the underlying rotations responsible for generating directional data. Our method robustly captures the time-dependent variability of directional data in both synthetic and real paleomagnetic data sets. Additionally, by leveraging in the differentiable programming paradigm, we can incorporate physical constraints in the form of regularizations. These results have the potential to improve future estimations of apparent polar wander paths, advancing the reconstruction of past tectonic plate motions. The history of plate tectonic motion can be reconstructed using apparent polar wander paths, which describe the motion of a tectonic plate relative to the Earth’s spin axis over time. Estimating these paths—and the underlying movement of a plate on the Earth’s surface—requires interpreting noisy magnetic signals preserved in rocks when they form, a problem known as inverse modeling. In this work, we develop a new methodology for estimating these paths by modeling directional observations as solutions to differential equations. Simultaneously, our method allows to include geophysical constraints in the model. The inverse modeling is enabled by the use of differentiable programming, a computer paradigm that allows to evaluate the sensitivity of the apparent polar wander path with respect to changes in the underlying plate rotations. We demonstrate the performance of our method using both synthetic and real-world examples, and we introduce a computational library to allow users to use our method. Path regression plays a central role in the reconstruction of plate tectonics movement. Machine learning with differential equations constraints can be used for sphere regression with directional data. We introduce an open source library for path regression with geophysical regularizations.}
}

@article{Sapienza_2024,
  author        = {Sapienza, Facundo and Bolibar, Jordi and Schäfer, Frank and Groenke, Brian and Pal, Avik and Boussange, Victor and Heimbach, Patrick and Hooker, Giles and Pérez, Fernando and Persson, Per-Olof and Rackauckas, Christopher},
  title         = {Differentiable Programming for Differential Equations: A Review},
  journal       = {arXiv},
  year          = {2024},
  eprint        = {2406.09699},
  archiveprefix = {arXiv},
  doi           = {10.48550/arxiv.2406.09699},
  abstractnote  = {The differentiable programming paradigm is a cornerstone of modern scientific computing. It refers to numerical methods for computing the gradient of a numerical model’s output. Many scientific models are based on differential equations, where differentiable programming plays a crucial role in calculating model sensitivities, inverting model parameters, and training hybrid models that combine differential equations with data-driven approaches. Furthermore, recognizing the strong synergies between inverse methods and machine learning offers the opportunity to establish a coherent framework applicable to both fields. Differentiating functions based on the numerical solution of differential equations is non-trivial. Numerous methods based on a wide variety of paradigms have been proposed in the literature, each with pros and cons specific to the type of problem investigated. Here, we provide a comprehensive review of existing techniques to compute derivatives of numerical solutions of differential equations. We first discuss the importance of gradients of solutions of differential equations in a variety of scientific domains. Second, we lay out the mathematical foundations of the various approaches and compare them with each other. Third, we cover the computational considerations and explore the solutions available in modern scientific software. Last but not least, we provide best-practices and recommendations for practitioners. We hope that this work accelerates the fusion of scientific models and data, and fosters a modern approach to scientific modelling.}
}

@article{Bolibar_Sapienza_2023,
  author       = {Bolibar, Jordi and Sapienza, Facundo and Maussion, Fabien and Lguensat, Redouane and Wouters, Bert and Pérez, Fernando},
  title        = {Universal differential equations for glacier ice flow modelling},
  journal      = {Geoscientific Model Development},
  volume       = {16},
  number       = {22},
  pages        = {6671--6687},
  year         = {2023},
  doi          = {10.5194/gmd-16-6671-2023},
  abstractnote = {Geoscientific models are facing increasing challenges to exploit growing datasets coming from remote sensing. Universal differential equations (UDEs), aided by differentiable programming, provide a new scientific modelling paradigm enabling both complex functional inversions to potentially discover new physical laws and data assimilation from heterogeneous and sparse observations. We demonstrate an application of UDEs as a proof of concept to learn the creep component of ice flow, i.e. a nonlinear diffusivity differential equation, of a glacier evolution model. By combining a mechanistic model based on a two-dimensional shallow-ice approximation partial differential equation with an embedded neural network, i.e. a UDE, we can learn parts of an equation as nonlinear functions that then can be translated into mathematical expressions. We implemented this modelling framework as ODINN.jl, a package in the Julia programming language, providing high performance, source-to-source automatic differentiation (AD) and seamless integration with tools and global datasets from the Open Global Glacier Model in Python. We demonstrate this concept for 17 different glaciers around the world, for which we successfully recover a prescribed artificial law describing ice creep variability by solving ∼ 500 000 ordinary differential equations in parallel. Furthermore, we investigate which are the best tools in the scientific machine learning ecosystem in Julia to differentiate and optimize large nonlinear diffusivity UDEs. This study represents a proof of concept for a new modelling framework aiming at discovering empirical laws for large-scale glacier processes, such as the variability in ice creep and basal sliding for ice flow, and new hybrid surface mass balance models.}
}

@book{hastie2009elements,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning: Data Mining, Inference, and Prediction},
  edition   = {Second},
  series    = {Springer Series in Statistics},
  publisher = {Springer},
  address   = {New York},
  year      = {2009}
}

@book{pml1Book,
  title     = {Probabilistic Machine Learning: An introduction},
  author    = {Murphy, Kevin P.},
  year      = {2022},
  publisher = {MIT Press},
  url       = {http://probml.github.io/book1}
}

@book{ramsay2017dynamic,
  author    = {Ramsay, James and Hooker, Giles},
  title     = {Dynamic Data Analysis: Modeling Data with Differential Equations},
  series    = {Springer Series in Statistics},
  publisher = {Springer},
  address   = {New York, NY},
  year      = {2017},
  doi       = {10.1007/978-1-4939-7190-9}
}

@article{thuerey2021physics,
  author        = {Thuerey, Nils and Holl, Philipp and Mueller, Maximilian and Schnell, Patrick and Trost, Felix and Um, Kiwon},
  title         = {Physics-based deep learning},
  journal       = {arXiv},
  year          = {2021},
  eprint        = {2109.05237},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2109.05237}
}

 @article{Karniadakis_Kevrekidis_Lu_Perdikaris_Wang_Yang_2021,
  author       = {Karniadakis, George Em and Kevrekidis, Ioannis G. and Lu, Lu and Perdikaris, Paris and Wang, Sifan and Yang, Liu},
  title        = {Physics-informed machine learning},
  journal      = {Nature Reviews Physics},
  volume       = {3},
  number       = {6},
  pages        = {422--440},
  year         = {2021},
  doi          = {10.1038/s42254-021-00314-5},
  abstractnote = {Despite great progress in simulating multiphysics problems using the numerical discretization of partial differential equations (PDEs), one still cannot seamlessly incorporate noisy data into existing algorithms, mesh generation remains complex, and high-dimensional problems governed by parameterized PDEs cannot be tackled. Moreover, solving inverse problems with hidden physics is often prohibitively expensive and requires different formulations and elaborate computer codes. Machine learning has emerged as a promising alternative, but training deep neural networks requires big data, not always available for scientific problems. Instead, such networks can be trained from additional information obtained by enforcing the physical laws (for example, at random points in the continuous space-time domain). Such physics-informed learning integrates (noisy) data and mathematical models, and implements them through neural networks or other kernel-based regression networks. Moreover, it may be possible to design specialized network architectures that automatically satisfy some of the physical invariants for better accuracy, faster training and improved generalization. Here, we review some of the prevailing trends in embedding physics into machine learning, present some of the current capabilities and limitations and discuss diverse applications of physics-informed learning both for forward and inverse problems, including discovering hidden physics and tackling high-dimensional problems. The rapidly developing field of physics-informed learning integrates data and mathematical models seamlessly, enabling accurate inference of realistic and high-dimensional multiphysics problems. This Review discusses the methodology and provides diverse examples and an outlook for further developments. Physics-informed machine learning integrates seamlessly data and mathematical physics models, even in partially understood, uncertain and high-dimensional contexts. Kernel-based or neural network-based regression methods offer effective, simple and meshless implementations. Physics-informed neural networks are effective and efficient for ill-posed and inverse problems, and combined with domain decomposition are scalable to large problems. Operator regression, search for new intrinsic variables and representations, and equivariant neural network architectures with built-in physical constraints are promising areas of future research. There is a need for developing new frameworks and standardized benchmarks as well as new mathematics for scalable, robust and rigorous next-generation physics-informed learning machines.}
}

 @article{Raissi_Perdikaris_Karniadakis_2019,
  author       = {Raissi, Maziar and Perdikaris, Paris and Karniadakis, George Em},
  title        = {Physics-informed neural networks: A deep learning framework for solving forward and inverse problems involving nonlinear partial differential equations},
  journal      = {Journal of Computational Physics},
  volume       = {378},
  pages        = {686--707},
  year         = {2019},
  issn         = {0021-9991},
  doi          = {10.1016/j.jcp.2018.10.045},
  abstractnote = {We introduce physics-informed neural networks – neural networks that are trained to solve supervised learning tasks while respecting any given laws of physics described by general nonlinear partial differential equations. In this work, we present our developments in the context of solving two main classes of problems: data-driven solution and data-driven discovery of partial differential equations. Depending on the nature and arrangement of the available data, we devise two distinct types of algorithms, namely continuous time and discrete time models. The first type of models forms a new family of data-efficient spatio-temporal function approximators, while the latter type allows the use of arbitrarily accurate implicit Runge–Kutta time stepping schemes with unlimited number of stages. The effectiveness of the proposed framework is demonstrated through a collection of classical problems in fluids, quantum mechanics, reaction–diffusion systems, and the propagation of nonlinear shallow-water waves.}
}

 @article{rackauckas2020universal,
  author        = {Rackauckas, Christopher and Ma, Yingbo and Martensen, Julius and Warner, Collin and Zubov, Kirill and Supekar, Rohit and Skinner, Dominic and Ramadhan, Ali and Edelman, Alan},
  title         = {Universal differential equations for scientific machine learning},
  journal       = {arXiv},
  year          = {2020},
  eprint        = {2001.04385},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2001.04385}
}

@article{blondel2024elements,
  author        = {Blondel, Mathieu and Roulet, Vincent},
  title         = {The elements of differentiable programming},
  journal       = {arXiv},
  year          = {2024},
  eprint        = {2403.14606},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2403.14606}
}

@inproceedings{chen2018neural,
  author    = {Chen, Ricky T. Q. and Rubanova, Yulia and Bettencourt, Jesse and Duvenaud, David K.},
  title     = {Neural Ordinary Differential Equations},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {31},
  year      = {2018}
}

@book{goodfellow2016deep,
  author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron},
  title     = {Deep Learning},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2016}
}

@article{Rackauckas_Nie_2016,
  author  = {Rackauckas, Christopher and Nie, Qing},
  title   = {{DifferentialEquations.jl} -- A Performant and Feature-Rich Ecosystem for Solving Differential Equations in {Julia}},
  journal = {Journal of Open Research Software},
  volume  = {5},
  number  = {1},
  pages   = {15},
  year    = {2016},
  issn    = {2049-9647},
  doi     = {10.5334/jors.151}
}

@article{Butcher_Wanner_1996,
  author  = {Butcher, John C. and Wanner, Gerhard},
  title   = {{Runge-Kutta} methods: some historical notes},
  journal = {Applied Numerical Mathematics},
  volume  = {22},
  number  = {1--3},
  pages   = {113--151},
  year    = {1996},
  issn    = {0168-9274},
  doi     = {10.1016/s0168-9274(96)00048-7}
}

@book{hairer-solving-1,
  author    = {Hairer, Ernst and Nørsett, Syvert P. and Wanner, Gerhard},
  title     = {Solving Ordinary Differential Equations {I}: Nonstiff Problems},
  edition   = {Second Revised},
  series    = {Springer Series in Computational Mathematics},
  volume    = {8},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2008},
  doi       = {10.1007/978-3-540-78862-1}
}

@book{hairer-solving-2,
  author    = {Hairer, Ernst and Wanner, Gerhard},
  title     = {Solving Ordinary Differential Equations {II}: Stiff and Differential-Algebraic Problems},
  edition   = {Second Revised},
  series    = {Springer Series in Computational Mathematics},
  volume    = {14},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {1996},
  doi       = {10.1007/978-3-642-05221-7}
}

@book{ascher2008numerical,
  author    = {Ascher, Uri M},
  title     = {Numerical methods for evolutionary differential equations},
  publisher = {SIAM},
  year      = {2008}
}

@article{Kim_Ji_Deng_Ma_Rackauckas_2021,
  author       = {Kim, Suyong and Ji, Weiqi and Deng, Sili and Ma, Yingbo and Rackauckas, Christopher},
  title        = {Stiff neural ordinary differential equations},
  journal      = {Chaos: An Interdisciplinary Journal of Nonlinear Science},
  volume       = {31},
  number       = {9},
  pages        = {093122},
  year         = {2021},
  issn         = {1054-1500},
  doi          = {10.1063/5.0060697},
  abstractnote = {Neural Ordinary Differential Equations (ODEs) are a promising approach to learn dynamical models from time-series data in science and engineering applications. This work aims at learning neural ODEs for stiff systems, which are usually raised from chemical kinetic modeling in chemical and biological systems. We first show the challenges of learning neural ODEs in the classical stiff ODE systems of Robertson’s problem and propose techniques to mitigate the challenges associated with scale separations in stiff systems. We then present successful demonstrations in stiff systems of Robertson’s problem and an air pollution problem. The demonstrations show that the usage of deep networks with rectified activations, proper scaling of the network outputs as well as loss functions, and stabilized gradient calculations are the key techniques enabling the learning of stiff neural ODEs. The success of learning stiff neural ODEs opens up possibilities of using neural ODEs in applications with widely varying time-scales, such as chemical dynamics in energy conversion, environmental engineering, and life sciences.}
}

@article{Ruder_2016,
  author        = {Ruder, Sebastian},
  title         = {An overview of gradient descent optimization algorithms},
  journal       = {arXiv},
  year          = {2016},
  eprint        = {1609.04747},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.1609.04747},
  abstractnote  = {Gradient descent optimization algorithms, while increasingly popular, are often used as black-box optimizers, as practical explanations of their strengths and weaknesses are hard to come by. This article aims to provide the reader with intuitions with regard to the behaviour of different algorithms that will allow her to put them to use. In the course of this overview, we look at different variants of gradient descent, summarize challenges, introduce the most common optimization algorithms, review architectures in a parallel and distributed setting, and investigate additional strategies for optimizing gradient descent.}
}

@article{Lu_Pestourie_Yao_Wang_Verdugo_Johnson_2021,
  author  = {Lu, Lu and Pestourie, Raphaël and Yao, Wenjie and Wang, Zhicheng and Verdugo, Francesc and Johnson, Steven G.},
  title   = {Physics-Informed Neural Networks with Hard Constraints for Inverse Design},
  journal = {SIAM Journal on Scientific Computing},
  volume  = {43},
  number  = {6},
  pages   = {B1105--B1132},
  year    = {2021},
  issn    = {1064-8275},
  doi     = {10.1137/21m1397908}
}

@book{boyd2004convex,
  author    = {Boyd, Stephen and Vandenberghe, Lieven},
  title     = {Convex Optimization},
  publisher = {Cambridge University Press},
  year      = {2004}
}