@article{sun2025locno,
  title = {Operator Learning for Earthquake Location: Methodology and Application to Out-of-Network Events in the {Mendocino} {Triple} {Junction}},
  author = {Sun, Hongyu},
  journal = {Bulletin of the Seismological Society of America},
  year = {2025},
  month = oct,
  pages = {1--20},
  issn = {1943-3573, 0037-1106},
  doi = {10.1785/0120250018},
  language = {en},
  urldate = {2025-10-15},
  internal-note = {volume was the placeholder xx and is omitted until assigned; add volume and number once the article appears in an issue},
  file = {Sun2025BSSA.pdf}
}
Accurate location of earthquakes outside the boundaries of seismic networks is a well-known challenge due to limited azimuthal coverage. Offshore earthquakes, particularly along the Mendocino Transform fault, pose considerable risks to nearby communities. However, the lack of permanent stations in marine environments hinders accurate earthquake location. This study investigates the feasibility of locating offshore earthquakes around the Mendocino Triple Junction (MTJ) using advanced deep learning techniques. We propose the location neural operator (LocNO), an innovative operator learning framework that directly estimates earthquake locations from full-waveform data, even under sparse and nonideal network conditions. The approach integrates the Fourier neural operator and graph neural operator to capture the intricate spatiotemporal dependencies of seismic wavefields across stations and to estimate a spatial pseudoprobability density function over earthquake source coordinates (easting, northing, and depth). Trained on historical seismicity and simulated sparse network scenarios, LocNO generalizes across varying network geometries and provides robust estimates for out‐of‐network events, with the mean absolute errors on the order of 10 km horizontally and 4 km in depth. A case study of the 2017 Mw 5.7 earthquake sequence, 218 km west of Ferndale, California, demonstrates that LocNO yields robust locations for out‐of‐network earthquakes newly detected by the phase neural operator, even when recorded by only a few stations with large azimuthal gaps. These results open a new avenue for investigating MTJ seismotectonics using deep-learning-enhanced earthquake catalogs.
@article{sun_phase_2023,
  title = {Phase {Neural} {Operator} for {Multi}-{Station} {Picking} of {Seismic} {Arrivals}},
  volume = {50},
  issn = {0094-8276, 1944-8007},
  doi = {10.1029/2023GL106434},
  language = {en},
  number = {24},
  urldate = {2024-08-08},
  journal = {Geophysical Research Letters},
  author = {Sun, Hongyu and Ross, Zachary E. and Zhu, Weiqiang and Azizzadenesheli, Kamyar},
  month = dec,
  year = {2023},
  pages = {e2023GL106434},
  file = {Sun2023GRL_withSI.pdf}
}
Seismic wave arrival time measurements form the basis for numerous downstream applications. State-of-the-art approaches for phase picking use deep neural networks to annotate seismograms at each station independently, yet human experts annotate seismic data by examining the whole network jointly. Here, we introduce a general-purpose network-wide phase picking algorithm based on a recently developed machine learning paradigm called Neural Operator. Our model, called Phase Neural Operator, leverages the spatio-temporal contextual information to pick phases simultaneously for any seismic network geometry. This results in superior performance over leading baseline algorithms by detecting many more earthquakes, picking more phase arrivals, while also greatly improving measurement accuracy. Following similar trends being seen across the domains of artificial intelligence, our approach provides but a glimpse of the potential gains from fully-utilizing the massive seismic data sets being collected worldwide.
@article{sun_beyond_2023,
  author     = {Sun, Hongyu and Demanet, Laurent},
  title      = {Beyond {Correlations}: {Deep} {Learning} for {Seismic} {Interferometry}},
  shorttitle = {Beyond {Correlations}},
  journal    = {IEEE Transactions on Neural Networks and Learning Systems},
  volume     = {34},
  number     = {7},
  pages      = {3385--3396},
  month      = jul,
  year       = {2023},
  issn       = {2162-237X, 2162-2388},
  doi        = {10.1109/TNNLS.2022.3172385},
  copyright  = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html},
  urldate    = {2024-08-08},
  file       = {Sun2023IEEE.pdf}
}
Passive seismic interferometry (SI) is a vastly generalized blind deconvolution question, where different paths through the Earth correspond to different channels called Green’s functions; the sources are completely incoherent and not shared by the channels, and the question is to estimate paths (channels) that are not present in the dataset. SI, turning noise to signal, has numerous applications, from monitoring industrial activities to crustal structure investigation. No standard method of signal processing will solve SI. Instead, domain scientists resort to a simple cross-correlation operation, a.k.a. correlogram, which can retrieve the Green’s function directly, but only under restrictive assumptions of ergodicity (energy equipartitioning) of the random process generating the seismic source. However, in practice, correlograms are not equal to the empirical Green’s function, because these assumptions are generally far from being satisfied in realistic situations. In the framework of supervised learning, we propose to train deep neural networks (NNs) to overcome two limitations of correlation-based SI: the temporal limitation of passive recordings and the spatial limitation of the random source distribution. Deep NNs are trained to implicitly find the relationship between the empirical Green’s functions and the correlograms and then used to extract the correct Green’s functions from ambient noise. The input of the network is correlograms (a virtual shot gather), and the desired output is the empirical Green’s function (the active shot gather). The NN can often retrieve Green’s functions from 5-min passive recordings with acceptable accuracy in our synthetic example. Although an exact estimation of the source locations may not be necessary, a prior knowledge of the source directionality (through a preliminary beamforming step) is helpful when training the NN to mitigate the challenges associated with inhomogeneous source distributions (directional wave fields).
In this work, all the numerical examples are based on the retrieval of P-wave reflections in the exploration scale and are conducted on synthetic data. We use a modified ResNet in our numerical experiments.
@article{sun_learning_2023,
  author     = {Sun, Hongyu and Sun, Yen and Nammour, Rami and Rivera, Christian and Williamson, Paul and Demanet, Laurent},
  title      = {Learning with real data without real labels: a strategy for extrapolated full-waveform inversion with field data},
  shorttitle = {Learning with real data without real labels},
  journal    = {Geophysical Journal International},
  volume     = {235},
  number     = {2},
  pages      = {1761--1777},
  month      = jul,
  year       = {2023},
  issn       = {0956-540X, 1365-246X},
  doi        = {10.1093/gji/ggad330},
  language   = {en},
  copyright  = {https://academic.oup.com/journals/pages/open\_access/funder\_policies/chorus/standard\_publication\_model},
  urldate    = {2024-08-08},
  file       = {Sun2023GJI.pdf}
}
Full-waveform inversion (FWI) relies on low-frequency data to succeed if a good initial model is unavailable. However, field seismic data excited by active sources are typically band-limited above 3 Hz. By extrapolated FWI, we can start inversion from computational low frequencies extrapolated from band-limited data. However, low-frequency extrapolation with deep learning is challenging for field data since a neural network trained on synthetic data usually generalizes poorly on real seismic data. Here we use a semi-supervised learning method to extrapolate low frequencies for field data by training with real data without real labels. Specifically, by training CycleGAN with unpaired images of field 4–10 Hz band-limited and synthetic 0–4 Hz low-frequency shot gathers, we can extrapolate the 0–4 Hz low frequencies for the field data band-limited above 4 Hz. The source wavelet for the simulation of synthetic low-frequency data is used as the source in FWI using the extrapolated data. The inverted velocity model using only the extrapolated low frequencies is comparable to the tomography model. Our method strengthens the ability of FWI for mapping fine Earth structures by mitigating the cycle-skipping problem effectively.
@article{sun_deep_2022,
  author    = {Sun, Hongyu and Demanet, Laurent},
  title     = {Deep {Learning} for {Low}-{Frequency} {Extrapolation} of {Multicomponent} {Data} in {Elastic} {FWI}},
  journal   = {IEEE Transactions on Geoscience and Remote Sensing},
  volume    = {60},
  pages     = {1--11},
  year      = {2022},
  issn      = {0196-2892, 1558-0644},
  doi       = {10.1109/TGRS.2021.3135790},
  copyright = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html},
  urldate   = {2024-08-08},
  file      = {Sun2022IEEE.pdf}
}
Full-waveform inversion (FWI) strongly depends on an accurate starting model to succeed. This is particularly true in the elastic regime: The cycle-skipping phenomenon is more severe in elastic FWI compared to acoustic FWI due to the short S-wave wavelength. In this article, we extend our work on extrapolated FWI (EFWI) by proposing to synthesize the low frequencies of multicomponent elastic seismic records and use those “artificial” low frequencies to seed the frequency sweep of elastic FWI. Our solution involves deep learning: we can either train the same convolutional neural network (CNN) on two training datasets, one with vertical components and one with horizontal components of particle velocities, or train with two components together, to extrapolate the low frequencies of elastic data for 2-D elastic FWI. The architecture of this CNN is designed with a large receptive field by dilated convolution. Numerical examples on the Marmousi2 model show that the 2–4 Hz low-frequency data extrapolated from band-limited data above 4 Hz provide good starting models for elastic FWI of P- and S-wave velocities. In addition, we study the generalization ability of the proposed neural network from acoustic to elastic data. For elastic test data, collecting the training dataset by elastic simulation shows better extrapolation accuracy than acoustic simulation, i.e., a smaller generalization gap.
@article{sun_accelerating_2022,
  title = {Accelerating {Time}-{Reversal} {Imaging} with {Neural} {Operators} for {Real}-time {Earthquake} {Locations}},
  copyright = {Creative Commons Attribution 4.0 International},
  doi = {10.48550/ARXIV.2210.06636},
  eprint = {2210.06636},
  archiveprefix = {arXiv},
  primaryclass = {physics.geo-ph},
  urldate = {2024-08-08},
  publisher = {arXiv},
  author = {Sun, Hongyu and Yang, Yan and Azizzadenesheli, Kamyar and Clayton, Robert W. and Ross, Zachary E.},
  year = {2022},
  note = {arXiv preprint},
  keywords = {FOS: Physical sciences, Geophysics (physics.geo-ph)},
  annote = {Other 28 pages, 19 figures},
  file = {Sun2022Arxiv.pdf}
}
Earthquake hypocenters form the basis for a wide array of seismological analyses. Pick-based earthquake location workflows rely on the accuracy of phase pickers and may be biased when dealing with complex earthquake sequences in heterogeneous media. Time-reversal imaging of passive seismic sources with the cross-correlation imaging condition has potential for earthquake location with high accuracy and high resolution, but carries a large computational cost. Here we present an alternative deep-learning approach for earthquake location by combining the benefits of neural operators for wave propagation and time reversal imaging with multi-station waveform recordings. A U-shaped neural operator is trained to propagate seismic waves with various source time functions and thus can predict a backpropagated wavefield for each station in negligible time. These wavefields can either be stacked or correlated to locate earthquakes from the resulting source images. Compared with other waveform-based deep-learning location methods, time reversal imaging accounts for physical laws of wave propagation and is expected to achieve accurate earthquake location. We demonstrate the method with the 2D acoustic wave equation on both synthetic and field data. The results show that our method can efficiently obtain high resolution and high accuracy correlation-based time reversal imaging of earthquake sources. Moreover, our approach is adaptable to the number and geometry of seismic stations, which opens new strategies for real-time earthquake location and monitoring with dense seismic networks.
@article{sun_nanometric_2021,
  author   = {Sun, Hongyu and Pec, Matej},
  title    = {Nanometric flow and earthquake instability},
  journal  = {Nature Communications},
  volume   = {12},
  number   = {1},
  pages    = {6779},
  month    = nov,
  year     = {2021},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-021-26996-0},
  language = {en},
  urldate  = {2023-04-26},
  file     = {Sun2021NC.pdf}
}
Fault zones accommodate relative motion between tectonic blocks and control earthquake nucleation. Nanocrystalline fault rocks are ubiquitous in “principal slip zones” indicating that these materials are determining fault stability. However, the rheology of nanocrystalline fault rocks remains poorly constrained. Here, we show that such fault rocks are an order of magnitude weaker than their microcrystalline counterparts when deformed at identical experimental conditions. Weakening of the fault rocks is hence intrinsic; it occurs once nanocrystalline layers form. However, it is difficult to produce “rate weakening” behavior due to the low measured stress exponent, n, of 1.3 ± 0.4 and the low activation energy, Q, of 16,000 ± 14,000 J/mol implying that the material will be strongly “rate strengthening” with a weak temperature sensitivity. Failure of the fault zone nevertheless occurs once these weak layers coalesce in a kinematically favored network. This type of instability is distinct from the frictional instability used to describe crustal earthquakes.
@article{sun_extrapolated_2020,
  author   = {Sun, Hongyu and Demanet, Laurent},
  title    = {Extrapolated full-waveform inversion with deep learning},
  journal  = {GEOPHYSICS},
  volume   = {85},
  number   = {3},
  pages    = {R275--R288},
  month    = may,
  year     = {2020},
  issn     = {0016-8033, 1942-2156},
  doi      = {10.1190/geo2019-0195.1},
  language = {en},
  urldate  = {2024-08-08},
  file     = {Sun2020GEO.pdf}
}
The lack of low-frequency information and a good initial model can seriously affect the success of full-waveform inversion (FWI), due to the inherent cycle skipping problem. Computational low-frequency extrapolation is in principle the most direct way to address this issue. By considering bandwidth extension as a regression problem in machine learning, we have adopted an architecture of convolutional neural network (CNN) to automatically extrapolate the missing low frequencies. The band-limited recordings are the inputs of the CNN, and, in our numerical experiments, a neural network trained from enough samples can predict a reasonable approximation to the seismograms in the unobserved low-frequency band, in phase and in amplitude. The numerical experiments considered are set up on simulated P-wave data. In extrapolated FWI (EFWI), the low-wavenumber components of the model are determined from the extrapolated low frequencies, before proceeding with a frequency sweep of the band-limited data. The introduced deep-learning method of low-frequency extrapolation shows adequate generalizability for the initialization step of EFWI. Numerical examples show that the neural network trained on several submodels of the Marmousi model is able to predict the low frequencies for the BP 2004 benchmark model. Additionally, the neural network can robustly process seismic data with uncertainties due to the existence of random noise, a poorly known source wavelet, and a different finite-difference scheme in the forward modeling operator. Finally, this approach is not subject to strong assumptions on signals or velocity models of other methods for bandwidth extension and seems to offer a tantalizing solution to the problem of properly initializing FWI.
@phdthesis{sun2022learning,
  title = {Learning Seismic Waves for Imaging the {Earth}},
  author = {Sun, Hongyu},
  year = {2022},
  url = {https://dspace.mit.edu/handle/1721.1/144859},
  school = {Massachusetts Institute of Technology}
}
This thesis studies imaging Earth’s interior with seismic wavefields for seismic exploration and monitoring, and shows applications of deep learning in solving challenges in seismic imaging with either active or passive seismic data. For active data, we develop deep-learning methods to extrapolate missing low-frequency waves from band-limited seismograms. Low-frequency waves are essential to mitigate the cycle-skipping problem of full-waveform inversion (FWI), but data below ∼ 3 Hz are missing due to the band-limited characteristic of conventional artificial sources. Here we train convolutional neural networks to computationally extrapolate low-frequency data from bandlimited recordings so that FWI can start from the extrapolated low-frequency data. We also extend the method to elastic FWI where the cycle-skipping phenomenon is more severe compared to acoustic FWI, due to the short S-wave wavelength. Additionally, involving real seismic data in training may reduce the generalization error for the network trained only on synthetic data. We thus develop a semi-supervised learning method and train generative adversarial networks with real data without real labels. Both synthetic and field examples show that the extrapolated low frequencies can successfully initiate FWI from rough initial models. Furthermore, we show that extrapolated low frequencies may be used to increase the investigation depth of surface-wave inversion for near-surface characterization. Moving from active to passive data, we develop deep-learning methods to extract accurate Green’s functions from realistic noise environments. Seismic interferometry by cross-correlation of ambient noise may introduce spurious events in correlograms if the source distribution is inhomogeneous. Extremely long (from days to months) noise recordings are usually required for a reliable retrieval with high signal-to-noise ratio. 
We therefore propose a deep-learning method to overcome the spatial limitation of passive sources for the universal application of seismic interferometry and the temporal limitation of noise recording length for real-time monitoring. Collectively, we find that deep neural networks can learn to generate seismic waves under the regression framework of machine learning. We conclude that machine learning is a powerful complement to traditional computational approaches and may provide new insights into the imaging of the Earth’s structure and dynamics.
@inproceedings{sun_vsnet_2025,
  address = {Houston, Texas},
  title = {{VSNet}: {Deep}-learning inversion of velocity structures and source locations with passive seismic data},
  doi = {10.1190/image2025-4316895.1},
  language = {en},
  urldate = {2025-12-01},
  booktitle = {Fifth International Meeting for Applied Geoscience \& Energy, Expanded Abstracts},
  publisher = {Society of Exploration Geophysicists},
  author = {Sun, Hongyu},
  month = dec,
  year = {2025},
  pages = {1--6},
  file = {Sun2025SEG.pdf}
}
Jointly determining source locations and velocity structures is essential in passive seismic inversion due to the coupling between hypocenters and velocity structures. Traditional methods, including travel-time tomography and full-waveform inversion (FWI), face challenges due to their sensitivity to initial models and high computational costs. Recently, deep learning has emerged as a promising alternative, offering improved efficiency and accuracy in seismic inversion. However, most existing data-driven FWI studies focus on active-source data, where the source information is known, making their direct application to passive seismic data challenging. Here I propose VSNet, a multi-task learning framework for the joint inversion of velocity structures and source locations with passive seismic data. VSNet consists of two separate sub-networks: VNet for velocity inversion and SNet for source localization, which are updated simultaneously through a soft-sharing mechanism. Using a passive seismic dataset simulated on the OpenFWI Kimberlina-CO2 velocity models, I demonstrate that VSNet can directly estimate both velocity structures and source locations from full-waveform data. The results show that VSNet achieves high accuracy in both tasks while being computationally efficient once trained, highlighting the potential of deep learning in passive seismic inversion.
@inproceedings{wu_seg_2025,
  address = {Houston, Texas},
  title = {An adaptive migration image-based {CNN}-based full waveform inversion ({AIC}-{FWI})},
  doi = {10.1190/image2025-4316803.1},
  language = {en},
  urldate = {2025-12-01},
  booktitle = {Fifth International Meeting for Applied Geoscience \& Energy, Expanded Abstracts},
  publisher = {Society of Exploration Geophysicists},
  author = {Wu, Yulang and Sun, Hongyu and Tian, Yapeng and McMechan, George A. and Zhou, Jing},
  month = dec,
  year = {2025},
  pages = {1--5},
  file = {Wu2025SEG.pdf}
}
We propose adaptive migration image-based CNN-FWI (AIC-FWI), an updated version of adaptive-feedback CNN-based reflection-waveform inversion (CNN-RWI), which trains a convolutional neural network (CNN) using the original velocity model and RTM image to predict the true velocity model. Unlike CNN-RWI, which requires high-resolution prior models, AIC-FWI captures features from migration images, making it suitable for inversion with a 1D velocity model. AIC-FWI offers several advantages: 1) no data fitting required, with higher-frequency data preferred; 2) self-supervised learning using pseudo-labels from the latest predicted models; 3) computational efficiency with fast convergence and scalability on GPUs/TPUs/CPU clusters. Synthetic tests on the Marmousi2 model show AIC-FWI can accurately predict velocity models from 1D initial models.
@inproceedings{sun_seismic_monitoring_seg_2025,
  address = {Houston, Texas},
  title = {Advancing seismic monitoring with operator learning},
  internal-note = {DOI pending; add a doi field once the DOI has been assigned},
  language = {en},
  urldate = {2025-12-01},
  booktitle = {Fifth International Meeting for Applied Geoscience \& Energy, Post-Convention Workshop W-11: Machine Learning in Geosciences in the Age of Generative AI, Expanded Abstracts},
  publisher = {Society of Exploration Geophysicists},
  author = {Sun, Hongyu},
  month = dec,
  year = {2025},
  pages = {1--5},
  file = {Sun2025SEG_workshop.pdf}
}
Seismic monitoring is essential for understanding earthquake processes and mitigating associated hazards. Traditional deep learning methods in seismology often rely on single-station inputs or fixed seismic acquisition geometries, limiting their robustness and scalability in dynamic seismic monitoring systems. Recent developments in operator learning have enabled new approaches to seismic monitoring, which address these limitations by leveraging the full spatiotemporal information contained in multi-station waveform data and generalizing across diverse seismic network configurations. This work reviews three recent contributions that illustrate these approaches: (1) the Phase Neural Operator (PhaseNO), an algorithm for multi-station picking of seismic arrivals; (2) the Location Neural Operator (LocNO), a full-waveform earthquake location algorithm; and (3) an accelerated correlation-based time-reversal imaging method that uses the Fourier Neural Operator for efficient wavefield backpropagation. Together, these methods demonstrate that operator learning can enhance the accuracy, adaptability, and computational efficiency of seismic monitoring across varying geologic settings and network configurations.
@inproceedings{sun_extrapolated_2022,
  author    = {Sun, Hongyu and Demanet, Laurent},
  title     = {Extrapolated surface-wave dispersion inversion},
  booktitle = {Second {International} {Meeting} for {Applied} {Geoscience} \& {Energy}},
  publisher = {Society of Exploration Geophysicists and American Association of Petroleum Geologists},
  address   = {Houston, Texas},
  pages     = {2060--2065},
  month     = aug,
  year      = {2022},
  doi       = {10.1190/image2022-3751174.1},
  language  = {en},
  urldate   = {2024-08-08},
  file      = {Sun2022SEG.pdf}
}
We propose a surface-wave analysis method, extrapolated dispersion inversion (EDI), to image the near-surface shear-wave velocity structures beyond the penetration depth of conventional surface-wave inversion methods. Active-source surface waves are the main type of seismic data for an imaging depth of less than one kilometer. The relatively low-frequency data play an important role in dispersion inversion, by increasing the investigation depth and decreasing the inversion uncertainty. However, recorded surface-wave data from an active source generally lack low-frequency components below 3 Hz since acquiring them with an active-source survey requires considerable cost. Here, we propose to extrapolate the missing low-frequency surface waves from band-limited data so that low-frequency dispersion data can be measured. Due to the strong nonlinearity of bandwidth extension, we rely on deep learning to automatically extrapolate the missing low-frequency surface waves. Numerical examples with synthetic layered models show that the extrapolated data provide additional dispersion data at long wavelengths and thus can be used to image much deeper structures compared with inversion using only band-limited data.
@inproceedings{sun_seismic_2021,
  author    = {Sun, Hongyu and Demanet, Laurent},
  title     = {Seismic interferometry with neural networks},
  booktitle = {First {International} {Meeting} for {Applied} {Geoscience} \& {Energy}, {Expanded} {Abstracts}},
  publisher = {Society of Exploration Geophysicists},
  address   = {Denver, CO and virtual},
  pages     = {1384--1389},
  month     = sep,
  year      = {2021},
  doi       = {10.1190/segam2021-3594981.1},
  language  = {en},
  urldate   = {2024-08-08},
  file      = {Sun2021SEG.pdf}
}
Under the assumptions of diffuse wavefields or energy equipartitioning, theoretical studies showed that the Green’s function can be retrieved from the cross-correlation of ambient noise in seismic interferometry (SI). However, in practice, correlograms are not equal to the empirical Green’s function since the assumptions for correlation-based SI are generally not satisfied in realistic situations. In the framework of supervised learning, we propose to train deep neural networks to overcome two limitations of correlation-based SI: the temporal limitation of passive recordings, and the spatial limitation of the random source distribution. Deep neural networks are trained to implicitly find the relationship between the empirical Green’s function and the correlograms, and then used to extract the correct Green’s function from ambient noise. The input of the network is correlograms (a virtual shot gather) and the desired output is the empirical Green’s function (the active shot gather). Numerical examples show that a deep network aware of the source directionality (through a preliminary beamforming step) can help mitigate some of the challenges associated with inhomogeneous source distributions. In this work, all the numerical examples are based on the retrieval of P-wave reflections at exploration scales, and are conducted on synthetic data. Many precautions are taken to avoid the “learning crime” where the training and testing scenarios are too closely related. We use the CycleGAN architecture in all our numerical experiments.
@inproceedings{sun_elastic_2020,
  author    = {Sun, Hongyu and Demanet, Laurent},
  title     = {Elastic full-waveform inversion with extrapolated low-frequency data},
  booktitle = {{SEG} {Technical} {Program} {Expanded} {Abstracts} 2020},
  publisher = {Society of Exploration Geophysicists},
  address   = {Virtual},
  pages     = {855--859},
  month     = sep,
  year      = {2020},
  doi       = {10.1190/segam2020-3428087.1},
  language  = {en},
  urldate   = {2024-08-08},
  file      = {Sun2020SEG.pdf}
}
Full waveform inversion (FWI) strongly depends on an accurate starting model to succeed. This is particularly true in the elastic regime: The cycle-skipping phenomenon is more severe in elastic FWI compared to acoustic FWI, due to the short S-wave wavelength. In this note, we extend our work on extrapolated FWI (EFWI) by proposing to synthesize the low frequencies of multi-component elastic seismic records, and use those “artificial” low frequencies to seed the frequency sweep of elastic FWI. By leveraging deep learning technologies, we separately train two neural networks to extrapolate the low frequencies of elastic data (vertical and horizontal components of particle velocity), respectively. A numerical example on the Marmousi2 model shows that the 2–4 Hz low-frequency data extrapolated from band-limited data above 4 Hz provide good starting models for elastic FWI of P-wave and S-wave velocities. Additionally, we study the generalization ability of the proposed neural network over different physical models. For elastic test data, collecting the training dataset by elastic simulation shows better extrapolation accuracy than acoustic simulation, i.e., a smaller generalization gap.
@inproceedings{sun_extrapolated_2019,
  author    = {Sun, Hongyu and Demanet, Laurent},
  title     = {Extrapolated full waveform inversion with convolutional neural networks},
  booktitle = {{SEG} {Technical} {Program} {Expanded} {Abstracts} 2019},
  publisher = {Society of Exploration Geophysicists},
  address   = {San Antonio, Texas},
  pages     = {4962--4966},
  month     = aug,
  year      = {2019},
  doi       = {10.1190/segam2019-3197987.1},
  language  = {en},
  urldate   = {2024-08-08},
  file      = {Sun2019SEG.pdf}
}
Computational low frequency extrapolation is in principle the most direct way to address the cycle skipping problem in full waveform inversion (FWI). We propose a method of extrapolated full waveform inversion (EFWI), where FWI is allowed to make use of data augmented by increasing its frequency band with a convolutional neural network (CNN). In extrapolated FWI with CNN (EFWI-CNN), the low-wavenumber components of the model are determined from the extrapolated low frequencies, before proceeding with a frequency sweep of the bandlimited data. The proposed deep-learning method of low-frequency extrapolation shows adequate generalizability for the initialization step of EFWI. Numerical examples show that the neural network trained on several submodels of the Marmousi model is able to predict the low frequencies for the BP 2004 benchmark model. Additionally, the neural network can robustly process seismic data with uncertainties due to the existence of noise, unknown source wavelet, and different finite-difference scheme in the forward modeling operator.
@inproceedings{sun_lowfrequency_2018,
  author    = {Sun, Hongyu and Demanet, Laurent},
  title     = {Low-frequency extrapolation with deep learning},
  booktitle = {{SEG} {Technical} {Program} {Expanded} {Abstracts} 2018},
  publisher = {Society of Exploration Geophysicists},
  address   = {Anaheim, California},
  pages     = {2011--2015},
  month     = aug,
  year      = {2018},
  doi       = {10.1190/segam2018-2997928.1},
  language  = {en},
  urldate   = {2024-08-08},
  file      = {Sun2018SEG.pdf}
}
The lack of the low frequency information and good initial model can seriously affect the success of full waveform inversion (FWI) due to the inherent cycle skipping problem. Reasonable and reliable low frequency extrapolation is in principle the most direct way to solve this problem. In this paper, we propose a deep-learning-based bandwidth extension method by considering low frequency extrapolation as a regression problem. The Deep Neural Networks (DNNs) are trained to automatically extrapolate the low frequencies without preprocessing steps. The band-limited recordings are the inputs of the DNNs and, in our numerical experiments, the pretrained neural networks can predict the continuous-valued seismograms in the unobserved low frequency band. For the numerical experiments considered here, it is possible to find the amplitude and phase correlations among different frequency components by training the DNNs with enough data samples, and extrapolate the low frequencies from the band-limited seismic records trace by trace. The synthetic example shows that our approach is not subject to the structural limitations of other methods to bandwidth extension, and seems to offer a tantalizing solution to the problem of properly initializing FWI.
@inproceedings{sun_3d_2016,
  address = {Online},
  title = {{3D} {Anisotropic} {Elastic} {Wave} {Illumination} and {Target}-oriented {Visibility} {Analysis}},
  doi = {10.3997/2214-4609.201600856},
  language = {en},
  urldate = {2024-08-08},
  booktitle = {78th {EAGE} {Conference} and {Exhibition} 2016},
  publisher = {European Association of Geoscientists \& Engineers},
  author = {Sun, H. Y. and Han, L. G. and Zhang, T. Z. and Zhang, F. J.},
  year = {2016},
  pages = {1--5}
}
3D wave equation illumination analysis utilizing the prior model information adequately is a good way to design acquisition geometry and reduce the high risk and costs of 3D seismic exploration. In this paper, the anisotropic elastic wave illumination method is proposed to improve the exploration accuracy of complex areas with anisotropy, which is usually found in the lithosphere. On the basis of mathematical statistics, we define the whole illumination intensity of the target zone excited from one shot and scattered to one receiver as the visibility of the single shot-receiver pair. Then we can obtain the visibility of the acquisition geometry for a specific subsurface structure during the procedure of illumination analysis. This illumination-based visibility analysis method quantifies the illumination contribution of different shot-receiver pairs to objective imaging, which is a good way to optimize the seismic survey and improve the exploration accuracy without increasing exploration costs.
@inproceedings{sun_full_2015,
  author    = {Sun, H. and Han, L. and Gong, X. and Zhang, F.},
  title     = {Full {Waveform} {Inversion} with the {Adaptive} {Illumination} {Compensation}},
  booktitle = {77th {EAGE} {Conference} and {Exhibition} 2015},
  address   = {Madrid, Spain},
  pages     = {1--3},
  year      = {2015},
  doi       = {10.3997/2214-4609.201412462},
  urldate   = {2024-08-08}
}
Full waveform inversion has become more and more popular in both academic research and commercial production in recent years. However, the lack of low frequencies and the limited maximum offset restrict its field application. In this study, a new FWI method taking adaptive illumination compensation into consideration is proposed to solve the limited maximum offset problem. A two-way wave equation illumination analysis method is applied to adaptively compensate and balance the gradients. The synthetic data test shows that our method can enhance the convergence stability of FWI iterations in poorly illuminated areas and provide a better image at the same time.
 Email: hsun2@utep.edu 
 Last updated on Oct 15, 2025
500 W University
 El Paso, Texas 79902
© 2025 Hongyu Sun