Watermarking images is critical for tracking image provenance and proving ownership. With the advent of generative models, such as stable diffusion, that can create fake but realistic images, watermarking has become particularly important to make human-created images reliably identifiable. Unfortunately, the very same stable diffusion technology can remove watermarks injected using existing methods. To address this problem, we present ZoDiac, which uses a pre-trained stable diffusion model to inject a watermark into the trainable latent space, resulting in watermarks that can be reliably detected in the latent vector even when attacked. We evaluate ZoDiac on three benchmarks, MS-COCO, DiffusionDB, and WikiArt, and find that ZoDiac is robust against state-of-the-art watermark attacks, with a watermark detection rate above 98% and a false positive rate below 6.4%, outperforming state-of-the-art watermarking methods. We hypothesize that the reciprocating denoising process in diffusion models may inherently enhance the robustness of the watermark when faced with strong attacks and validate the hypothesis. Our research demonstrates that stable diffusion is a promising approach to robust watermarking, able to withstand even stable-diffusion--based attack methods. ZoDiac is open-sourced and available at https://github.com/zhanglijun95/ZoDiac/.
@inproceedings{Zhang24neurips, author = {Lijun Zhang and Xiao Liu and Antoni Viros i Martin and Cindy Xiong Bearfield and Yuriy Brun and Hui Guan}, title = {\href{http://people.cs.umass.edu/brun/pubs/pubs/Zhang24neurips.pdf}{Attack-Resilient Image Watermarking Using Stable Diffusion}}, booktitle = {Proceedings of the 38th Annual Conference on Neural Information Processing Systems (NeurIPS), Advances in Neural Information Processing Systems 38}, venue = {NeurIPS}, address = {Vancouver, BC, Canada}, month = {December}, date = {9--15}, year = {2024}, abstract = {<p>Watermarking images is critical for tracking image provenance and proving ownership. With the advent of generative models, such as stable diffusion, that can create fake but realistic images, watermarking has become particularly important to make human-created images reliably identifiable. Unfortunately, the very same stable diffusion technology can remove watermarks injected using existing methods. To address this problem, we present ZoDiac, which uses a pre-trained stable diffusion model to inject a watermark into the trainable latent space, resulting in watermarks that can be reliably detected in the latent vector even when attacked. We evaluate ZoDiac on three benchmarks, MS-COCO, DiffusionDB, and WikiArt, and find that ZoDiac is robust against state-of-the-art watermark attacks, with a watermark detection rate above 98% and a false positive rate below 6.4%, outperforming state-of-the-art watermarking methods. We hypothesize that the reciprocating denoising process in diffusion models may inherently enhance the robustness of the watermark when faced with strong attacks and validate the hypothesis. Our research demonstrates that stable diffusion is a promising approach to robust watermarking, able to withstand even stable-diffusion--based attack methods. ZoDiac is open-sourced and available at https://github.com/zhanglijun95/ZoDiac/.</p>}, accept = {$\frac{4,036}{15,671} \approx 26\%$}, fundedBy = {NSF CCF-2210243, NSF DMS-2220211, NSF CNS-2224054, NSF CNS-2338512, NSF CNS-2312396}, }