本文并无恶意采集之意，仅抓取少量图片用于相册功能测试😄

效果图
在这里插入图片描述

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;
import com.la.selenium.utils.SeleniumUtil;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;/*** 功能描述** @author jason*/
@Slf4j
public class ImageSpider {private static String url;private static final String imagePath = "/data/pixabay/photos/image_url.txt";public static void main(String[] args) {// String url = "https://pixabay.com/zh/illustrations/search/?order=ec";// String url = "https://pixabay.com/zh/illustrations/search/?order=ec&pagi=2";String urlTemplate = "https://pixabay.com/zh/photos/search/?order=ec&pagi={page}";FileUtil.writeUtf8String("", imagePath);SeleniumUtil.exec((webDriver) -> {for (int i = 1; i <= 100; i++) {int finalI = i;url = StrUtil.format(urlTemplate, new HashMap<String, Object>() {{put("page", finalI);}});if (i == 1) {url = StrUtil.replace(url, "&pagi=1", "");}List<String> imageList = spiderImage(webDriver, url);FileUtil.appendUtf8Lines(imageList, imagePath);}});log.info("采集完成");}public static List<String> spiderImage(WebDriver driver, String url) {driver.get(url);WebDriverWait wait = new WebDriverWait(driver, 10);By locator = By.xpath("//*[@id=\"app\"]/div[1]/div/div[2]/div[2]/div/div");wait.until(ExpectedConditions.visibilityOfAllElementsLocatedBy(locator));//            String pageSource = driver.getPageSource();
//            System.out.println(pageSource);WebElement webElement1 = driver.findElement(locator);List<WebElement> webElement2 = webElement1.findElements(By.className("column--HhhwH"));List<String> imageList = new ArrayList<>();webElement2.forEach(webElement3 -> {List<WebElement> webElement4 = webElement3.findElements(By.className("cell--UMz-x"));webElement4.forEach(webElement5 -> {String webElement5Html = webElement5.getAttribute("outerHTML");// 获取里面的 JSON 字符串Document doc = Jsoup.parse(webElement5Html);Elements scriptElements = doc.select("script[type=application/ld+json]");String scriptJson = scriptElements.first().html();// 获取 contentUrlString contentUrl = JSONUtil.parseObj(scriptJson).getStr("contentUrl");log.info("采集链接：{}", contentUrl);imageList.add(contentUrl);});});return imageList;}}

相关文章

服务器托管需要注意什么事项？

微信小程序备忘

虚拟机NAT模式通过宿主机（Windows）上网不稳定解决办法（无法上网）（将宿主机设置固定ip并配置dns）

基于 HTML、CSS 和 JavaScript 的智能图像虚化系统

PS更改图像尺寸

分词器详解（一）

推荐一个论文阅读工具ivySCI

诊断服务器（Diagnostic Server）

如何开发一款高稳定、低延迟、功能全面的RTSP播放器？

数据结构——树（03二叉树，与路径有关的问题，代码练习）

Git配置：禁用全局HTTPS验证

【54页PPT】基于DeepSeek的数据治理技术（附下载方式）

2025年最新 unityHub游戏引擎开发2d手机游戏和桌面游戏教程

SuperMap GIS基础产品FAQ集锦(20250901)

react-native-reanimated-carousel的API记录

TypeScript 泛型入门（新手友好、完整详解）

Linux ARP老化机制/探测机制/ip neigh使用

便携式显示器怎么选？：6大关键指标全解析

以太坊网络

nano banana官方最强Prompt模板来了！六大场景模板详解