Java Crawler

Goals:
1. Crawl every novel on the target site, with a scheduled task that periodically checks for updates.
2. The crawled data must include the synopsis, author name, cover image, novel title, the individual chapters, the latest updated chapter, and the completion status.
3. All of the above must be stored in a database.
4. Decide in what form the chapter content itself should be stored in the database.
Approach:
1. Framework candidates: SeimiCrawler, jsoup, WebMagic (jsoup was chosen).
2. Use Selenium (a browser-automation testing tool driving Chrome) to crawl dynamically loaded pages.
Python reference

First, a static Python crawler that illustrates the same idea (locating tags with regex and CSS/XPath selectors):
```python
import os
import re
import time

import parsel
import requests

url = 'https://www.zwwx.com/book/67/67510/'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/'
                  '537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/'
                  '537.36 Edg/103.0.1264.49'
}
response = requests.get(url=url, headers=headers)
response.encoding = response.apparent_encoding

# Pull the chapter links and the book title out of the table-of-contents page
href = re.findall('<dd><a href="(.*?)">.*?</a></dd>', response.text)
name = re.findall('<dt>(.*?)</dt>', response.text)[0]

os.makedirs('novel', exist_ok=True)  # make sure the output directory exists

for index in href:
    index_url = 'https://www.zwwx.com' + index
    res = requests.get(url=index_url, headers=headers)
    res.encoding = res.apparent_encoding
    selector = parsel.Selector(res.text)
    title = selector.css('.bookname > h1::text').get()
    content_list = selector.css('#content::text').getall()
    content = '\n'.join(content_list)
    # Append each chapter to a single text file per book
    with open(f'novel\\{name}.txt', mode='a', encoding='utf-8') as f:
        f.write(title)
        f.write('\n')
        f.write(content)
        f.write('\n')
    time.sleep(0.5)
    print('saving:', title)
```
Dynamic crawler:
```python
import os
import time

import parsel
import requests
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as es

opt = Options()
opt.add_argument('--headless')
opt.add_argument('--disable-gpu')
driver = Chrome(options=opt)

url = 'https://www.maofly.com/manga/6996/451102.html'
driver.get(url)
time.sleep(3)  # let the page render

for i in range(1, 10000):
    # Dismiss any alert box before touching the page
    result = es.alert_is_present()(driver)
    if result:
        print(result.text)
        result.accept()
        time.sleep(3)
    else:
        print('no alert present')
        res = driver.page_source
        selector = parsel.Selector(res)
        img_url = selector.xpath('//*[@id="all"]/div/div[2]/div[1]/img/@src').get()
        pic_title = selector.xpath('/html/body/div/h2/text()').get()
        pic_name = selector.xpath('/html/body/div/h1/a/text()').get()
        os.makedirs('img\\' + pic_name, exist_ok=True)
        img = requests.get(url=img_url).content
        name = f'{pic_title}_{i}'  # was a tuple in the original; a string keeps the filename valid
        print('saving:', name, img_url)
        with open(f'img\\{pic_name}\\{name}.jpg', mode='wb') as f:
            f.write(img)
        print('saved:', name)
        # Click through to the next page
        button = driver.find_element(By.XPATH, '/html/body/div/div[2]/nav/div/a[4]')
        button.click()
```
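Since the project itself is in Java, the same dynamic approach could look roughly like this (a minimal sketch, assuming the selenium-java 4.x dependency and a chromedriver on the PATH; the URL and XPath are carried over from the Python example purely for illustration):

```java
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

import java.time.Duration;

public class DynamicPageDemo {
    public static void main(String[] args) {
        ChromeOptions options = new ChromeOptions();
        options.addArguments("--headless", "--disable-gpu");
        WebDriver driver = new ChromeDriver(options);
        try {
            driver.get("https://www.maofly.com/manga/6996/451102.html");
            // Wait until the dynamically loaded image is present instead of sleeping blindly
            new WebDriverWait(driver, Duration.ofSeconds(10)).until(
                    ExpectedConditions.presenceOfElementLocated(
                            By.xpath("//*[@id=\"all\"]/div/div[2]/div[1]/img")));
            // Hand the rendered HTML to jsoup so the same selector code can be reused
            Document doc = Jsoup.parse(driver.getPageSource());
            String imgUrl = doc.select("#all img").attr("src");
            System.out.println("image URL: " + imgUrl);
        } finally {
            driver.quit();  // always release the browser process
        }
    }
}
```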
After watching 狂神's videos I expect the crawling itself to be manageable. The biggest open problems are implementing the database writes and the periodic re-crawl for updated chapters (the latter can wait until the site tests are done; a sketch of one scheduling option follows).
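For the periodic update check, one simple option (a sketch only; `checkForUpdates()` is a hypothetical entry point standing in for the crawl-and-compare logic) is a `ScheduledExecutorService`:

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class UpdateScheduler {
    public static void main(String[] args) {
        ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
        // scheduleWithFixedDelay waits for the previous run to finish before the next
        // delay starts, so a slow crawl never overlaps with itself
        scheduler.scheduleWithFixedDelay(() -> {
            try {
                // checkForUpdates();  // hypothetical: re-fetch each book page, compare the
                //                     // latest chapter name against book_info, insert new rows
                System.out.println("update check finished");
            } catch (Exception e) {
                e.printStackTrace();  // an uncaught exception would cancel the scheduled task
            }
        }, 0, 6, TimeUnit.HOURS);
    }
}
```

In a Spring Boot project the same effect could come from a `@Scheduled(fixedDelay = ...)` method instead.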
Jsoup: crawling the novel URLs (code)

```java
// Collect the detail-page URL of every novel in category 1 (fantasy), listing pages 1..max
public List<String> getBookUrl1() throws IOException, InterruptedException {
    ArrayList<String> bookUrlList1 = new ArrayList<>();
    int count = 0;
    int max = 5;
    for (int i = 1; i <= max; i++) {
        String urlEver = "https://www.9biqu.com/class/1/" + i + ".html";
        try {
            Proxy currentProxy = proxies.get(currentProxyIndex);
            Document document = Jsoup.connect(urlEver)
                    .userAgent(ua)
                    .proxy(currentProxy)
                    .timeout(1000000000)  // very large timeout (ms) so slow proxies don't throw
                    .get();
            Thread.sleep(3000);
            Elements NameUrlList = document.select("#newscontent > div.update-list > div > div > ul > li");
            for (Element el : NameUrlList) {
                String BookUrlPart = el.select("span.s2 > a").attr("href");
                String BookUrl = "https://www.9biqu.com" + BookUrlPart;
                bookUrlList1.add(BookUrl);
                count++;
                System.out.println("Collected " + count + " fantasy-novel URLs so far");
            }
        } catch (SocketException e) {
            System.out.println("IP banned, waiting 10 seconds...");
            Thread.sleep(10000);
            i--;  // retry the same listing page after switching proxy
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        } catch (IOException e) {
            System.out.println("Proxy IP failed while collecting URLs, switching IP...");
            Thread.sleep(10000);
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        }
    }
    System.out.println("All fantasy-novel URLs collected");
    return bookUrlList1;
}
```
Crawling the novel metadata (code)

```java
// Visit each book's detail page and persist its metadata to book_info
public List<BookInfo> listBookDetails() throws IOException, InterruptedException {
    int count = 0;
    BUrl1 = getBookUrl1();
    for (int i = 0; i < BUrl1.size(); i++) {
        String urlEverBook = BUrl1.get(i);
        try {
            count++;
            Proxy currentProxy = proxies.get(currentProxyIndex);
            Document document = Jsoup.connect(urlEverBook)
                    .userAgent(ua)
                    .proxy(currentProxy)
                    .timeout(1000000000)
                    .get();
            String bookName = document.select("#info > h1").text();
            String authorName = document.select("#info > p:nth-child(2) > a").text();
            String picUrl = "https://www.9biqu.com" + document.select("#fmimg > img").attr("src");
            String lastChapterName = document.select("#info > p:nth-child(5) > a").text();
            if (authorName.equals("小说免费阅读")) {
                authorName = "Tec";  // fallback when the site shows a placeholder instead of an author
            }
            String bookIntro = document.select("#intro").text();
            System.out.println("Book " + count + " title: " + bookName);
            System.out.println("Book " + count + " author: " + authorName);
            System.out.println("Book " + count + " intro: " + bookIntro);
            System.out.println("Book " + count + " cover URL: " + picUrl);
            System.out.println("Book " + count + " latest chapter: " + lastChapterName);
            // Deduplicate on book_name before inserting
            QueryWrapper<BookInfo> BookInfoNameQueryWrapper = new QueryWrapper<>();
            BookInfoNameQueryWrapper.eq("book_name", bookName);
            BookInfo bookInfo = bookInfoMapper.selectOne(BookInfoNameQueryWrapper);
            if (bookInfo == null) {
                BookInfo bookInfo1 = new BookInfo();
                bookInfo1.setWorkDirection(0);
                bookInfo1.setCategoryId(1L);
                bookInfo1.setCategoryName("玄幻奇幻");
                bookInfo1.setPicUrl(picUrl);
                bookInfo1.setBookName(bookName);
                bookInfo1.setAuthorId(0L);
                bookInfo1.setAuthorName(authorName);
                bookInfo1.setBookDesc(bookIntro);
                bookInfo1.setScore(6);
                bookInfo1.setBookStatus(0);
                bookInfo1.setVisitCount(100L);
                bookInfo1.setCommentCount(0);
                bookInfo1.setLastChapterName(lastChapterName);
                bookInfo1.setLastChapterUpdateTime(LocalDateTime.now());
                bookInfo1.setCreateTime(LocalDateTime.now());
                bookInfo1.setUpdateTime(LocalDateTime.now());
                bookInfo1.setIsVip(0);
                bookInfoMapper.insert(bookInfo1);
                System.out.println("Row inserted into book_info");
            } else {
                System.out.println("book_info already contains a novel named " + bookName);
            }
            Thread.sleep(4000);
        } catch (SocketException e) {
            System.out.println("IP banned while crawling book info, waiting 10 seconds...");
            Thread.sleep(10000);
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        } catch (IOException e) {
            System.out.println("Proxy IP failed while crawling book info, switching IP...");
            Thread.sleep(10000);
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        }
    }
    return null;
}
```
Crawling the chapter content (code)

```java
// Fetch one chapter page and return the raw HTML of its content div,
// skipping the first <p> (site boilerplate); retry until something comes back
public String getBookContent(String BookContentUrl) throws IOException, InterruptedException {
    String bookContent = null;
    while (bookContent == null) {
        try {
            Proxy currentProxy = proxies.get(currentProxyIndex);
            Document document = Jsoup.connect(BookContentUrl)
                    .userAgent(ua)
                    .proxy(currentProxy)
                    .timeout(1000000000)
                    .get();
            Thread.sleep(4000);
            Elements bookContentList = document.select("#content > *:not(p:first-child)");
            bookContent = bookContentList.toString();
        } catch (SocketException e) {
            System.out.println("IP banned while crawling chapter content, waiting 10 seconds...");
            Thread.sleep(10000);
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        } catch (IOException e) {
            System.out.println("Proxy IP failed while crawling chapter content, switching IP...");
            Thread.sleep(10000);
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        }
    }
    return bookContent;
}

// Word count for a chapter: count only Han characters
public int countChineseCharacters(String bookContent) {
    if (bookContent == null || bookContent.trim().isEmpty()) {
        return 0;
    }
    int count = 0;
    String[] words = bookContent.trim().split("\\s+");
    for (String word : words) {
        for (char c : word.toCharArray()) {
            if (isChineseCharacter(c)) {
                count++;
            }
        }
    }
    return count;
}

public boolean isChineseCharacter(char c) {
    Character.UnicodeScript script = Character.UnicodeScript.of(c);
    return script == Character.UnicodeScript.HAN;
}
```
Crawling the chapters (code)

```java
public List<BookInfo> listBookContent() throws IOException, InterruptedException {
    int count = 0;
    int countChapter = 0;
    BUrl1 = getBookUrl1();
    for (int i = 0; i < BUrl1.size(); i++) {
        String urlEverBook = BUrl1.get(i);
        try {
            count++;
            Proxy currentProxy = proxies.get(currentProxyIndex);
            Document document = Jsoup.connect(urlEverBook)
                    .userAgent(ua)
                    .proxy(currentProxy)
                    .timeout(1000000000)
                    .get();
            String bookName = document.select("#info > h1").text();
            String bookChapterName;
            String bookChapterUrlPart;
            String bookChapterUrl;
            String bookContent;
            Long bookId = 0L;
            // Skip the first 14 <dd> entries (the "latest chapters" block duplicated at the top)
            Elements bookChapter = document.select("#list > dl > dd:gt(13)");
            for (Element el : bookChapter) {
                countChapter++;
                bookChapterName = el.select("a").text();
                bookChapterUrlPart = el.select("a").attr("href");
                bookChapterUrl = "https://www.9biqu.com" + bookChapterUrlPart;
                bookContent = getBookContent(bookChapterUrl);
                int chineseCharCount = countChineseCharacters(bookContent);
                System.out.println("Book " + count + ", chapter " + countChapter + " name: " + bookChapterName);
                System.out.println("Book " + count + ", chapter " + countChapter + " URL: " + bookChapterUrl);
                // Look up the book's id in book_info by name
                QueryWrapper<BookInfo> BookInfoNameQueryWrapper = new QueryWrapper<>();
                BookInfoNameQueryWrapper.eq("book_name", bookName);
                BookInfo bookInfoText = bookInfoMapper.selectOne(BookInfoNameQueryWrapper);
                // Guard against an NPE when the book is missing from book_info
                bookId = (bookInfoText != null) ? bookInfoText.getId() : 0L;
                // Deduplicate the chapter on (chapter_name, book_id)
                QueryWrapper<BookChapter> BookChapterTextQueryWrapper = new QueryWrapper<>();
                BookChapterTextQueryWrapper.eq("chapter_name", bookChapterName);
                BookChapterTextQueryWrapper.eq("book_id", bookId);
                BookChapter bookChapterText = bookChapterMapper.selectOne(BookChapterTextQueryWrapper);
                if (bookInfoText != null) {
                    if (bookChapterText == null) {
                        BookChapter bookChapter1 = new BookChapter();
                        bookChapter1.setBookId(bookId);
                        bookChapter1.setChapterNum(countChapter);
                        bookChapter1.setChapterName(bookChapterName);
                        bookChapter1.setWordCount(chineseCharCount);
                        bookChapter1.setIsVip(0);
                        bookChapter1.setCreateTime(LocalDateTime.now());
                        bookChapter1.setUpdateTime(LocalDateTime.now());
                        bookChapterMapper.insert(bookChapter1);
                        System.out.println("Row inserted into book_chapter");
                    } else {
                        System.out.println("book_chapter already contains chapter " + bookChapterName + " of " + bookName);
                    }
                } else {
                    System.out.println("book_info has no entry named " + bookName + ", skipping chapter insert");
                }
                // Fetch the chapter's id back so the content row can reference it
                Long chapterId = 0L;
                QueryWrapper<BookChapter> bookChapterQueryWrapper = new QueryWrapper<>();
                bookChapterQueryWrapper.eq("book_id", bookId);
                bookChapterQueryWrapper.eq("chapter_name", bookChapterName);
                BookChapter bookChapterText2 = bookChapterMapper.selectOne(bookChapterQueryWrapper);
                if (bookChapterText2 != null) {
                    chapterId = bookChapterText2.getId();
                }
                // Store the chapter body (a raw HTML string) in book_content, keyed by chapter_id
                QueryWrapper<BookContent> BookContentQueryWrapper = new QueryWrapper<>();
                BookContentQueryWrapper.eq("chapter_id", chapterId);
                BookContent bookContentText = bookContentMapper.selectOne(BookContentQueryWrapper);
                if (bookContentText == null && bookInfoText != null) {
                    BookContent bookContent1 = new BookContent();
                    bookContent1.setChapterId(chapterId);
                    bookContent1.setContent(bookContent);
                    bookContent1.setCreateTime(LocalDateTime.now());
                    bookContent1.setUpdateTime(LocalDateTime.now());
                    bookContentMapper.insert(bookContent1);
                    System.out.println("Row inserted into book_content");
                } else {
                    System.out.println("book_content already has this chapter of " + bookName);
                }
            }
            Thread.sleep(4000);
        } catch (SocketException e) {
            System.out.println("IP banned while crawling chapters, waiting 10 seconds...");
            Thread.sleep(10000);
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        } catch (IOException e) {
            System.out.println("Proxy IP failed while crawling chapters, switching IP...");
            Thread.sleep(10000);
            currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
        }
    }
    return null;
}
```
Related notes

Two ways to parse HTML:

1. Jsoup.parse
```java
Document document = Jsoup.parse(new URL(urlEver), 300000);
```
2. Jsoup.connect
userAgent(...) sets the User-Agent request header
proxy(...) routes the request through a different IP
timeout(...) raises the connection timeout to avoid timeout exceptions
```java
Document document = Jsoup.connect(urlEverBook)
        .userAgent(ua)
        .proxy(currentProxy)
        .timeout(1000000000)
        .get();
```
Ways to get elements

DOM traversal:
.getElementsByTag("h1") selects by tag name
.getElementById("intro") selects by id
.text() gets the text content
.attr("href") gets the value of an attribute
```java
String bookName = document.getElementsByTag("h1").get(0).text();
String src = el.getElementsByTag("a").eq(0).attr("href");
```
CSS selectors:
.select("#info > h1") selects by selector path
.text() gets the text content
.attr("href") gets the value of an attribute
```java
String bookName = document.select("#info > h1").text();

Elements NameUrlList = document.select("#newscontent > div.update-list > div > div > ul > li");
for (Element el : NameUrlList) {
    String BookUrlPart = el.select("span.s2 > a").attr("href");
    String BookUrl = "https://www.9biqu.com" + BookUrlPart;
    bookUrlList1.add(BookUrl);
    count++;
    System.out.println("Collected " + count + " novel URLs so far");
}

Elements bookChapter = document.select("#list > dl > dd:gt(13)");
for (Element el : bookChapter) {
    countChapter++;
    bookChapterName = el.select("a").text();
    bookChapterUrlPart = el.select("a").attr("href");
    bookChapterUrl = "https://www.9biqu.com" + bookChapterUrlPart;
    System.out.println("Book " + count + ", chapter " + countChapter + " name: " + bookChapterName);
    System.out.println("Book " + count + ", chapter " + countChapter + " URL: " + bookChapterUrl);
    chapterNameList.add(bookChapterName);
    chapterUrlList.add(bookChapterUrl);
}

public String getBookContent(String BookContentUrl) throws IOException, InterruptedException {
    String bookContent;
    Document document = Jsoup.connect(BookContentUrl)
            .userAgent(ua)
            .proxy(proxy)
            .get();
    Thread.sleep(4000);
    Elements bookContentList = document.select("#content > *:not(p:first-child)");
    bookContent = bookContentList.toString();
    return bookContent;
}
```
Jsoup pseudo-selectors

:lt(n): elements whose sibling index (their position among siblings under the same parent) is less than n, e.g. td:lt(3) selects the first three cells of each row
:gt(n): elements whose sibling index is greater than n, e.g. div p:gt(2) selects the p elements inside a div from the fourth one onward (this is what dd:gt(13) above relies on)
:eq(n): elements whose sibling index equals n, e.g. form input:eq(1) selects the second input inside a form
:has(selector): elements that contain an element matching the selector, e.g. div:has(p) selects divs that contain a p
:not(selector): elements that do not match the selector, e.g. div:not(.logo) selects every div without class "logo"
:contains(text): elements containing the given text, case-insensitively, e.g. p:contains(jsoup)
:containsOwn(text): elements whose own text (not their children's) contains the given text
:matches(regex): elements whose text matches the given regular expression, e.g. div:matches((?i)login)
:matchesOwn(regex): elements whose own text matches the given regular expression

A small runnable demonstration of a few of these follows.
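A minimal, self-contained demonstration (the HTML snippet is invented for illustration):

```java
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class SelectorDemo {
    public static void main(String[] args) {
        Document doc = Jsoup.parse(
                "<dl>"
                + "<dd><a href='/1.html'>Chapter 1</a></dd>"
                + "<dd><a href='/2.html'>Chapter 2</a></dd>"
                + "<dd><a href='/3.html'>Chapter 3</a></dd>"
                + "<dd><a href='/4.html'>Chapter 4</a></dd>"
                + "</dl>");
        // :gt(n) filters by sibling index, so dd:gt(1) keeps the 3rd dd onward
        System.out.println(doc.select("dd:gt(1) a").eachText());   // [Chapter 3, Chapter 4]
        // :eq(n) picks the element at sibling index n (0-based)
        System.out.println(doc.select("dd:eq(0) a").attr("href")); // /1.html
        // :contains(text) matches case-insensitively on the element's text
        System.out.println(doc.select("a:contains(chapter 2)").text()); // Chapter 2
    }
}
```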
Exception handling: use try/catch. Generally, after the first exception shows up, act on its message, e.g. print diagnostics or sleep the thread. Watch whether the try sits inside a loop; a decrement like i-- keeps the loop from silently skipping the failed iteration.
You can also throw an exception yourself inside the try block. Here, if the content selection comes back empty, storing it directly would persist a blank chapter (note that select() returns an empty Elements collection rather than null, so the right check is isEmpty() rather than a null test). Throwing early hands control to the catch block, after which bookContentList is fetched again:
```java
try {
    Proxy currentProxy = proxies.get(currentProxyIndex);
    Document document = Jsoup.connect(BookContentUrl)
            .userAgent(ua)
            .proxy(currentProxy)
            .timeout(1000000000)
            .get();
    Thread.sleep(4000);
    Elements bookContentList = document.select("#content > *:not(p:first-child)");
    if (!bookContentList.isEmpty()) {  // select() returns an empty Elements, never null
        bookContent = bookContentList.toString();
    } else {
        throw new SocketException("Failed to fetch chapter content, will retry");
    }
} catch (SocketException e) {
    System.out.println("IP banned while crawling chapter content, waiting 10 seconds...");
    Thread.sleep(10000);
    currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
} catch (IOException e) {
    System.out.println("Proxy IP failed while crawling chapter content, switching IP...");
    Thread.sleep(10000);
    currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
}
```
Building an IP proxy pool: first create a List holding the proxy IPs and ports, plus a currentProxyIndex to track the current position.
```java
List<Proxy> proxies = Arrays.asList(
        new Proxy(Proxy.Type.HTTP, new InetSocketAddress("139.200.74.178", 4283)),
        new Proxy(Proxy.Type.HTTP, new InetSocketAddress("60.169.245.20", 4215)),
        new Proxy(Proxy.Type.HTTP, new InetSocketAddress("183.147.27.251", 4231))
);
int currentProxyIndex = 0;
```
Take one IP and port from the list and assign it to currentProxy:
```java
Proxy currentProxy = proxies.get(currentProxyIndex);
```
Then attach the proxy from the pool to the connection with the proxy() method:
```java
Document document = Jsoup.connect(urlEverBook)
        .proxy(currentProxy)
        .get();
```
Finally, rotate to the next IP as circumstances require:
```java
catch (SocketException e) {
    System.out.println("IP banned while crawling book info, switching IP...");
    currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
}
```
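Put together, the pool, the lookup, and the rotation could be wrapped in one small helper (a sketch; the class name, proxy addresses, and retry policy are illustrative, not the project's actual code):

```java
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.util.Arrays;
import java.util.List;

public class ProxyPool {
    // Illustrative proxies; real ones expire quickly and must be refreshed
    private final List<Proxy> proxies = Arrays.asList(
            new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 8001)),
            new Proxy(Proxy.Type.HTTP, new InetSocketAddress("127.0.0.1", 8002)));
    private int currentProxyIndex = 0;

    // Fetch a document, rotating to the next proxy on failure until one works
    public Document fetch(String url, String ua, int maxAttempts) throws IOException {
        IOException last = null;
        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            try {
                return Jsoup.connect(url)
                        .userAgent(ua)
                        .proxy(proxies.get(currentProxyIndex))
                        .timeout(30_000)
                        .get();
            } catch (IOException e) {
                last = e;
                currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
            }
        }
        throw last;  // every proxy failed
    }
}
```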
Handling needed when writing to the database

Preventing duplicate rows:

```java
QueryWrapper<BookInfo> BookInfoNameQueryWrapper = new QueryWrapper<>();
BookInfoNameQueryWrapper.eq("book_name", bookName);
BookInfo bookInfo = bookInfoMapper.selectOne(BookInfoNameQueryWrapper);

if (bookInfo == null) {
    BookInfo bookInfo1 = new BookInfo();
    bookInfo1.setBookName(bookName);
    bookInfoMapper.insert(bookInfo1);
    System.out.println("Row inserted into book_info");
} else {
    System.out.println("book_info already contains a novel named " + bookName);
}
```
Reading a value from one table to store in another:

```java
QueryWrapper<BookInfo> BookInfoNameQueryWrapper = new QueryWrapper<>();
BookInfoNameQueryWrapper.eq("book_name", bookName);
BookInfo bookInfoId = bookInfoMapper.selectOne(BookInfoNameQueryWrapper);
bookId = bookInfoId.getId();

BookChapter bookChapter1 = new BookChapter();
bookChapter1.setBookId(bookId);
bookChapterMapper.insert(bookChapter1);
```
Preventing empty data:

```java
String bookContent = null;
while (bookContent == null) {
    try {
        Proxy currentProxy = proxies.get(currentProxyIndex);
        Document document = Jsoup.connect(BookContentUrl)
                .userAgent(ua)
                .proxy(currentProxy)
                .timeout(1000000000)
                .get();
        Thread.sleep(4000);
        Elements bookContentList = document.select("#content > *:not(p:first-child)");
        // Only accept a non-empty selection; an empty Elements stringifies to ""
        // (not null), which would otherwise end the retry loop with blank content
        if (!bookContentList.isEmpty()) {
            bookContent = bookContentList.toString();
        }
    } catch (SocketException e) {
        System.out.println("IP banned while crawling chapter content, waiting 10 seconds...");
        Thread.sleep(10000);
        currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
    } catch (IOException e) {
        System.out.println("Proxy IP failed while crawling chapter content, switching IP...");
        Thread.sleep(10000);
        currentProxyIndex = (currentProxyIndex + 1) % proxies.size();
    }
}
return bookContent;
```