# Extract the fields of one article page from `response` (an object exposing
# `.xpath(...).extract()`), printing each field as it is read.

# Title
article_title = response.xpath('//div[@class="entry-header"]/h1/text()').extract()[0]
print(article_title)

# Publication time: take the first text node of the meta paragraph, then
# strip surrounding whitespace and the '·' separator.
article_time = (
    response.xpath('//p[@class="entry-meta-hide-on-mobile"]/text()')
    .extract()[0]
    .strip()
    .replace('·', '')
    .strip()
)
print(article_time)

# Praise (up-vote) count
article_praise = response.xpath('//span[contains(@class,"vote-post-up")]/h10/text()').extract()[0]
print(article_praise)

# Bookmark count
bookmark = response.xpath('//span[contains(@class,"bookmark-btn")]/text()').extract()[0]
# Take the whole first run of digits. The previous greedy '.*(\d+).*' let the
# leading '.*' swallow every digit but the last one ("12" was read as "2").
match_bookmark = re.search(r'\d+', bookmark)
# Fall back to '0' when the label has no digits, so the name is always bound
# (kept as a string to match the type produced on a successful match).
article_bookmark = match_bookmark.group() if match_bookmark else '0'
print(article_bookmark)

# Comment count (same digit extraction and fallback as the bookmark count)
comments = response.xpath('//a[@href="#article-comment"]/text()').extract()[0]
match_comments = re.search(r'\d+', comments)
article_comments = match_comments.group() if match_comments else '0'
print(article_comments)

# Article body (the full markup of the "entry" div)
article_contents = response.xpath('//div[@class="entry"]').extract()[0]

# Article tags: the link texts inside the meta paragraph
tag_list = response.xpath('//p[@class="entry-meta-hide-on-mobile"]/a/text()').extract()
# Drop any link whose text ends with "评论": it is filtered out as a
# non-tag entry (presumably the comment-count link - confirm against the page).
tag_list = [element for element in tag_list if not element.strip().endswith("评论")]
tags = ','.join(tag_list)
print(tags)
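# ---------------------
# Author: sy_y
# Source: CSDN
# Original article:
# https://blog.csdn.net/s740556472/article/details/81023624
# Copyright notice: this is an original article by the blogger; please include a link to the post when reposting.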