shell批量采集百度下拉词
1、# 接口
https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=关键词
测试下:
https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=seo
可以用

2、# 测试
# curl -s "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=seo"

3、# 编码有点问题,换编码
curl -s "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=seo"|iconv -f gbk -t utf-8
# iconv -f gbk -t utf-8 是把gbk换成utf-8的,如果没有编码问题就不用替换。

4、#提取关键词-初步提取
curl -s "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=seo"|iconv -f gbk -t utf-8|awk -F":" '{print $4}'
# awk -F":" '{print $4}' 意思指数以“:”为分隔符,取第四项。

5、#提取关键词-正则提取
curl -s "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=seo"|iconv -f gbk -t utf-8|awk -F":" '{print $4}'|grep -oP '(?<=").*?(?=")'

6、# 去掉 ,
curl -s "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd=seo"|iconv -f gbk -t utf-8|awk -F":" '{print $4}'|grep -oP '(?<=").*?(?=")'|sed 's/,//g'

1、汇总下:批量采集
cat keywords.txt|while read line;do curl -s "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd="$line|iconv -f gbk -t utf-8|awk -F":" '{print $4}'|grep -oP '(?<=").*?(?=")'|sed 's/,//g';done
# keywords.txt 一行一词 utf-8

2、# 存入1.txt
cat keywords.txt|while read line;do curl -s "https://sp0.baidu.com/5a1Fazu8AA54nxGko9WTAnF6hhy/su?wd="$line|iconv -f gbk -t utf-8|awk -F":" '{print $4}'|grep -oP '(?<=").*?(?=")'|sed 's/,//g';done > 1.txt