#!/usr/bin/env bash
#
# Collect icaifu.com article URLs via Baidu search result pages.
#
# Steps:
#   1. Download Baidu result pages for "keyword site:icaifu.com" at result
#      offsets 0, 10, ..., 60 (each page is saved to 1.txt in turn).
#   2. Pull the href targets that point at www.baidu.com/...link... out of
#      every page (2.txt holds the matches of the current page) and append
#      the bare links to 3.txt.
#   3. Download every link listed in 3.txt (bodies go to 4.txt) while wget
#      writes its log to location_log.txt.
#   4. Store the unique "Location" lines of that log in icaifu_url.txt and
#      print every http...icaifu...shtml URL found in them to stdout.
#
# Requires: bash (arithmetic for-loop, [[ =~ ]]), wget, grep, sort.
# All working files are created in the current directory.

set -u

#######################################
# Print the first double-quoted substring of each input line.
# Lines without a non-empty quoted substring are dropped.
# Arguments: none (reads stdin)
# Outputs:   one unquoted string per matching line on stdout
#######################################
extract_quoted() {
  local raw
  # Keep the pattern in a variable: quoting a regex directly on the right-hand
  # side of =~ would turn it into a literal string match.
  local -r quoted_re='"([^"]+)"'

  while IFS= read -r raw; do
    if [[ $raw =~ $quoted_re ]]; then
      printf '%s\n' "${BASH_REMATCH[1]}"
    fi
  done
}

#######################################
# Run the whole scrape; see the file header for the individual steps.
# Arguments: none
# Outputs:   each requested search URL, then the extracted article URLs,
#            on stdout; warnings on stderr
# Returns:   1 if wget is not installed, otherwise the status of the final
#            extraction loop
#######################################
main() {
  local offset url line
  # [^"]+ keeps the match inside a single attribute value; a greedy .+ could
  # run on to the last quote of the line and glue several attributes together.
  local -r link_re='href[[:space:]]*=[[:space:]]*"[^"]+www\.baidu\.com[^"]+link[^"]+"'
  local -r article_re='(http.+icaifu.+shtml)'

  if ! command -v wget >/dev/null; then
    printf 'error: wget is required but was not found\n' >&2
    return 1
  fi

  # Truncate the link list (a plain `echo >` would leave a blank first line).
  : > 3.txt

  for (( offset = 0; offset < 70; offset += 10 )); do
    url="http://www.baidu.com/s?wd=keyword%20site%3Aicaifu.com&pn=${offset}&rsv_spt=1&issp=1&f=8&rsv_bp=0&rsv_idx=2&ie=utf-8"
    printf '%s\n' "$url"

    if wget -q -O 1.txt "$url"; then
      grep -Eo "$link_re" 1.txt > 2.txt
      extract_quoted < 2.txt >> 3.txt
    else
      # Best effort: a failed page is reported and skipped, not fatal.
      printf 'warning: could not fetch %s\n' "$url" >&2
    fi

    # Pause between result pages.
    sleep 1
  done

  # wget's log messages may be localized; force the C locale so the
  # "Location" lines searched for below are written in English.
  # wget exits non-zero when any single link fails, so only warn.
  if ! LC_ALL=C wget -i 3.txt -w 3 -O 4.txt -o location_log.txt; then
    printf 'warning: wget reported errors; see location_log.txt\n' >&2
  fi

  grep 'Location' location_log.txt | sort -u > icaifu_url.txt

  # Extract the URLs.
  while IFS= read -r line; do
    if [[ $line =~ $article_re ]]; then
      printf '%s\n' "${BASH_REMATCH[1]}"
    fi
  done < icaifu_url.txt
}

main "$@"