kindle manager

This commit is contained in:
gavin
2020-06-18 09:45:49 +08:00
parent d98d2b2cd0
commit bccb6a68cb
19 changed files with 26603 additions and 255 deletions

View File

@@ -85,81 +85,24 @@ testbooks =['24堂财富课',
'中国为什么有前途对外经济关系的战略潜能第3版',
'日本的世界观(《剑桥日本史》主编凝练之作三个人物故事串起日本两百年变局了解近代日本转向的必读之书理想国出品))']
testresp = """<div class="result">
<div class="pic">
<a class="nbg" href="https://www.douban.com/link2/?url=https%3A%2F%2Fbook.douban.com%2Fsubject%2F30329810%2F&amp;query=%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E&amp;cat_id=1001&amp;type=search&amp;pos=1" target="_blank" onclick="moreurl(this,{i: '1', query: '%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E', from: 'dou_search_book', sid: 30329810, qcat: ''})" title="解读中国经济" ><img src="https://img3.doubanio.com/view/subject/s/public/s29872890.jpg"></a>
</div>
<div class="content">
<div class="title">
<h3>
<span>[书籍]</span>&nbsp;<a href="https://www.douban.com/link2/?url=https%3A%2F%2Fbook.douban.com%2Fsubject%2F30329810%2F&amp;query=%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E&amp;cat_id=1001&amp;type=search&amp;pos=1" target="_blank" onclick="moreurl(this,{i: '1', query: '%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E', from: 'dou_search_book', sid: 30329810, qcat: ''})" >解读中国经济 </a>
</h3>
<div class="rating-info">
<span class="allstar45"></span>
<span class="rating_nums">9.4</span>
<span>(145人评价)</span>
<span class="subject-cast">林毅夫 / 北京大学出版社 / 2018</span>
</div>
</div>
<p>《解读中国经济》是解读中国经济之作,总结了中国与其他国家、地区经济发展和改革活动的经验,提出了一个经济发展和转型的一般理论,并以此理论分析中国在改革和发展过...</p>
</div>
</div>
div class="result">
<div class="pic">
<a class="nbg" href="https://www.douban.com/link2/?url=https%3A%2F%2Fbook.douban.com%2Fsubject%2F30329810%2F&amp;query=%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E&amp;cat_id=1001&amp;type=search&amp;pos=1" target="_blank" onclick="moreurl(this,{i: '1', query: '%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E', from: 'dou_search_book', sid: 30329810, qcat: ''})" title="解读中国经济" ><img src="https://img3.doubanio.com/view/subject/s/public/s29872890.jpg"></a>
</div>
<div class="content">
<div class="title">
<h3>
<span>[书籍]</span>&nbsp;<a href="https://www.douban.com/link2/?url=https%3A%2F%2Fbook.douban.com%2Fsubject%2F30329810%2F&amp;query=%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E&amp;cat_id=1001&amp;type=search&amp;pos=1" target="_blank" onclick="moreurl(this,{i: '1', query: '%E8%A7%A3%E8%AF%BB%E4%B8%AD%E5%9B%BD%E7%BB%8F%E6%B5%8E', from: 'dou_search_book', sid: 30329810, qcat: ''})" >解读中国经济 </a>
</h3>
<div class="rating-info">
<span class="allstar45"></span>
<span class="rating_nums">9.4</span>
<span>(145人评价)</span>
<span class="subject-cast">林毅夫 / 北京大学出版社 / 2018</span>
</div>
</div>
<p>《解读中国经济》是解读中国经济之作,总结了中国与其他国家、地区经济发展和改革活动的经验,提出了一个经济发展和转型的一般理论,并以此理论分析中国在改革和发展过...</p>
</div>
</div>"""
class bookInfoSpide():
"""
re_bn = re.compile(r'''
class=\"nbg.+?sid: (\d+?),.+?
title=\"(.+?)\".+?
img src=\"(.+?)\".+?
rating_nums\">(.+?)<
''',flags=re.S|re.X)
"""
[re_bn,re_bn,re_rate,re_norate,re_author,re_end] = [None,None,None,None,None,None]
[re_bn,re_bn,re_score,re_star,re_author,re_end]=[None,None,None,None,None,None]
if ISDOUBAN==1:
re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\".+?rating_nums\">(.+?)<''', re.S)
re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\"''')
re_rate = re.compile(r'''class=\"rating_nums\">(.+?)<''')
re_norate = re.compile(r'''class=\"allstar00\"''')
re_author = re.compile(r'''class=\"subject-cast\">(.+?)<''')
re_bn=re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\".+?rating_nums\">(.+?)<''', re.S)
re_bn=re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\"''')
re_star=re.compile(r'''^<span class=\"allstar(\d+)\"></span>''')
re_score=re.compile(r'''class=\"rating_nums\">(.+?)<''')
re_ratenum=re.compile(r'''^<span>\((\d+)人评价\)</span>''')
re_author=re.compile(r'''class=\"subject-cast\">(.+?)<''')
else:
re_asin = re.compile(r'''^<div data-asin=\"(.+?)\" data-index''')
re_img = re.compile(r'''^<img src=\"(.+?)\"$''')
re_bn = re.compile(r'''^alt=\"(.+?)\"$''')
re_author = re.compile(r'''^<div class=.+auto\"><\/span>.+$''')
re_rate = re.compile(r'''^<span aria-label=\"(.+?)\">$''')
#re_end = re.compile(r'''<\/body><\/html>''')
re_end = re.compile(r'''^<span class=\"a-letter-space\"><\/span><\/div><\/div>''')
re_asin=re.compile(r'''^<div data-asin=\"(.+?)\" data-index''')
re_img=re.compile(r'''^<img src=\"(.+?)\"$''')
re_bn=re.compile(r'''^alt=\"(.+?)\"$''')
re_author=re.compile(r'''^<div class=.+auto\"><\/span>.+$''')
re_rate=re.compile(r'''^<span aria-label=\"(.+?)\">$''')
#re_end=re.compile(r'''<\/body><\/html>''')
re_end=re.compile(r'''^<span class=\"a-letter-space\"><\/span><\/div><\/div>''')
def __init__(self):
pass
@@ -171,8 +114,11 @@ class bookInfoSpide():
"link":"https://....xxxxx",
"bookname": "庆余年",
"img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg",
"rate": "8.0",
"score": "8.0",
"ratenum": "1000",
"author": "猫腻",
"publisher": "中华书局",
"publishing": "2015"
},...}
"""
@@ -187,15 +133,8 @@ class bookInfoSpide():
session.params = mparams
r = session.get( url=murl, headers=mheaders, params=mparams)
#r = requests.get( url=murl, headers=mheaders, params=mparams)
except requests.exceptions.ConnectionError:
print('ConnectionError -- please wait 3 seconds')
time.sleep(3)
except requests.exceptions.ChunkedEncodingError:
print('ChunkedEncodingError -- please wait 3 seconds')
time.sleep(3)
except:
print('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
time.sleep(3)
except Exception as e:
print(e)
if r.status_code != 200:
print('grab book {} info from webside failure'.format(mbkn))
@@ -207,35 +146,50 @@ class bookInfoSpide():
resp = r.text
if ISDOUBAN==1:
stat = 'SID'
stat='SID'
for line in resp.split('\n'):
line = line.strip()
line=line.strip()
if line=='': continue
if stat=='SID':
ret=re.search(self.re_bn, line)
if ret:
sid = ret.group(1)
bkinfo[sid]['link'] = os.path.join(LINKPREF,sid)
bkinfo[sid]['bookname'] = ret.group(2)
bkinfo[sid]['img'] = ret.group(3)
stat = 'RATE'
sid=ret.group(1)
bkinfo[sid]['link']=os.path.join(LINKPREF,sid)
bkinfo[sid]['bookname']=ret.group(2)
bkinfo[sid]['img']=ret.group(3)
stat='STAR'
continue
elif stat=='RATE':
# no rating for this book: return to the SID state and look for the next book
if re.search(self.re_norate, line):
stat = 'SID'
continue
ret=re.search(self.re_rate, line)
elif stat=='STAR':
ret=re.search(self.re_star, line)
if ret:
bkinfo[sid]['rate'] = ret.group(1)
stat = 'AUTHOR'
star = ret.group(1)
if star=='00':
stat='AUTHOR'
elif int(star) > 0:
stat='SCORE'
elif stat=='SCORE':
ret=re.search(self.re_score, line)
if ret:
bkinfo[sid]['score']=ret.group(1)
stat='RATENUM'
continue
elif stat=='RATENUM':
ret=re.search(self.re_ratenum, line)
if ret:
bkinfo[sid]['ratenum']=ret.group(1)
stat='AUTHOR'
continue
elif stat=='AUTHOR':
ret=re.search(self.re_author, line)
if ret:
bkinfo[sid]['author'] = ret.group(1).split(' ')[0]
stat = 'SID'
tt=ret.group(1).split(' / ')
if len(tt)>=3:
*author, bkinfo[sid]['publisher'], bkinfo[sid]['publishing']=tt
bkinfo[sid]['author']='/'.join(author)
else:
bkinfo[sid]['author']=ret[0]
stat='SID'
else: continue
else:
stat='ASIN'
@@ -288,13 +242,15 @@ class bookInfoSpide():
"""
mbkinfo:
douban
{
"庆余年": {
"link":"https://....25853071",
"bookname": "庆余年xxx",
"img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg",
"rate": "8.0",
"author": "猫腻"
"10530219": {
"link": "https://book.douban.com/subject/10530219",
"bookname": "市场的逻辑",
"img": "https://img3.doubanio.com/view/subject/s/public/s8912552.jpg",
"score": "8.3",
"ratenum": "218",
"publisher": "世纪文景 上海人民出版社",
"publishing": "2012",
"author": "张维迎"
},...}
amazon
"孟子": {
@@ -310,21 +266,21 @@ class bookInfoSpide():
# f1/d1: mbkn is contained in bookname
# f2/d2: bookname is contained in mbkn
# f3/d3: mbkn and bookname differ
[f1,f2,f3] = [0,0,0]
[d1,d2,d3] = [{},{},{}]
mbkn = mbkinfo[0]
[f1,f2,f3]=[0,0,0]
[d1,d2,d3] =[{},{},{}]
mbkn=mbkinfo[0]
for k,v in mbkinfo[1].items():
bkn = v['bookname']
if len(v) == 5:
bkn=v['bookname']
if len(v)==8:
if (not f1) and (mbkn in bkn):
f1 = 1
d1 = {mbkn:v}
f1=1
d1={mbkn:v}
elif (not f1) and (not f2) and (bkn in mbkn):
f2 = 1
d2 = {mbkn:v}
f2=1
d2={mbkn:v}
elif (not f3):
f3 = 1
d3 = {mbkn:v}
f3=1
d3={mbkn:v}
else: continue
else:
continue