kindle manager

This commit is contained in:
gavin
2020-06-14 21:33:42 +08:00
parent 9d5b787ff3
commit 7cb8e61519
36 changed files with 99 additions and 28 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -91,9 +91,9 @@ b['1']['2'] = {'3':1} # OK
- import function: - import function:
- local **done** - local **done**
- kindle **done** - kindle **done**
- duokan - duokan **doing**
- amazon - amazon **doing**
- link to douban - link to douban and amazon **doing**
- export function: - export function:
- to [evernote](https://github.com/benhorvath/kindle2evernote/blob/master/kindle2evernote.py) - to [evernote](https://github.com/benhorvath/kindle2evernote/blob/master/kindle2evernote.py)
- to web html format - to web html format
@@ -115,3 +115,5 @@ b['1']['2'] = {'3':1} # OK
- chrome extension - chrome extension
- clean (sdr) - clean (sdr)
- sync modify to kindle device - sync modify to kindle device
- compare parsing html info with xpath & beautifulsoup & regex

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 KiB

BIN
downimg/p2575362797.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
downimg/s1026024.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

BIN
downimg/s1670329.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
downimg/s1738643.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
downimg/s24399718.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

BIN
downimg/s26303695.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

BIN
downimg/s2689149.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
downimg/s27246465.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

BIN
downimg/s27276912.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
downimg/s27653114.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
downimg/s28283792.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 KiB

BIN
downimg/s29357535.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
downimg/s29399593.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

BIN
downimg/s29581756.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.4 KiB

BIN
downimg/s29811329.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
downimg/s29841565.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

BIN
downimg/s29879195.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

BIN
downimg/s33314966.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

BIN
downimg/s33551591.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

BIN
downimg/s33559469.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
downimg/s33587329.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

BIN
downimg/s3948396.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
downimg/s4052388.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
downimg/s4386858.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
downimg/s4397638.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

BIN
downimg/s5641654.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
downimg/s6979148.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
downimg/s7046197.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

BIN
downimg/s8488837.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -11,6 +11,7 @@ import sys
import os import os
from time import sleep from time import sleep
import pandas as pd import pandas as pd
import threading
from PySide2.QtWidgets import * from PySide2.QtWidgets import *
@@ -23,6 +24,7 @@ from PySide2.QtGui import (QBrush, QColor, QConicalGradient, QCursor, QFont,
from mainwindow import Ui_MainWindow from mainwindow import Ui_MainWindow
from kman import * from kman import *
from parseweb import *
# import binary resource file(kmanapp_rc.py) # import binary resource file(kmanapp_rc.py)
import kmanapp_rc import kmanapp_rc
@@ -63,6 +65,8 @@ class kmanWindow(QMainWindow):
self.tree_selected = 'note_root' self.tree_selected = 'note_root'
self.km = kMan() self.km = kMan()
self.spide = bookInfoSpide()
# initial check order: # initial check order:
# 1. backup file bk.data -> # 1. backup file bk.data ->
# 2. kindle(My Clippings.txt) -> # 2. kindle(My Clippings.txt) ->
@@ -520,6 +524,16 @@ class kmanWindow(QMainWindow):
# stop check thread # stop check thread
self.flag = False self.flag = False
def grab_all_book_info(self):
    """Look up every book in ``self.books_data`` on the web and save its cover.

    For each key of ``self.books_data`` the title is cut at the first
    bracket, dash, colon, underscore or whitespace, and the remaining
    prefix is used as the search term. The spider result is filtered and,
    when a match survives the filter, its cover image is downloaded.

    Titles that reduce to an empty search term are skipped. A failure on
    one book is printed and does not stop the remaining books from being
    processed. Nothing is returned.
    """
    # Iterate over a snapshot of the keys so the loop is not broken if the
    # dict is modified while the (slow, network-bound) lookups are running.
    for raw_name in list(self.books_data):
        bkname = re.split(r'[\(\-\:_\s]', raw_name.strip())[0]
        if not bkname:
            # e.g. a title starting with "(" -- nothing usable to search for
            continue
        print(bkname)
        try:
            bkinfo = self.spide.grab_book_info(bkname)
            filter_bkinfo = self.spide.filter_spide_book(bkinfo)
            if filter_bkinfo:
                self.spide.down_book_img(filter_bkinfo)
        except Exception as e:
            # Best-effort batch job: report the problem and go on with the
            # next book instead of aborting the whole run.
            print('grab book {} info failure: {}'.format(bkname, e))
# thanks Martin Fitzpatrick ^_^ # thanks Martin Fitzpatrick ^_^
# https://www.learnpyqt.com/courses/model-views/qtableview-modelviews-numpy-pandas/ # https://www.learnpyqt.com/courses/model-views/qtableview-modelviews-numpy-pandas/
class nTableModel(QAbstractTableModel): class nTableModel(QAbstractTableModel):
@@ -562,6 +576,10 @@ if __name__ == "__main__":
#kmw.showFullScreen() #kmw.showFullScreen()
kmw.show() kmw.show()
trd = threading.Thread(target=kmw.grab_all_book_info)
trd.setDaemon(True)
trd.start()
# loop check kindle is connected or not # loop check kindle is connected or not
# BUG to be implement XXXX # BUG to be implement XXXX
""" """

View File

@@ -22,17 +22,21 @@ logger = logging.getLogger()
logger.addHandler(logging.FileHandler('log')) logger.addHandler(logging.FileHandler('log'))
logger.setLevel(logging.DEBUG) logger.setLevel(logging.DEBUG)
spidetp = 1 # 0 - douban 1- amazon ISDOUBAN = 1
IMGPATH = './downimg'
LINKPREF = 'https://book.douban.com/subject/' \
if ISDOUBAN else 'https://www.amazon.cn/s?k='
mheaders = { mheaders = {
'Host': 'www.douban.com', 'Host': 'www.douban.com',
'Referer': 'http://www.douban.com', 'Referer': 'http://www.douban.com',
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36" 'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
} }
#"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
mparams = {} mparams = {}
murl = "" murl = ""
if spidetp==0: if ISDOUBAN==1:
mparams['Host']='www.douban.com', mparams['Host']='www.douban.com',
mparams['search_text'] = 'bkname_xxx' mparams['search_text'] = 'bkname_xxx'
mparams['cat']='1001' mparams['cat']='1001'
@@ -141,7 +145,7 @@ class bookInfoSpide():
[re_bn,re_bn,re_rate,re_norate,re_author,re_end] = [None,None,None,None,None,None] [re_bn,re_bn,re_rate,re_norate,re_author,re_end] = [None,None,None,None,None,None]
if spidetp==0: if ISDOUBAN==1:
re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\".+?rating_nums\">(.+?)<''', re.S) re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\".+?rating_nums\">(.+?)<''', re.S)
re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\"''') re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\"''')
re_rate = re.compile(r'''class=\"rating_nums\">(.+?)<''') re_rate = re.compile(r'''class=\"rating_nums\">(.+?)<''')
@@ -164,6 +168,7 @@ class bookInfoSpide():
"""mbkn - bookname to be spided """mbkn - bookname to be spided
return: { return: {
"25853071": { # sid "25853071": { # sid
"link":"https://....xxxxx"
"bookname": "庆余年", "bookname": "庆余年",
"img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg", "img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg",
"rate": "8.0", "rate": "8.0",
@@ -171,15 +176,32 @@ class bookInfoSpide():
},...} },...}
""" """
if spidetp==0: #douban if ISDOUBAN==1: #douban
mparams['search_text'] = mbkn mparams['search_text'] = mbkn
else: #amazon else: #amazon
mparams['k'] = mbkn mparams['k'] = mbkn
r = requests.get( url=murl, headers=mheaders, params=mparams) try:
s = requests.Session()
s.header = mheaders
s.params = mparams
r = s.get(murl)
#r = requests.get( url=murl, headers=mheaders, params=mparams)
except requests.exceptions.ConnectionError:
print('ConnectionError -- please wait 3 seconds')
time.sleep(3)
except requests.exceptions.ChunkedEncodingError:
print('ChunkedEncodingError -- please wait 3 seconds')
time.sleep(3)
except:
print('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
time.sleep(3)
if r.status_code != 200: if r.status_code != 200:
raise Exception("请求失败") print('grab book {} info from webside failure'.format(mbkn))
bkinfo = defaultdict(dict) bkinfo = defaultdict(dict)
sid = None sid = None
@@ -187,7 +209,7 @@ class bookInfoSpide():
resp = r.text resp = r.text
if spidetp==0: if ISDOUBAN==1:
stat = 'SID' stat = 'SID'
for line in resp.split('\n'): for line in resp.split('\n'):
line = line.strip() line = line.strip()
@@ -197,6 +219,7 @@ class bookInfoSpide():
ret=re.search(self.re_bn, line) ret=re.search(self.re_bn, line)
if ret: if ret:
sid = ret.group(1) sid = ret.group(1)
bkinfo[sid]['link'] = os.path.join(LINKPREF,sid)
bkinfo[sid]['bookname'] = ret.group(2) bkinfo[sid]['bookname'] = ret.group(2)
bkinfo[sid]['img'] = ret.group(3) bkinfo[sid]['img'] = ret.group(3)
stat = 'RATE' stat = 'RATE'
@@ -226,7 +249,8 @@ class bookInfoSpide():
if stat=='ASIN': if stat=='ASIN':
ret=re.search(self.re_asin, line) ret=re.search(self.re_asin, line)
if ret: if ret:
sid = ret.group(1) sid=ret.group(1)
bkinfo[sid]['link'] = os.path.join(LINKPREF,ret.group(1))
stat = 'IMG' stat = 'IMG'
continue continue
elif stat=='IMG': elif stat=='IMG':
@@ -261,25 +285,27 @@ class bookInfoSpide():
stat=='ASIN' stat=='ASIN'
continue continue
return bkinfo return [mbkn, bkinfo]
def filter_spide_books(self, mbkn, mbkinfo): def filter_spide_book(self, mbkinfo):
""" mbkn - bookname to be spide """
mbkinfo: mbkinfo:
douban douban
{ {
"25853071": { # sid "庆余年": {
"bookname": "庆余年", "link":"https://....25853071",
"bookname": "庆余年xxx",
"img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg", "img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg",
"rate": "8.0", "rate": "8.0",
"author": "猫腻" "author": "猫腻"
},...} },...}
amazon amazon
"B07RN73425": { "孟子": {
"img": "https://images-cn.ssl-images-amazon.com/images/I/511vbVrhIBL._AC_UY218_.jpg", "link": "https://....B07RN73425",
"bookname": "古典名著普及文库:孟子", "bookname": "古典名著普及文库:孟子",
"author": "孙钦善", "img": "https://images-cn.ssl-images-amazon.com/images/I/511vbVrhIBL._AC_UY218_.jpg",
"rate": "3.9" "rate": "3.9"
"author": "孙钦善",
} }
""" """
@@ -289,18 +315,19 @@ class bookInfoSpide():
# f3/d3: mbkn and bookname different # f3/d3: mbkn and bookname different
[f1,f2,f3] = [0,0,0] [f1,f2,f3] = [0,0,0]
[d1,d2,d3] = [{},{},{}] [d1,d2,d3] = [{},{},{}]
for k,v in mbkinfo.items(): mbkn = mbkinfo[0]
for k,v in mbkinfo[1].items():
bkn = v['bookname'] bkn = v['bookname']
if len(v) == 4: if len(v) == 5:
if (not f1) and (mbkn in bkn): if (not f1) and (mbkn in bkn):
f1 = 1 f1 = 1
d1 = {k:v} d1 = {mbkn:v}
elif (not f1) and (not f2) and (bkn in mbkn): elif (not f1) and (not f2) and (bkn in mbkn):
f2 = 1 f2 = 1
d2 = {k:v} d2 = {mbkn:v}
elif (not f3): elif (not f3):
f3 = 1 f3 = 1
d3 = {k:v} d3 = {mbkn:v}
else: continue else: continue
else: else:
continue continue
@@ -312,7 +339,28 @@ class bookInfoSpide():
elif f3: elif f3:
return d3 return d3
return 0 return None
def down_book_img(self, mbkinfo):
    """Download the cover image of each book in ``mbkinfo`` into ``IMGPATH``.

    mbkinfo: dict whose values are book-info dicts; only the ``'img'``
             entry (the cover URL) of each value is read.

    Each image is stored under ``IMGPATH`` using the last path segment of
    its URL as the file name. Entries without a usable URL, responses with
    a status other than 200, and network or file errors are skipped (errors
    are printed). Nothing is returned.
    """
    import os

    if not mbkinfo:
        return

    headers = {'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'}

    # Create the target directory once, before the loop.
    os.makedirs(IMGPATH, exist_ok=True)

    for v in mbkinfo.values():
        link = v.get('img')
        if not link:
            continue
        fname = link.split('/')[-1]
        if not fname:
            # URL ends with "/": there is no file name to save under
            continue
        p = os.path.join(IMGPATH, fname)
        try:
            # A timeout keeps one stalled server from blocking the caller
            # forever.
            img = requests.get(link, headers=headers, timeout=10)
            if img.status_code == 200:
                with open(p, 'wb') as fp:
                    fp.write(img.content)
        except (requests.exceptions.RequestException, OSError) as e:
            print(e)
if __name__ == '__main__': if __name__ == '__main__':
@@ -322,9 +370,10 @@ if __name__ == '__main__':
bkname = re.split(r'[\(\-\:_\s]',bkname.strip())[0] bkname = re.split(r'[\(\-\:_\s]',bkname.strip())[0]
print(bkname) print(bkname)
bkinfo = spide.grab_book_info(bkname) bkinfo = spide.grab_book_info(bkname)
filter_bkinfo = spide.filter_spide_books(bkname, bkinfo) filter_bkinfo = spide.filter_spide_book(bkinfo)
if filter_bkinfo: spide.down_book_img(filter_bkinfo)
logger.debug('================ {} ================'.format(bkname)) logger.debug('================ {} ================'.format(bkname))
#logger.debug(json.dumps(bkinfo,indent=2, ensure_ascii=False)) logger.debug(json.dumps(bkinfo,indent=2, ensure_ascii=False))
logger.debug(json.dumps(filter_bkinfo,indent=2, ensure_ascii=False)) logger.debug(json.dumps(filter_bkinfo,indent=2, ensure_ascii=False))

View File

@@ -1,4 +1,6 @@
# taken from a website, not tested, only for reference XXX
#coding=utf8 #coding=utf8
import random import random
import requests import requests