kindle manager

This commit is contained in:
gavin
2020-06-14 21:33:42 +08:00
parent 9d5b787ff3
commit 7cb8e61519
36 changed files with 99 additions and 28 deletions

BIN
.DS_Store vendored

Binary file not shown.

View File

@@ -91,9 +91,9 @@ b['1']['2'] = {'3':1} # OK
- import function:
- local **done**
- kindle **done**
- duokan
- amazon
- link to douban
- duokan **doing**
- amazon **doing**
- link to douban and amazon **doing**
- export function:
- to [evernote](https://github.com/benhorvath/kindle2evernote/blob/master/kindle2evernote.py)
- to web html format
@@ -115,3 +115,5 @@ b['1']['2'] = {'3':1} # OK
- chrome extension
- clean (sdr)
- sync modify to kindle device
- compare parsing html info with xpath & BeautifulSoup & regex

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.8 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 KiB

BIN
downimg/p2575362797.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
downimg/s1026024.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

BIN
downimg/s1670329.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
downimg/s1738643.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
downimg/s24399718.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

BIN
downimg/s26303695.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

BIN
downimg/s2689149.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
downimg/s27246465.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

BIN
downimg/s27276912.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
downimg/s27653114.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
downimg/s28283792.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.7 KiB

BIN
downimg/s29357535.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
downimg/s29399593.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.1 KiB

BIN
downimg/s29581756.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.4 KiB

BIN
downimg/s29811329.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

BIN
downimg/s29841565.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

BIN
downimg/s29879195.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

BIN
downimg/s33314966.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 27 KiB

BIN
downimg/s33551591.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

BIN
downimg/s33559469.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
downimg/s33587329.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

BIN
downimg/s3948396.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
downimg/s4052388.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
downimg/s4386858.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
downimg/s4397638.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

BIN
downimg/s5641654.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
downimg/s6979148.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
downimg/s7046197.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

BIN
downimg/s8488837.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -11,6 +11,7 @@ import sys
import os
from time import sleep
import pandas as pd
import threading
from PySide2.QtWidgets import *
@@ -23,6 +24,7 @@ from PySide2.QtGui import (QBrush, QColor, QConicalGradient, QCursor, QFont,
from mainwindow import Ui_MainWindow
from kman import *
from parseweb import *
# import binary resource file(kmanapp_rc.py)
import kmanapp_rc
@@ -63,6 +65,8 @@ class kmanWindow(QMainWindow):
self.tree_selected = 'note_root'
self.km = kMan()
self.spide = bookInfoSpide()
# initial check order:
# 1. backup file bk.data ->
# 2. kindle(My Clippings.txt) ->
@@ -520,6 +524,16 @@ class kmanWindow(QMainWindow):
# stop check thread
self.flag = False
def grab_all_book_info(self):
    """Look up every known book online and download its cover image.

    Each key of ``self.books_data`` is shortened to the text before the
    first ``(``, ``-``, ``:``, ``_`` or whitespace character.  The short
    name is then passed through ``self.spide``: ``grab_book_info`` ->
    ``filter_spide_book`` -> ``down_book_img`` (the download only happens
    when the filter returns a truthy result).

    A failure while processing one title is printed and the remaining
    titles are still processed.  Returns None.
    """
    seen = set()
    # Iterate over a snapshot of the keys so the dict may change while the
    # (slow) web lookups are in flight without breaking the iteration.
    for title in list(self.books_data):
        short_name = re.split(r'[\(\-\:_\s]', title.strip())[0]
        # A title that starts with a separator yields an empty name, and
        # several titles may shorten to the same name: skip both instead
        # of issuing pointless or repeated web requests.
        if not short_name or short_name in seen:
            continue
        seen.add(short_name)
        print(short_name)
        try:
            bkinfo = self.spide.grab_book_info(short_name)
            filter_bkinfo = self.spide.filter_spide_book(bkinfo)
            if filter_bkinfo:
                self.spide.down_book_img(filter_bkinfo)
        except Exception as e:
            # One bad lookup must not abort the whole batch.
            print(e)
# thanks Martin Fitzpatrick ^_^
# https://www.learnpyqt.com/courses/model-views/qtableview-modelviews-numpy-pandas/
class nTableModel(QAbstractTableModel):
@@ -562,6 +576,10 @@ if __name__ == "__main__":
#kmw.showFullScreen()
kmw.show()
trd = threading.Thread(target=kmw.grab_all_book_info)
trd.setDaemon(True)
trd.start()
# loop check kindle is connected or not
# BUG to be implement XXXX
"""

View File

@@ -22,17 +22,21 @@ logger = logging.getLogger()
logger.addHandler(logging.FileHandler('log'))
logger.setLevel(logging.DEBUG)
spidetp = 1 # 0 - douban 1- amazon
ISDOUBAN = 1
IMGPATH = './downimg'
LINKPREF = 'https://book.douban.com/subject/' \
if ISDOUBAN else 'https://www.amazon.cn/s?k='
mheaders = {
'Host': 'www.douban.com',
'Referer': 'http://www.douban.com',
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
}
#"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
mparams = {}
murl = ""
if spidetp==0:
if ISDOUBAN==1:
mparams['Host']='www.douban.com',
mparams['search_text'] = 'bkname_xxx'
mparams['cat']='1001'
@@ -141,7 +145,7 @@ class bookInfoSpide():
[re_bn,re_bn,re_rate,re_norate,re_author,re_end] = [None,None,None,None,None,None]
if spidetp==0:
if ISDOUBAN==1:
re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\".+?rating_nums\">(.+?)<''', re.S)
re_bn = re.compile(r'''class=\"nbg.+?sid: (\d+?),.+?title=\"(.+?)\".+?img src=\"(.+?)\"''')
re_rate = re.compile(r'''class=\"rating_nums\">(.+?)<''')
@@ -164,6 +168,7 @@ class bookInfoSpide():
"""mbkn - bookname to be spided
return: {
"25853071": { # sid
"link":"https://....xxxxx"
"bookname": "庆余年",
"img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg",
"rate": "8.0",
@@ -171,15 +176,32 @@ class bookInfoSpide():
},...}
"""
if spidetp==0: #douban
if ISDOUBAN==1: #douban
mparams['search_text'] = mbkn
else: #amazon
mparams['k'] = mbkn
r = requests.get( url=murl, headers=mheaders, params=mparams)
try:
s = requests.Session()
s.header = mheaders
s.params = mparams
r = s.get(murl)
#r = requests.get( url=murl, headers=mheaders, params=mparams)
except requests.exceptions.ConnectionError:
print('ConnectionError -- please wait 3 seconds')
time.sleep(3)
except requests.exceptions.ChunkedEncodingError:
print('ChunkedEncodingError -- please wait 3 seconds')
time.sleep(3)
except:
print('Unfortunitely -- An Unknow Error Happened, Please wait 3 seconds')
time.sleep(3)
if r.status_code != 200:
raise Exception("请求失败")
print('grab book {} info from webside failure'.format(mbkn))
bkinfo = defaultdict(dict)
sid = None
@@ -187,7 +209,7 @@ class bookInfoSpide():
resp = r.text
if spidetp==0:
if ISDOUBAN==1:
stat = 'SID'
for line in resp.split('\n'):
line = line.strip()
@@ -197,6 +219,7 @@ class bookInfoSpide():
ret=re.search(self.re_bn, line)
if ret:
sid = ret.group(1)
bkinfo[sid]['link'] = os.path.join(LINKPREF,sid)
bkinfo[sid]['bookname'] = ret.group(2)
bkinfo[sid]['img'] = ret.group(3)
stat = 'RATE'
@@ -226,7 +249,8 @@ class bookInfoSpide():
if stat=='ASIN':
ret=re.search(self.re_asin, line)
if ret:
sid = ret.group(1)
sid=ret.group(1)
bkinfo[sid]['link'] = os.path.join(LINKPREF,ret.group(1))
stat = 'IMG'
continue
elif stat=='IMG':
@@ -261,25 +285,27 @@ class bookInfoSpide():
stat=='ASIN'
continue
return bkinfo
return [mbkn, bkinfo]
def filter_spide_books(self, mbkn, mbkinfo):
""" mbkn - bookname to be spide
def filter_spide_book(self, mbkinfo):
"""
mbkinfo:
douban
{
"25853071": { # sid
"bookname": "庆余年",
"庆余年": {
"link":"https://....25853071",
"bookname": "庆余年xxx",
"img": "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2575362797.jpg",
"rate": "8.0",
"author": "猫腻"
},...}
amazon
"B07RN73425": {
"img": "https://images-cn.ssl-images-amazon.com/images/I/511vbVrhIBL._AC_UY218_.jpg",
"孟子": {
"link": "https://....B07RN73425",
"bookname": "古典名著普及文库:孟子",
"author": "孙钦善",
"img": "https://images-cn.ssl-images-amazon.com/images/I/511vbVrhIBL._AC_UY218_.jpg",
"rate": "3.9"
"author": "孙钦善",
}
"""
@@ -289,18 +315,19 @@ class bookInfoSpide():
# f3/d3: mbkn and bookname different
[f1,f2,f3] = [0,0,0]
[d1,d2,d3] = [{},{},{}]
for k,v in mbkinfo.items():
mbkn = mbkinfo[0]
for k,v in mbkinfo[1].items():
bkn = v['bookname']
if len(v) == 4:
if len(v) == 5:
if (not f1) and (mbkn in bkn):
f1 = 1
d1 = {k:v}
d1 = {mbkn:v}
elif (not f1) and (not f2) and (bkn in mbkn):
f2 = 1
d2 = {k:v}
d2 = {mbkn:v}
elif (not f3):
f3 = 1
d3 = {k:v}
d3 = {mbkn:v}
else: continue
else:
continue
@@ -312,7 +339,28 @@ class bookInfoSpide():
elif f3:
return d3
return 0
return None
def down_book_img(self, mbkinfo):
    """Download the cover image of every book in *mbkinfo* into IMGPATH.

    mbkinfo: mapping whose values are dicts holding the image URL under
        the 'img' key.  An empty or None mapping is a no-op, and entries
        without an image URL are skipped.

    Each image is written to IMGPATH under the last path component of its
    URL.  Download and write errors are printed and the remaining images
    are still attempted.  Returns None.
    """
    import os

    if not mbkinfo:
        return

    # NOTE(review): the header value repeats the header name
    # ('User-Agent:Mozilla...'); left unchanged because how the remote
    # sites react to a different value is not known from here.
    headers = {'User-Agent': 'User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'}

    # Create the target directory once, up front; exist_ok avoids the
    # check-then-create race of exists() + mkdir().
    os.makedirs(IMGPATH, exist_ok=True)

    for info in mbkinfo.values():
        link = info.get('img')
        if not link:
            continue
        target = os.path.join(IMGPATH, link.split('/')[-1])
        try:
            # A timeout keeps a stalled server from blocking the caller
            # indefinitely.
            img = requests.get(link, headers=headers, timeout=10)
            if img.status_code == 200:
                with open(target, 'wb') as fp:
                    fp.write(img.content)
        except Exception as e:
            print(e)
if __name__ == '__main__':
@@ -322,9 +370,10 @@ if __name__ == '__main__':
bkname = re.split(r'[\(\-\:_\s]',bkname.strip())[0]
print(bkname)
bkinfo = spide.grab_book_info(bkname)
filter_bkinfo = spide.filter_spide_books(bkname, bkinfo)
filter_bkinfo = spide.filter_spide_book(bkinfo)
if filter_bkinfo: spide.down_book_img(filter_bkinfo)
logger.debug('================ {} ================'.format(bkname))
#logger.debug(json.dumps(bkinfo,indent=2, ensure_ascii=False))
logger.debug(json.dumps(bkinfo,indent=2, ensure_ascii=False))
logger.debug(json.dumps(filter_bkinfo,indent=2, ensure_ascii=False))

View File

@@ -1,4 +1,6 @@
# taken from a website, not tested; for reference only XXX
#coding=utf8
import random
import requests