kindle manager
This commit is contained in:
@@ -1,8 +1,10 @@
|
|||||||
|
|
||||||
#########################################################
|
#########################################################
|
||||||
## @file : tbook.py
|
## @file : parseweb.py
|
||||||
## @desc : douban book spider
|
## @desc : douban and amazon book spider
|
||||||
## @create : 2020/6/13
|
## amazon ASIN B07VKS1DRZ - https://www.amazon.cn/s?k=B07VKS1DRZ
|
||||||
|
## douban SID 25742200 - https://book.douban.com/subject/25742200
|
||||||
|
## @create : 2020/6/14
|
||||||
## @author : Chengan
|
## @author : Chengan
|
||||||
## @email : douboer@gmail.com
|
## @email : douboer@gmail.com
|
||||||
#########################################################
|
#########################################################
|
||||||
@@ -126,7 +128,7 @@ div class="result">
|
|||||||
</div>
|
</div>
|
||||||
</div>"""
|
</div>"""
|
||||||
|
|
||||||
class doubanSpide():
|
class bookInfoSpide():
|
||||||
|
|
||||||
"""
|
"""
|
||||||
re_bn = re.compile(r'''
|
re_bn = re.compile(r'''
|
||||||
@@ -154,12 +156,11 @@ class doubanSpide():
|
|||||||
#re_end = re.compile(r'''<\/body><\/html>''')
|
#re_end = re.compile(r'''<\/body><\/html>''')
|
||||||
re_end = re.compile(r'''^<span class=\"a-letter-space\"><\/span><\/div><\/div>''')
|
re_end = re.compile(r'''^<span class=\"a-letter-space\"><\/span><\/div><\/div>''')
|
||||||
|
|
||||||
#amazon ASIN B07VKS1DRZ - https://www.amazon.cn/s?k=B07VKS1DRZ
|
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def parse_books(self, mbkn: str):
|
def grab_book_info(self, mbkn: str):
|
||||||
"""mbkn - bookname to be spided
|
"""mbkn - bookname to be spided
|
||||||
return: {
|
return: {
|
||||||
"25853071": { # sid
|
"25853071": { # sid
|
||||||
@@ -315,12 +316,12 @@ class doubanSpide():
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
spide = doubanSpide()
|
spide = bookInfoSpide()
|
||||||
|
|
||||||
for bkname in testbooks:
|
for bkname in testbooks:
|
||||||
bkname = re.split(r'[\((\-\::_\s]',bkname.strip())[0]
|
bkname = re.split(r'[\((\-\::_\s]',bkname.strip())[0]
|
||||||
print(bkname)
|
print(bkname)
|
||||||
bkinfo = spide.parse_books(bkname)
|
bkinfo = spide.grab_book_info(bkname)
|
||||||
filter_bkinfo = spide.filter_spide_books(bkname, bkinfo)
|
filter_bkinfo = spide.filter_spide_books(bkname, bkinfo)
|
||||||
|
|
||||||
logger.debug('================ {} ================'.format(bkname))
|
logger.debug('================ {} ================'.format(bkname))
|
||||||
Reference in New Issue
Block a user