From 854d167d20e82a0b6fd5ed2fcc612cac89942a75 Mon Sep 17 00:00:00 2001 From: gavin Date: Sun, 14 Jun 2020 18:35:55 +0800 Subject: [PATCH] kindle manager --- tbook.py => parseweb.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) rename tbook.py => parseweb.py (97%) diff --git a/tbook.py b/parseweb.py similarity index 97% rename from tbook.py rename to parseweb.py index 08fd083..6c7b8f3 100644 --- a/tbook.py +++ b/parseweb.py @@ -1,8 +1,10 @@ ######################################################### -## @file : tbook.py -## @desc : douban book spide -## @create : 2020/6/13 +## @file : parseweb.py +## @desc : douban and amazon book spide +## amazon ASIN B07VKS1DRZ - https://www.amazon.cn/s?k=B07VKS1DRZ +## douban SID 25742200 - https://book.douban.com/subject/25742200 +## @create : 2020/6/14 ## @author : Chengan ## @email : douboer@gmail.com ######################################################### @@ -126,7 +128,7 @@ div class="result"> """ -class doubanSpide(): +class bookInfoSpide(): """ re_bn = re.compile(r''' @@ -154,12 +156,11 @@ class doubanSpide(): #re_end = re.compile(r'''<\/body><\/html>''') re_end = re.compile(r'''^<\/span><\/div><\/div>''') - #amazon ASIN B07VKS1DRZ - https://www.amazon.cn/s?k=B07VKS1DRZ def __init__(self): pass - def parse_books(self, mbkn: str): + def grab_book_info(self, mbkn: str): """mbkn - bookname to be spided return: { "25853071": { # sid @@ -315,12 +316,12 @@ class doubanSpide(): if __name__ == '__main__': - spide = doubanSpide() + spide = bookInfoSpide() for bkname in testbooks: bkname = re.split(r'[\((\-\::_\s]',bkname.strip())[0] print(bkname) - bkinfo = spide.parse_books(bkname) + bkinfo = spide.grab_book_info(bkname) filter_bkinfo = spide.filter_spide_books(bkname, bkinfo) logger.debug('================ {} ================'.format(bkname))