#########################################################
## @file   : tparseamazon.py
## @desc   : test parse amazon response
## @create : 2020/05/26
## @author : Chengan
## @email  : douboer@gmail.com
#########################################################
import re
import json  # kept: the original used json.dumps for ad-hoc dumps of the split result

# Sample strings fed to the split loop in main().
# NOTE(review): these look like author fields whose surrounding markup is
# missing from this copy of the file -- confirm against the real response.
s = ['''

丹·琼斯(Dan Jones), 杰弗里·瓦夫罗(Geoffrey Wawro), 克里斯托弗·希伯特(Christopher Hibbert), 罗斯·金(Ross King)等等。

''', '''

马克·哈里斯、黎绮妮

''', '''

马克·哈里斯、黎绮妮

''', '''

[美]威廉·厄本(William Urban), 陆大鹏、刘晓晖

''', '''

[英]安德鲁·罗伯茨(Andrew Roberts)、苏然

''', '''

[英]安德鲁·罗伯茨(Andrew Roberts)、苏然

''', '''

堀田江理(Eri Hotta)

''', '''

景跃进, 张小劲、余逊达

''']

# Sample strings for the individual regex checks; only four are present.
tre = ['''

''', '''

野田洋次郎、蒋青青

''', '''''', '''

''']

re_asin = re.compile(r'''^

<\/span>.+$''')
re_rate = re.compile(r'''^$''')
# Alternative end marker that the original kept commented out:
#   re.compile(r'''<\/body><\/html>''')
re_end = re.compile(r'''^<\/span><\/div><\/div>''')

# (pattern name, index into tre, group number) for every lookup the script
# performs. Patterns are referenced by name because re_img, re_bn and
# re_author have no visible definition in this file; resolving them lazily
# lets the remaining checks run instead of dying on a NameError.
CHECKS = (
    ("re_asin", 0, 1),
    ("re_img", 1, 1),
    ("re_bn", 2, 1),
    ("re_author", 3, 0),
    ("re_rate", 4, 1),
    ("re_end", 5, 0),
)


def split_fields(text):
    """Split *text* with the script's span pattern and keep every 4th piece.

    Returns the pieces at indexes 3, 7, 11, ... of the split result.
    """
    # Raw string: same regex as before, without the invalid-escape warning.
    # NOTE(review): the left alternative is empty, so the pattern can match
    # the empty string and re.split (Python 3.7+) also splits on those empty
    # matches. Presumably the leading alternative was lost -- confirm.
    return re.split(r'|<\/span', text)[3::4]


def search_group(pattern, samples, index, group):
    """Return one group of the first match of *pattern* in ``samples[index]``.

    Returns None instead of raising when the pattern is None, the index is
    outside *samples*, nothing matches, or the pattern has no such group.
    """
    if pattern is None or not 0 <= index < len(samples):
        return None
    if group > pattern.groups:
        return None
    match = pattern.search(samples[index])
    if match is None:
        return None
    return match.group(group)


def main():
    """Print the split result for each sample, then the result of each check."""
    for text in s:
        print(','.join(split_fields(text)))

    for name, index, group in CHECKS:
        found = search_group(globals().get(name), tre, index, group)
        # A failed lookup is reported rather than aborting the whole run.
        print(found if found is not None else f"{name}: <no result>")


if __name__ == "__main__":
    main()