From 4e3b8abc34284f9b27b1d2bce61b4bd2cc186298 Mon Sep 17 00:00:00 2001 From: douboer Date: Fri, 15 Aug 2025 17:20:30 +0800 Subject: [PATCH] 'update' --- __pycache__/annotationdata.cpython-312.pyc | Bin 4678 -> 5789 bytes __pycache__/booklist_parse.cpython-312.pyc | Bin 4358 -> 4300 bytes __pycache__/opf_parse.cpython-312.pyc | Bin 2225 -> 2609 bytes __pycache__/toc_parse.cpython-312.pyc | Bin 10702 -> 10476 bytes annotationdata.py | 183 ++++++------ booklist_parse.py | 128 ++++----- data/Books.plist | Bin 541171 -> 541171 bytes detaildesign.md | 26 +- ...export_B18FCD9F90FD43C2373AE52BAEF9A77C.md | 2 +- exportbooknotes.py | 240 +++++++--------- opf_parse.py | 75 ++--- toc_parse.py | 268 +++++++----------- 12 files changed, 406 insertions(+), 516 deletions(-) diff --git a/__pycache__/annotationdata.cpython-312.pyc b/__pycache__/annotationdata.cpython-312.pyc index 41b3f8fde41e1837ec5713a1988cd79c48a3d80a..e515272143cce82eaf43d3ae273312f082fa9c3f 100644 GIT binary patch delta 3280 zcma(TZA@F&^*+Dn?@w%uF~(p6!I(h6U@%`LfzUuoVv^=#ij&APevbs#VEXKYhCOB` zvB^kmaq=eNEY0SwZR#lP=4z3Kv}r=hHnrLxJ5Gk<(kc7ik7;vCs!jQ`-D~FoP4Z($ z_BrRBd%oT|_nvouQ+J~|{9CzP0wAn@`GfOoY5@L(9ylnhz=zFx41g6t0v8}LlIO=< zSb_3fyaL5tc!3fw0#e-1pAxtP7~p|nKoX09=eMu-r%hQsz=YpS4BnB7PR1Qhg}rlwE504hp|!O5@U?cYw~7lnOdj-M~4Wf?PwX z$r4iiq2@MTbcTUTyh}W5>4nQMGl^$R26mD65!;3z z;A&V%;#r}=W)pl|0Th6(0AFL3dpoPct{CCnE-+( z3&f*W-Ry*D1u)oKSWpE^5d!Q-c;zSq+*{h0z%9&a#d890V0vQmTuyUnZ2!O^_khzm zc6{LY!7-OE7$^koz-xc44OrebIUZR)1Nc6EkChFM~aVAX5 z;8IR8Dxg)6f@lHc6`({L;b+xl5#d9nE~_bzS)%pE!E2YEM)c!`xRT$)N~;!&Dm|epi-w^PZ_S zYKfVzHYUl4Fsm$$o{M}VB{I?q80ss{5JEwr8A+nh4B&%;=fQ`wPx*a=FI!@8hM2(N zS+fB!&ksQ-MeqqYZkzhVEGaN!Quf;d^MIU_`%q5L0rS!{xMMRfUFQ5*rN! zqgvoav1J~kX37{XqhwU1>TNzF4((#2;%cFR0I;@>LTyuG;qG0WJ%e%eV#K~~PP$E@ zJx^lkZsR0%fL352v~9mN80Xce!1CfOcnd!TW-)7t^C`B6R!e69cXy|}nmsJ*7L>t2w3c0#l?#l81JAxM`?U@=H^=w-13@Z>!$D`&(NZX4 zza?+#wW2kG4$KB~(jD`UZbot(%5y?bFi3?aNxB+}XfzFI6GCS8A97Pzi&zvkh&B_- z>m^Ve1Ip)S?40?f~vXvmbbM+P=D+7-tY$4dW|{aZmZYS{tW15`x#Y|PJY1>D z3xGx+)qg3(<&}8}kZYbTRjQrTi>cvs?hvTv>Qk*+o}@=$5W^CTsUBxoSp6G!MojKlg zM{|ct%EI?`uVjt2@$>8TUFrI+waQd|SEk;XHaf5IHuSaeLw9QL>pNHb)_cd&y<@2( z$5JQ0mOACi^qyYt^`?8h_j{=w=D*UeF`0qm1s^BU#uL|gkGSybMn~G{Se30ETN_Vx z9sHyxV>}t-Wy3XfA6VY8#PtbtV(=#;w?+~@$)id7XBR)dm^`1V-Wwf(<*BM!H@2pY zt;uqj{*p8DYSfWcY9dTrmNMBNs>&ak8nT9(tiCE+UYRvDJ(dbu6p_)l@!0TnSz5L$ zPG)4g^A=EAp0@+B{IX<8vMeh$Vk2CtiwW4#xZ#89cdGAbk|%#^SRK!p2U42Blx#4o ztVoF}{_~_7l$*Z*Scz?;x+dP2scyYGlE>lW)1U@!u)96`%SQTnpY)d=)bc+q#UQ59 z2f!-K!LQ`BXNq#Wa}u|EDnMTFBVOiqzj?vqM=N2sn?^G`CkvwMhsnv9&>fvk#lY=7ZPqyncQ5EM%}L-$gOi4ufV>Ry z6O(>wCJ^i~J;fwB)1a;E!2Y4`1Kq6$hV1S8+u9xN?58Rf*2m^lqv6t=(CsDzUf2;4 zDzpWm5`@sDG$-{2{C>)du8ct%ecOxe7)5Kssp4HQrwCEO(AGsIn8RUi#b$2JIl8{! zt)Ms_2)qH)MUAGz`w>2d&=^8nZGS52EAE_r`Yc>P_v_#n#QJ5EozaWS*!Gd#B0#sEIJgYaY>s!)? z^ess|qqi?9BK*kgLyYvKHo3j;U8cQY_KsIgg`^euS_9i5GWZiXqKc~oMk76b7&~w)V>Om z9kC-ts%fL)?scm>lGRex%~hyMwM|7^|6o5AssGrR2;(w|saduCHMdE$Et9IfGjS8` ztGmzpzW4q4-ur&XQ-~A2Ihf$A~rRA3zd=9pw5m4pFM&U-~oP5!*KHuy{4DlZNH8F@z8Gmp5368}gH+2Zf)IqdF)uG=} z-$kF14H4m@KNJqgQ^E0K)R2HREAbl+V2FYO56qV6Ku)km2e&v|PPE4-Vpr0%>#yzI z*Uv3ZE=(qz$+-=ob*(NsxLxH+Oe{60EbCQGvB7xnD?=+}bYR3t3be>LIDVeh=zb7~XD#Rh#tS8Mhw(b9OIn&)b(VOw28pZ$J;hwQS-ba1t-6xd ztw*_szM+{X_n)>-r= zzl3g@JBiC5?(*|OHM${m)ao}9()%_~;+hWR`EXz=Fs=p^H87{{b)pBtpQ`n0%Nxh1 zg5f}(z@@y`kvBc6V3ZM?p&NZEI=uC|77+^0=1u-^IG|ooLKV7mMhAxumFQrZo6>`Y zIQ0E8cf^d@y!e035$eOMxQRjdFusx~*x{^;JqtYvZ{o!)=Sg}pTz#}ZC)(oE8POX( z_5>%C$LErjS#whiZi|-1*@fA~D+^Z={yU<3o43TrlD)~c^w++XwvOK6#|lOe5%%4u z0B>77vT!8PmF2wHui3JAX5q}MXQR*LOy;yn%F)82upkrwQOOr5AeP5YKQIwYWq}2Z z_?6^7AQ*Fj2E0}GmF1k#{%x!4hLW{*N-sti_ zVg1pmweIZU$g@Qs$1~FL*ZOu!(Kn^GjMTO=vSwSWOSccKUCv71j`!ti+;2_1IgvP> z^rsB()9=#B%Q(rWKkWRVbEPp|eJD=jqpWI7(OK!hf)F#tW_LI-)}0thJ6m(SeaBg! zbJXO@t8(_roU`$Bj@rk@BHZgo6DBm0p*_j^EZtDp2g>Y)gTTnVJal#F+Q{w=+%2=k zpFv#W%v$0xHX|X3w_vE;WG+FWQ`(9voeF31kWxKj2p=GO^e?%7u zJbf6p;Zpj8^Jv8}$#7e;A8R$<-VZV6Y1t%C`!CGMGWv~u4{S#}_Th+_H_5UR9GB%h zH6ENfe_{X@KmlXT7Rkhh;y;K_r@h|xG~2O3c5abu>`>yx)TujU PTY(a><$gd`6?6O_Ur;~S diff --git a/__pycache__/booklist_parse.cpython-312.pyc b/__pycache__/booklist_parse.cpython-312.pyc index 9d811e59c8e56de4a19f362477844cbefb99754a..60acbb2021af29f668c4db05b73abe98c4648dde 100644 GIT binary patch literal 4300 zcmb_fYfKy26}~eb+vB&54VbqR1HoAyAJsb!b}OpHxthVZD_ zQYk;omKC6_7SL`AE2XWgq-rXys;FCS)uulx^^YC1#Z0nNq&%AZsmP{Ps#L0a?$~1p zOE#+7i#YerJ?DJ)+&lMtbNpw6L5rZB{l||7Z@}m-evq3cGk9Vo5n4nHVu&E}rhCFm zNYA8~lzN3%0X-R{hLv6=fk<>5F^Y?bp{{^0ZeA6uW|Zfu5aRBTh*e!lPa`kQ(nHqF z!?rG~*8;98s2kRKbr_e`GwLfu+E98nu-c)LZH}3t>@V9g1Vb017IfB$(03pvugPo1 zXsqc&#Aq&&vYmvfW6g|~(OuPFQEXdD<9>W-F+4G{&tt*a@KTY;q*t9OCOWkz<#v_J7riwO!4TW1qx$dD-)@vUzYS zHr56*_mWDsBG7Crg(ipz!j<<31A1J9sAJfE|7cKP0)0Z?_Ej=39zAiu>>^?jzA|uK zlni;1fa?O+!%Hy~NF@NOM3XxlKGzxGg}3}6{{YK*`Z9+vfnqfL{N$p979qw7-b^4t zQerVAua})cD1v<1*$DX+57jo)%n&>%d6vQc%IBW49uvq#cwA~x)fW!+2L?n-=gH=y zoj&);lO0{YZ+5nKb&J}OAZ*4r;ui)*b6VZm?ml(&RFAL4EvlK`v`Xvig?;mZP=8p| zrN_ST2pe)KM2csF{Txn1P6vX=`*%1T&&umg^DM{L4frnvLUjS`Ux!6zJ&tE`ag3mW;-*K|FvIax*uYs6GslxJ`g-5Qc)~k*rs*XgdSyL*%^k)8{%a!YU4<+^< zf*F0PuvAhMl|`w~Y5Nm!M17fD#ABp~f9WMPz_BmF@eN7S&x;@Ecc1S`S2UD~3!v%;AwC2eLN6 zB0oZCii%LXS}L+JD6@*bX?Jc#t4_eBa&RNsKhsx z7cK6N<7|lK`~u6g3+!+^BO2TtEdhQc=%4WThglJcT0bX1o=HzycL#Rto)}?8IsgvR zEG4C&$=%T&g0v9=;ZO?zirUc;26{Yp(LiHq)DT8h`A3DpFeg$1MpEJ23NUFztmleg zGa`k(WfVMi(Hy>N9KOY%1wIrhoXJuKi?l=s!9cG_1;c(uR1C0!OT`(nu?DY@rU0T} zCJun~ixk#~%HDIn0K;SUf&7QY;TtL%0V2Z~m_8;;@#eQNXzlZQ?8E;X1Z2C;G}koK z6xUUztoe&g3r%tT-rKga%T&@<5!Ku_Smw(Y>lW%(8TGEc&dQOJGRnA&w^+5jhOo{wz_Cb%4T0|TWE`y97x*gqpcfOd&+FTQ|4S5UmlN{ zuID7n8i7Jqs+Ox(8?WUg3ma1Q;uU(CUM-GsN&CT6VOgr!vC_8Ow(13SaaNrd%Sqa+ zGsf{f2d^DW+FwtV?p+yN9$bxFb0_Bd^S z>V$Zuojl_)GkliUVpROmsBFnsKgVF0?j;8M8}Qoyox!r7kKOodD9cz7L#6@K?ZRF% zzwji^B@Gg?d^Kd2J-7O=$Cx4`4y}IiGuAidcp=T@7{TMNZGTcLXCQm!MT02{tyB*%6U0to+?Jd=g$)KMXa2n$j zcN{x)@-2tg-I4i{OID6=x3!*X-C@|?Zq!(aUL{=&P5 z9S?u`>HWD604&hU`MHT&{(KNf>;C0mJ^bV+haD~xX8>b~PPhsfiDZ~hQ;&)r2g>86 zsz6ky-Ch;dP>Y0EU>#N8D8~bRso8P1Zxl$1?q`L*L4PnPD*J+Ao)tB%<9+N1rb1CU z!UaNtOOs|It;9r9s3L{&xYzN@6c--jMFmKa_VYZ@PJrR+v4&uITt!H%w8c!;E#}E? zebV`2ddeE%J8$PYF58m1l~L_&lVxsfW^C@A znRk|$mEdx4&GbsjS`ycnY?21e5#qMB5OdpV!_OK&ZcJJa07K;#ES_07^ZwcBv6Mb% z&NJhQ7hPD>kEN^y*fMD?kG9;(FPc}RDqRV)bDmyuiPoY!cIRr(rh=@_n?JEMe$!U9 zsYdqVO+7ML=1$CVa+P~0dsHrFX`)G1-?B0U}`j-i;t zUml-N)cSnGVP-Ul$9kXd{HQ;e)^IrYxjHP|AbyTGNI4{ZDL71PlE8HnkLNF7CfoVRTieLu_0b;*dGY_d{Ex2Mx^^OngFYfY^?bo+9*CJy!eBUrXT)p;la%N z_kTP6VDZGD60&2uZn3o`OHP6BYX81|+uw&BTs1gFhEHC(nM-Drl5iVCg zhwDjh9~KQ*90Vci`oh5=5kZ@%P z;x01WLZ(~DcnfL1K+dPst3=^r1Y&cDK$Jc@c~eoaVJt}+%chT~%=zo)%7nQxX|9@X z|AH#npfqufxIr7E z=a){$>OZGzKoK2$&jdH$QvFzql-jtqdX1_{S?ueU>V%~_X{nt)af`BIaPN9{pI{oc%Xqh!iZ8hv}+P?|85E(xpOiV;b}{&-DO?CtfMrbJEC rpUAbEj)b8jO3DEpobR3QUMgATR+-qIxUm{SdJM7DY!)FZeRux@XG7iL literal 4358 zcmb7HYfu~46~4P#Nvj3DK;q?KFW4BA05;Avc8Hyfd88&#y8+zBnzS0hu8m|Nk=-TO zL~4}APNW0}(L5jtwn_6S9p~XRZsWvG(x&~pcy`k(;Tiq zNQjY$*Q~Ai43S!I&jy`ZkG@-P*Aocv29D0ily@Va?Jilp*O~NG2FsD0t_U@tpKL_v zQ8=;PY|p{kc=PLsV@{G8oT5|D=WtnE_PhEss+5}a&W2^wKpBN}DSM2|v!Id5>}=0f zykW6hVAjazy1$E+^~+X*Ev7;+!)^sV7B~29W#i0T4wsX$RY6s*#3005-_6Zf+7DuT zJ|I))xA6Jk1uJibKJTO^gCfvCBZZEVM@d_LZvvdgbohLSJuXpl^f(2PukJa@RIO}G z{JGP~r$4)P@im5pR>dZN^=9H!#8n4N#ktpxDbMO2tmWFpfy7`WKKM!^988RcXFqvo z?!y!D<4-4EKASl4{{Zv?;_48b!qIB+F^@~)VV~p+ALD&K7)*@5i}!i$SYkAo{9-5(eldPAm{JBC zCZGQ#acVU6nh1Ra3*)ChjX(R$wyj%hs+8aKcFgSOCzHQ@1y0%N^B!~^0#~qe7f#Qf z56->+#$4o?l%4Tk49p)tG56W~^KX2DjbvueUr4_5RN{rVgwu1AnlPuyp-1Lal&`= z_3+*buH%2GUdu0Q>`}g?uDq6Aau{s8>>1?{9#=;t+v)Ro_)f{?^NN*h>YLnM$@<`h zb4tAA>gE|He*Ap=)bn#MjVC^OIpZ3|B`)tlAG?cP?WDBlTlUrOZgJG@+t=LY_+d*^ zTf0p;ddi*nys&sJFWj=crKzrUck2O1L!FJ8$FZ~5M#xzlf6&?MkvLbU)S2cZC9ble z$IP(!511WB(vnPqAW5||>A3(QsWWqaVAwNh#P?8ul(ZR%o6d}upZ+CI3aB{XXAWi7 z66i^lJ8K!48FA#HB@8g#O9%xhKeZ-OwPc^#uNH}c{M+CtPMz8HHVDnPBD6Ol)VI+> z--KD)V%@hQm~Ta7EhD(>QQA-V)jPp?1FulXPqd+ps0N8dA4#ACh=8Agb4OGMP#02v<16keUtg2y(EfUhbD;1A96m&s!#k6j;A!aHGHeJ!_L++7HG2MnJRXQ! zHVhd?`o_t>o9+rxQzpwWJw!(fD`Tdrkb2r^iFD1Z+i-fvt2@T_oo$V++cA|_G~6}R zHR2y{jpc2fvJ?#O8QL@Q==ipnrFtrF-BiIHr|DPeu_6!`EQxTw}v z*)~?^upL5s|l3q>f~8l^1%Dn{xC0<6Cw;&>g+ zt1naiu}ocGt-Xa$FheMO^4$N!ClsKS=EmM%(cGE3?xuM^%0#x^3$)Qo2$J8h@%4Z&wvZs z(xX1252fQ?-TMqIWAOE!z3{@yvLaPeq^g$HSyNe+%G{97ZptJTI1nMN4=51?d3)~M zFXI;>=MBUMj>jWsZlkJyKik&W(pcZl+IP3LHMTc3RI>dz3FC;yfzCeIy6*?99jr{{ zG6lu{cu!+%;|gF?8@qR3JG*y(OA9OOmJ)gIW;kr}y?5e6&(*TY4=zB#wDQRDw?0gs zd%Kq1Ytt*NBILsone>S%^3m`|fb>LJo6hmF7Lu$NGQX_p>=i`dE-inA@9YJJq7U*? zXP49Ck=30ZpUBI)#v`4459UEx-6OcXl1(RT)0K*>6rQ3|y~q^SDcp%SrUYM~D662C zX{RVcspH~=GAto@5eEYPa!M0Q@oerIb%3xPB)D7_{{USOeZ!!f;?ee)xit9Dl`Q?> z_VD(>`@{E-lwZm!oiY|i>B0rdpx$}Km^b)j_{ovJn6VtF$7~&5H?;1>;*e&BHVhiV zhG@QUi591fR*Z@n*${O(H$PNARa!QYy&<$G;*yQ|GnNfwg$pXOA}7L(?D)#G3An>j zu#k;(`avel3}i3RC_67oS^xcw7Wm|!h=6@*ebjW%*~iXyMh&%-^nFqKzL_kb8tZ=+ zv>?7Au7^|nW_L~TiIZ>4z4htr=g(ga4$Wg`xVY3y; zT8D%4bvhhy`IH)6z%(Fd-BgUGGNXA_1*xPY0sp5Z;Fyq2Da3M-s9YNCN1S*a4xt!B z@5S8~jHhtUlhruai!yOoCLR}R@H~y<6x%72;5!{2al8j|D#=N|MvDMJ%0<0F@9g$* zy&nF4p%v!vfyJ%RFQ^EDxK8E})Z%7D7%n5@*J$HRN%>^Sy%Qz(E+P|QT`Wa}@iNlQ zu$w2@?Gx`?z#s={eg$=K$}(%OmA+L-CSVAI!B@ie82G8->Zo2E^c z$bn(UkRxW@95a=VljG)?>8>ypB0@FOv_4{vs3J9?Lj#6s+7NndWdC^0-{>k3gt|@` zQ)!RUsOjq_q-LT_+Nbn} z&=cXGhR7NH`p_ej`t=j~^&`^Q!{bCue`mDnf$>Kss~(uBdf?CGrK;u$eRGJ+*t0Ft g5osSO9uvm6@sg;a5-hr|L*}Xl8={JYhrx6I1se1Q2LJ#7 diff --git a/__pycache__/opf_parse.cpython-312.pyc b/__pycache__/opf_parse.cpython-312.pyc index 4948c4c922a9edcfd94cf1fcde529633bb1945b4..b4070f68978eba1a18cc2df813f69bdd095d3ae7 100644 GIT binary patch delta 1342 zcmZ8hT}&KR6h3!;c4udIVV7Z-EeIlRVYd>f*ff%$2|uD%gEiH}PGct9ouMr3pEHAi zF#&NKd1&I|XqHk-sC@uKtPqn%V@;ITCPuPa)=7;`bc=m*DH;s%!Fz`vjVGD!o;l~- z^PTzb&8&D=D$8!V-8^7R?92_cbJJyk@iqjI0~s_N;D*WIf<6Ek_aX&2cva=dQx8lYe0_G^QI9y5@=5QM>R5N~x zfiZrJe;AbVAgDgkV^x$Xo=b3<+V6wlG|0T@Ipeu7w7ZU=5g4UU!w3qONB=t;jb|gz zRx^ei=zF6A43g;o>`fTor#J;+G~9>td_A_}`5VP+U);?t{QBk9jrq4WW;6G$TraLn z7T@{m?gz`i-?&h`Hh1sh`QMh`-gtkp{)LmhCnKt+s4Z!-NU3!HFB6kiqcCMU4$`LM zARk61*B^^3ucRaTfJQ!`e&@l~mvIlR^<3ojSh6*CHkBG`#jPZ3Q%ibGWpJ4Vq+au3 zn1DYxaQW7qb!*PLnbEv6^c@r0!U-@2(cvee#M`&MI{;e`;7AS>5RnWbrO-0Uz|vlo z$c#~8S|wV2mPHH5Az&`+ON)2atYRWC`=F*{5U@>7( zZKQw*@P=@p=f86MN&Tl7uppICQTyRbd2hp%?Y7&S4b5~fHhu4ID*mk-U>XLWeWc4+`#m}jBX^e zbfKyyH#$8!Z_8ITWf@cO&mNoaSvYa!#MS55gU8o`$MeC**MpsF!OpK6e+WKXXlTs# z&OG+sb0r5HYAf;M;PFqnw*|~=4YZ><@zitfu6BC0o$aRVePPz*BqoDoF+A)>8SCu{fOXjgbKd8bzf-B7h0^$`&y=YjWGA5s~o&FrE*}Z|0*>Y8rK-O-br7fK6n?WiYwxbTUfZ#2^3%v|;$SLpN|XZ}05u6#EXJWIU*a9Asc}^!P|H@j68CH$mhD<2<{o$t6}hl(gT`86sF$A)I%Nh5D77Lbs6=J zpT$t+tNZ{Ebr7t$7Vad3Nq7}*aC>DCOoPE|;WOcD3%hdk{HGE7DV}?p<6+QY;RJY zpl5F~!>4Ux*~7EBOU3fAS#=7PvT5aJoZ<{|4|83YsbuCXo9GoYZ}l-|$SbkQjvBm( zL8$xNSX1e*WgkmQz0ly-X8)8jZ9u~QjrSg_eT|RTgr+)F$FF!6YWpA@TdZEKHhP-s zU~TfDUv0(uc4DdRSZXc*AatSKa&C_3I+Yz_JG!?bn_|yVVK*<~ zJ`+Eb*5NrLgVpP1q(txU47PcM(90;G!N1_$N1#0O#Zk1KaZAz%Nm)uijQ2MjSFdMV z!R~r?aq8++W9Y}o#=vi>`@u=0`7U;9(12eL;HUENFC$2ElwEg6Sn+; Kv4pUd@bFtpE(*c` diff --git a/__pycache__/toc_parse.cpython-312.pyc b/__pycache__/toc_parse.cpython-312.pyc index 6a4aef8600f1397f18fb7aff5f73b757ae147c8f..a328671f6629bbb9e80cc46d068236910a129dee 100644 GIT binary patch literal 10476 zcmdTpZB$cNw)f^cAwYmY_>zP#Aqa#3@>OyCK&eX68Ff_DW@7FYl0m?mAd(3!cI*>d zszL1-td=lshxlrhPNy?CYHMfu*7CjSkHqLC-ul*ZW=z0egLT&Gd-l)U=iVg5fZBO$ zy&vzeI5~HpefHUB@3TM7Ie&_emtatG{_EWip9;hNhCafD>4A?&dj%MF8Y8eajKB#_ zC*Fp`FSnDkkK4w@Ss1U4$HAI0f_E4r_#bju5^a1^KnQ+XjA3>TMU%KGAxwJimk?Uv zgAftocJ5O!Bn(CzA$=-FJcE%yBt8{G#$Y5p10$KiNO>-X{23Uj42`sBU?>=jbcE3^ zY7>#7Cd`iSK89htxER(ZZj-dd5u7$D2P1i=%sDBATcr3yoNYH@yH|xqCA1~B$w(PM zs(?lknPe&qL6SjD!1 z+f*Wx6u*NJS)>H&Y%&h&MdyTJZgGT z6xIT8E}2B+MP^qJ9P}6d2D%E9mc@^*iTp7AwhWSQS{mKQw#f`r?R+RvKonkB9Ht(j zkqtCnn2$|nlM*s6%x_x`v%VrShvJYqT{uzv zoW)GOoM8MRjqjzMq56!q7qUC+FU~B!@BZeU8S%tUYH8le_2}rP};B%C9)vG z<6?$ZIk_09ECwo?C#BOB$MjdAxuSQ69UmUyq5Y9cR7TAyZ&QPvlsv^_R3R&CTk=mx zWl5MeWrexY$R+;|_oA9CiP?+Qq#880^c)xVq>#)B>jKC6IYdqL7bD*sa>cji6 z@YpA*4!_I7)wR8HFn@`q+o0>wQ)aTfr(adPapR`xqeE&*+5932yfHTO^J{;TWl06lY0HX+9m;OLcx3SA&E{?R+fXvE}l{$26LMR}}>DMj; zlQW;63cT}fcxCfMyVux7=ory~vMn9OUEKs(ssbUGszNd@RiO?v4=e59@DY&K6KVkM zvJ)(*17->{h~C)UrQ6f5i`GLi(gHJcsVW-IE(_`V!QfHQ4=5^l_QJEY#7c-EXE9nj z$vMi17Au@Nl?R<+9Y?gaiq!X7jC*@KH+T2;RKu!!D3D{Rs@q(uvh?>DyV@lWkbiAe z<8=HJ$cAXQ8;YFRqb&eEjgc5+YFHbGzz<_>T#`p{4rAdw0CF=P!HxPBz&B)Xk$xe3 z3kkttJOUB*^VK3+9N8X*P%0AP08Rgh$4uRisqxnHP|-XP?OroNRXtW}w!*eDYufb( zj9nVzp6>3SYNAb@7U^`xE~7=KvnEHTDG$Tafrgn6)ga~_ zO+an12~5%`V5mt|EDSd#&c_XNO9&5OlpMw-&Bp-=h{6&uKz!JY0p8Pi!iP5i$4^-g zg%5WO|B{ChLXhM#kfhMYHKoj-(H6-k2ShdzBT3QoIAbLFTr8G<9_*QZY=MZ!2}do9 zJj@kDJ~;dA0Q_)FQ;S<^oO;afwd^gcJCD;Mvdhp-fayMtL$+VeI0K@L$Zro|B-NK2!A zAdT+@%LQjd^B{8VqjJ!MJTwu_wdmVvyu&O85Q@1>q0kqd{VaEDT|8&f&%#4%GZ%pA z4q~&sWO35nq*S}nnd0iZl~g;6;qkRo+PjJZ=MP=+UPX1#aB?{DX6(!P&c*9*SOKsxY67AL-*DlHyd~O7v+xa9NlSOOJ}P5%0i#A z#-prp@b2W~`IY%&{b%}JTQ2Q5zvGhbyv|#;+`UTgDbxFlmP`zc4@^}%2fRfqZmL%J z^NW1>b)Nh>N7FY#Oqo9`!t#nI7L6}*2p+{_S-G=9ETig?980gYbN%x4)9XgoISeOT z?KqtxcO;Evj%KH2$pYv4+tGSCgZ`o29)e*FF@&pjjyPQ)r80kF$}At6kIe><)@un(C73G5q|Y z0e$>Y%+%?bs9lASz7(XOIul3rB7Rem@XI_AYL|$cYPer6#i4$}*GC-f3DDAW9W5T^ zXz?gV%bDkBIVKc5VhYt@TM%wST*MTr;QQm6(2k8MVYjy#E-^x`hzk_sV$k5z#tpkZ zaQi&Q74b~*^YIA5g^1H)T+wr}p5=;$PedG1{ztMHkWg-S*MWP4OTs`5}(0F|0gq3Z;y2vu-(fKf|ORSFdojHAq77MllI zGv`b`wyzQq_!KH+=92Ge=7}lCt#4Qz#F%N+&tN0q6!~fpyQo-aHXY<`ZWDDBe3gG;;gjEe-cdN>|=T^o;^NS_vQ2&9KjA}&+ zs+ZuXF6XbR75<@0h}sR})dKDx`8d>17!`_~NFR@W%ovps8^CQCY{GGfhaA|1&=p%u z(b-R^bD>-%ysa1NBk;&ZMq7OWqarxyu3-SOMUCXW`oTHi0FQ9V}!jF#7_T2*GDeD74ZruY~9<53&TI4Tn(+_>A6EM`sLX^d7(#M=#!UtWS7iFL^6q9A`m&dLvX@SF_-dLxHO)69?w1Yjnr3ee;msx=iutJtLu(-Rm1u_6 z+Urg)8(HS$c~c5+N*B*&VHtU|*_bSCD4s$txz!LIE|6jw`*{VRr@=uFa@w&0ZuA+z z60w(XwomW^Z~4C5FJz;HfDnddM~K*YzaQTJ)v5_Nb;PWiidr?d9YZtQajM#e zL#>6g*Fs(<;x7P4X_0}Vm|{XvF{YSOV*v2^oDqrzMxfIyQ^Gzi{EN;umWWSV=oKKm zE895(Vw)Itl%WH)0hM6VMw&-;cUx#&NAuBCW-~IraG(nPvhk_~I-=uM z%u3OXmN^kbHi+GeOB`C}Pe~nWx*L~jC(btAip%vEmQFT#3hV4mr#FmjIQ7z$Woqxx zny*tc?YKWB)1g1H3{G~={Vv70)!FG$R@z_mE7BdQBL|&TCth_myEabkzn*c~cBAj+ z4}R=d6;4RTC9XQJs@lQ#D+|s@=fakLO7ii(>zi*Bxi|0hX794Ep%vK-%4`yrmNlD< z$rSFyystBI>?{3>9B1wc8=MneTGzJo4X#p8evLx}$34f^(Ymqaqsv|SuKjT2o6PlO zRZnq07he`%&%eIkTet2;uBWc$W>$;8Ks~X3e7)vuehWRj=}9tR7nL&nkSMn&=&Xq}f~Wf#+`I3Y8cfSS|VigGX=+*0pDJ6xjE?dv9bncXhc>Gt>PJJ}uz zrJ!)tVbNQRhJB=^qnn`00L0h;lQU6hBT~puFr?Ydi4r%)0+A;-Z-eSE_6-N;$Il8d zLGnY6oG*XGh1M);&6YASX#@r>$XgJejGq?5gVt;Tz`o%WhwvVv7CR@CC&Fs>>5W}* z7n-0UdwTci_I6YI^cJ1gDu-(v@}Q2%DRmXvrHl)q<3p)wOuxbYReBIM@C~Y7}eG!rbE+HRslqrHj{*F0>>SJi*H(!QM#;woY+!PQNb?7njH%d1-@LHg) z3^-rg)J+)lW{Xu}=m>G14-4F+r}Ryv*-}<*6^5RSdeNy_(g;PxKC`HhE*ex;p$e@R z8j|p11+@7pgxQO-hTz$MMbAS+lOaM4s+dDL%{>V65yp3PaXXHBeiVVq9N? zsy8i~Ap6{*VV@D0c*0B=^~}ILFaSF|RpWA1FLJpNZrD-1CjoZ<$}!ME@RPTLgXe-j z|77~?oA6{aeeBJ^#iP?>pM<*34hG(z0I5Cr==H!G?*tu};OrEf9JxRBR^Sci3_Lsy z4hP=3Hhs!9^V?70Oog611`kb-eioQGd4KAS>8rm%?=!Ar)5ngWA-{YVrU3lF?@t0e zICv#^>bHT{Ke&J8t-$e1_pf}=yBPS2zK3LAT|kP6Oz4RYy`F$fP#P9l*7Q45f!99? zE$)dPq$T#L4aT4w5mt?X_dlNb%?AKl4xniLHmKN*Iep>O%*9D|&JY(Th8bdvbXjS$ z>P7+%ZD;hHfnFM!EzZ7KsjM8B29nSyZkd%JQB0nLm%CWasY_|0PDg;vL9AvU@@VcJ zb2W2cL$v}W#urgPffg-Yvwh`Dn_AXv)-|pB5iPO6_SM1GhG;EfyatNQ>@6=7>zPBN z2-!1A3aG7!7^IIF^)jk-s47BLE~+-7Y6Da>pLvjCZ;jAWm`fsB2%8t2R1fPonR6^d z+Cq^e<40*pOs>Kv1p@M*(QGlZPFI7-A-yw>m==a!>zKpy9G5@DP(8bei}9pGRxdXLyt_o^6K8wG*>0uABi6XJ%{P>8(GI^j@kwyI+h}!* zel`!j@_MCPw00i2>G~eGXk7&C==X?=-6hMfbKIiUvCtQ9LnaaJxT<-I|igNaHUvn=l^R#iq z=oER=)I-gFh03R>@F*%K6+Z1sk9Ott68HAqZtY61_GPa^2bUNbdA^KFPe$cr-PDd7 zLT|>Vq4mu4g&(gUH@Ta3xeIr@lV83i)%nvi$AqK8G3lt(x!u)&>DBYEPMN)$)z?>g zG;6)-%|q+_iAl%%j`kgY<>)I8fH4|ai{5T$@a{0FjX(dF7;>T`!ZK}GFM#BxY6Rx++i0oqn7`B(d9n(wl4Sb zZg*PGZQ1_6AX;TEqid%-t^Ss*;lF3=L3s~X;22-xmTb7iYw;&(2G@fU2V4B|OrN~S zBQJ87yx@_)F!+*RC_d76sPD)thhDMoyCp2R6Q5)+V;)VIdra|?DeavUl{3k?#+ye}WjYxI;gPMO?WcDTzLy=86Ql%23~ zQgeK%<(|}X*TJdG>$|+EKN^zU5yjhu4vF37taK+V{@CP_+!pP3ukC^QFQRz2EYI2K z)VLEWZ;PsC_v08Z^V@3BF?!I@d^ejDx>zj+?butLT4%mHd9hDgbW>XN9ayQj`0=+g zU=y~5c*$dPBN*_Pnz$A*=gS7khGqPJV{X{g;tTU_YI&0l^zr9aR2#9Ux zuKVL27CZa#?Qehk+xvU&KdIG90zUs|Z?=A0LJ(i0hVZE(mHTZnf*2xb!bs30Eovu? zB>aloMSH|XF^T#l?UFrGqjZnVDBB}9%J)!43ipfJ6-EUueVH&SX&IDpw49MLDn`9q zlmWExtozROgY3Rfj@gpFTFhnw)@<@nO43S1_9|@6;Au_%LA`?SoeJ@1z zcR(z_IC8!NA{RsG5JU@QOk$E62p752N)XSA3Bs6cOfjb7GfXq4(ITT(L@;S>6*Jrt zYE1GwBwueJc0LiAong#0W-(dNQw_MXne4X51o)ZXcOq3f51osR9MHlF%V)-gTHE77 zZRVHBV6xv4@ol4)B&?)_V6=ea3BC^0>gaqX`6NLXFey+LGO185JS`WfB@w&|!xbU8 z)1sM?fmaj+jN~y|x|l9GuYU(Pg1#lN7w8u=*>q`i^?X`{{)9H*&2M{>hmS4kMFM_f zA>fE-3fnfuj`MZ8j4nUFSilvXQ3Nv*m?9>HNfl@@7DJzoHZaAkh$*&`bj9pF{1d!P zW>VUoo`;r7VSSoJG9^%D)IS3E)`xL0dI1N!1n7;MX&V<|`l}c{oJ%#c2zIduc2NVn zkO*@?Gb}Nd0i2T2GiA~B%kj*mOgYRchdH&gcW#Ztz^RA0hFNAt^Cd36k(n?7T`?gKdR1^O&mtXLqTBsho3{mNRgN=oP2MaHr%H?V_TSqMNHLf{`QD^Ll0JQJ`BF=n!NP()QOj2{$i_n-_*$q!K-hCesN&( z@^AloW#DfAk>J%Mp*LR+UjAt69ryIBFNWNMQFltDM$Uu|ejL0o5PIW-_U2to`}FTF20wnpOdHeAnC%R0XZG2Hr#!9p zJ?%UhljH9OFPxaX`no{CfxEvPo>@Kk%9Y@SOTk~Am^%A9+EmDWC3xj9(9V+;I(X^s zfy1E>Uz@)82AZUt{P^_rhra-Jm^-YyEiJ$<-PGlg$?^WF3!_u6pYtTb9`3&K`qb46 zcSkRwS(D?Jr{CTmI&>y@_$`5xnA;|6^WM%5i`8x`iPrQwU4f409M5YQ*^FR-eN!)9 z0@9{GITk$mrm&wm4Bc(9(k9F^E zq$fG`EMUEalQdY&c8+?g6K%4&os%?LY<5n(%GzU&*i5Xc!B-=YWxLNp?+|!a_J}1X z2nY7x&Wi;Nv4rRo*&|jEMn$wl&vK5BcxFK`n?pTt3e z#g;k9Hu)@2+z`A2ff)nM7x(eX1wti9#3ze}UCrGN~rVJiaN(ocky zX;Sr<>O_}*c#Eg@rn)*zkc#TD=e|tVxf9)+{K-|$hHu5t6_ydH8AF=}H+fQAoBo`# z*xA4(WVuxnx+I+@Xc=klyMqk}Vf88eU(_3%UZM~DRU?l5!mP^K^We2kICbFu0 zSyirvuQGE2S-B&1C+a*Y7c(|$NSMK!bcLoZI&#o9S zTQHGZ*z-9)1y%^t6O1RVw3&f{$p>|aR#i#K}sLVq{pH{3=z|ZgN zQR5E^T&8EDdNl(2OqPtwOcIp~qz%RL&+;i$FI6-w6@Rvbgwh<<_OM>m794@z@3jSF zj4gneUeNu0qK9mOs7(Uc=X`9vW8Qw0J(&Bkg+Cm&4PD(ipP-qy5VS0!EyaK_ZVnhU z9b!Q{gJqInZ7FF}KMY69&r<><)|TH3^;7C+g#tUoBqbn6%Eij68+?oAm5Ivvt1o@&(!(z;Ob3||GKr(&{}0^tv0mQ7+RMYKw&z65v%W5 z#77pHCKmCDMNDX7k=c8OO@mTTvFWHED=#Ttk43~|^~L6*S`I37Q0S#>9;z3hq7W6u zsDOx{upu`AYZ+D#HPCm+hd0f*O>8-IFGG;Og95Y%rTW3zL!~)f|7!hk;YjK6(uvGP zH!>FmlC?wigY^^1g}&s%iR4mWa;ZPL%&RK<8nn*;UYs+Kw&KM(WnOXq5zzi({1f#c z&QB~BpI)Vg>Zgm+q5Qoh36xmfx{t^D`Za#XKZtd@!YASI#XWGrLvVy&}e z799{eg8Pv40GYKAeWZhcJ3a&+fe7w649)%8V*S6!;C$YiAT{%i(WG$u`;a8fBeOKl z+4V_iG4jpWH1N#kkxW`VqdA@hy85JjGAl?P4Zq;Ei5*gh%pR3fSZO|4lwoO%>`I`E zR-BT6Pb7DcFl!!fjaCYfDBU!0TtYa=ErdR<>l`$(jKxMUx@d5tn-jzYKO6|28Pd_< zZ9^Ot8yMEZRc_uhGqYYd%SYEmqC+QgXb=<*ks|1W&Y|g14<@6YKlOUuoNx&bb|J4h z|1bTg{?dP%6Wdtx1G3;Q(&r||Sf6ti*?jI3<8V5$#4d z*Vfez;fT=1E(Jgqr4rd%R3HK8q=1?2u_1>SYXm5;QeaW?4;LGuVtWq?&;u$WUemw! zcFF=L70{$lX!3lTya`Q_Pg69ZDf4N{{F=oRni`*`W^9LF^SDzQh){b0$I4p%SQrd-IrmfdDbnAGuTOV>piJgFJU%9NrD`Q09D& zH~}j_*7I)}ed@mWz67u&30PJhMdN&XQ^)S@J7`?W(e1lA*9EiY#Fh-4L2T0Q zp8R>x<^1cFgP4ZMG+3oxH8$@5O|%u-FfxrthuUNInJ>)_o�%_G=>0-1my@pd-Y!<7{-fT59oCL9ilkIJ8?_zA6qzzJ;Tmp`GF>i6QkX;GW zQC@(ybzAJMoT$UbK8d>Iw1veHyS#aq4Q-aAOeVx5lZlgop)_~XoXlcFJ=oRdB;C#J zKhta2wTP%F$viSCvL7~%+zXbJ>E;(MLvT-_rIcn!A26&5R^;MrOfze4MRkCuvH#6U z*p3c6N18Y(TFz!e3IyG)*pIikn-Ee|Fz|2tZ@7eDakB|Z)$a%-BswKu$`f6MrzAJ! z3j%p1V-jCpjY~2VHyC$JJzg_jRt#Ylq zqfH-99o#qc{NVF$v!@cGaZj^PTQRo!ckAC91LHt=U(z?uK?OYh_so=|<6y~wl5({^stc=a{OwN`)jQ?50fHXDzHwM1H0I0rc6WMAq%Vjeb+ zv>$JWv_?+#Z7T6js&;7G;I<*tpvnE?v)e|uoi&Y`KG^PGxYD2cxKkO>=DF+KRcGr) z>)v|In`Usv-A>E~zNKf5$dAiMRL52Br#)hSdYNR`7 zwLW#|wcv$=QzP$3hEDYde{~kPcK6*^g0G$o4PS)tCo~3m;Maq%x~JixtA8MP^6Jzv z&-8EKhp-5qtET(+PaXd_c=jmdil?r80CT52ho%l4KvRDCCd!hWyc+!IDD;Q=FNco( zHu%b!$;+-U(vG*lBnvoO^P5QB#w?nv5LT(~9 zrfehBI4MsVa%=g_6v|w2au5Mfb)9@Xi?P~ShT)WR3_fpGWz0T!TeGp)66fRjk;g?= zhgu{|w077T5N8x}t8hJ{8aeeYOS9Ev>#(*Um3EH6nid=32h{RC9duVavyycH8Wh#r zWWe6Am?X)2A`L0IpHGnSf0q$t&VLeRexmF)q5ejeO=`Ybo=9rKMd@UcyU=a(G`M?4 z4fhFXhUbRCPl<0yK08TgLOV7 zB))Nbbq^uIGEHP0?O&ZlzCA z>8)OW9WXb=bg%F!R(K!Z?lnH^rFMoB6-l!G4S!1^^wmD9&`H75#I~=gM93CGav^8o z@UD@T<1OyiKj#<@HaQzyd9JMycOzw}cChwnom=5g)(3L)M!Ju8k2sDyJPl)c{@fbZ zx?3q3VL5mR-%x}m{gx&bjC5|ry?8m9RuDs58i);PG&-k@lo$GF8lz7s| z>c(6A8Ec&z@QMrH+c4VZZFttZaHluvr#Dq5IMj@+s5i3R({u5Kb1#hB{D#%nR{0ET z{pn9S*Mmnm2~^iofqXVKU*pDwwy(A#A4*8QtDt@);AS6D*mxPv3P#sO{=@9ssFF=Y7$QB zUL#3Jm0sn>o07&miH80Sa9sV3cTzGZQi^>k#op4#d?}ChZ@Mi}2jq$a-TS)_Jiq^W z*Pfg5!dvk8RPIjpbl+6hqO|Rz@yc8H@o9}exs-o=T68BVZAd?;A71BATIiJDO3ruJ zP83x83aZC8TwmxdsP-3Z_9s8>q`pdpbM*9%FZZXeb1H99YL|Ri>2kQMyzxcvwRx0Z zP|tbSc0&0TrS@v_-7DM%Z+z7kRCRa_NKWRr%izG^B}i@fN4SvCWWY-h9I`6W)dW6s zfj6mWLREZ2Rs0VSdPT{DZ_|NTOz{KT3eX3CEQo826a8_qa^p(rAD698fyRHNC>! 0 else None + filepos = matches[1] if len(matches) > 1 else None return idref, filepos - # 统一处理,提取前两个[]内容 - matches = re.findall(r'\[(.*?)\]', location) if location else [] - idref = matches[0] if len(matches) > 0 else None - filepos = matches[1] if len(matches) > 1 else None - return idref, filepos -def get_annotations(db_path=config.LOCAL_ANNOTATION_DB, bookid=None): - # 检查WAL模式相关文件 - base = db_path.rsplit('.', 1)[0] - wal_path = base + '.sqlite-wal' - shm_path = base + '.sqlite-shm' - for f in [db_path, wal_path, shm_path]: - if not os.path.exists(f): - print(f'警告: 缺少 {f},可能无法获取全部最新笔记') - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - if bookid is not None: - cursor.execute(''' - SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID - FROM ZAEANNOTATION WHERE ZANNOTATIONASSETID=? - ''', (bookid,)) - else: - cursor.execute(''' - SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID - FROM ZAEANNOTATION - ''') - rows = cursor.fetchall() - annotations = defaultdict(dict) - import datetime - for row in rows: - assetid, creationdate, location, note, selectedtext, uuid = row - # 转换 creationdate 格式,支持苹果时间戳(以2001-01-01为基准) - date_str = creationdate - if creationdate: - try: - origin = datetime.datetime(2001, 1, 1) - # 苹果时间戳 float/int 或数字字符串 - if isinstance(creationdate, (int, float)): - dt = origin + datetime.timedelta(seconds=creationdate) - elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit(): - dt = origin + datetime.timedelta(seconds=float(creationdate)) - else: - dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d") - date_str = f"{dt.year}/{dt.month}/{dt.day}" - except Exception: - date_str = str(creationdate) - idref, filepos = parse_location(location) - # 跳过note和selectedtext都为None的笔记 - if note is None and selectedtext is None: - continue - annotations[str(assetid)][uuid] = { - 'creationdate': date_str, - 'filepos': filepos, - 'idref': idref, - 'note': note, - 'selectedtext': selectedtext - } - conn.close() - if bookid is not None: - # 只返回特定bookid的笔记结构 - return {str(bookid): annotations.get(str(bookid), {})} - return annotations + def get_annotations(self, bookid=None): + # 检查WAL模式相关文件 + base = self.db_path.rsplit('.', 1)[0] + wal_path = base + '.sqlite-wal' + shm_path = base + '.sqlite-shm' + for f in [self.db_path, wal_path, shm_path]: + if not os.path.exists(f): + print(f'警告: 缺少 {f},可能无法获取全部最新笔记') + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + if bookid is not None: + cursor.execute(''' + SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID + FROM ZAEANNOTATION WHERE ZANNOTATIONASSETID=? + ''', (bookid,)) + else: + cursor.execute(''' + SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID + FROM ZAEANNOTATION + ''') + rows = cursor.fetchall() + annotations = defaultdict(dict) + import datetime + for row in rows: + assetid, creationdate, location, note, selectedtext, uuid = row + # 转换 creationdate 格式,支持苹果时间戳(以2001-01-01为基准) + date_str = creationdate + if creationdate: + try: + origin = datetime.datetime(2001, 1, 1) + if isinstance(creationdate, (int, float)): + dt = origin + datetime.timedelta(seconds=creationdate) + elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit(): + dt = origin + datetime.timedelta(seconds=float(creationdate)) + else: + dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d") + date_str = f"{dt.year}/{dt.month}/{dt.day}" + except Exception: + date_str = str(creationdate) + idref, filepos = self.parse_location(location) + if note is None and selectedtext is None: + continue + annotations[str(assetid)][uuid] = { + 'creationdate': date_str, + 'filepos': filepos, + 'idref': idref, + 'note': note, + 'selectedtext': selectedtext + } + conn.close() + if bookid is not None: + return {str(bookid): annotations.get(str(bookid), {})} + return annotations -# 用法示例:输出每本书的前3条笔记 if __name__ == "__main__": + manager = AnnotationManager() # 测试 parse_location - ''' test_locations = [ 'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8', 'epubcfi(/6/22[id15]!/4/156/1,:21,:157)', 'epubcfi(/6/764[id518]!/4[4V8DU0-27b363c65bfe41ad8429f530566a2737]/56,/1:0,/3:2)' ] for loc in test_locations: - idref, filepos = parse_location(loc) + idref, filepos = manager.parse_location(loc) print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n") - ''' # 测试只获取特定 assetid 的笔记 test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C" - annotations = get_annotations(bookid=test_bookid) - - # 格式化打印该书的所有笔记 + annotations = manager.get_annotations(bookid=test_bookid) from pprint import pprint print(f"\nAssetID={test_bookid} 的所有笔记:") pprint(annotations, indent=2, sort_dicts=False) - - # 输出每本书的前3条笔记 - ''' - book_notes = defaultdict(list) - for assetid, notes_dict in annotations.items(): - for uuid, ann in notes_dict.items(): - book_notes[assetid].append({**ann, 'uuid': uuid}) - for assetid, notes in book_notes.items(): - print(f"\nAssetID: {assetid}") - for i, note in enumerate(notes[:3]): - print(f" 笔记{i+1}:") - print(f" creationdate: {note['creationdate']}") - print(f" idref: {note['idref']}") - print(f" filepos: {note['filepos']}") - print(f" note: {note['note']}") - print(f" selectedtext: {note['selectedtext']}") - print(f" uuid: {note['uuid']}") - ''' diff --git a/booklist_parse.py b/booklist_parse.py index 28ca146..fb1c9ab 100644 --- a/booklist_parse.py +++ b/booklist_parse.py @@ -1,75 +1,66 @@ -""" -booklist_parse.py ------------------ -功能: - - 解析iBooks的Books.plist,提取所有书籍元数据(书名、作者、路径、时间等)。 - - 解析BKLibrary.sqlite,获取每本书的最近打开时间(苹果时间戳,基准2001-01-01)。 -依赖:config.py 统一管理路径和配置项。 - -主要接口: - - parse_books_plist(plist_path):返回所有书籍元数据,结构为{bk_id: {...}} - - get_books_last_open(db_path):返回所有书籍最近打开时间,结构为{bk_id: {'last_open': 时间戳}} - -依赖:plistlib, collections, sqlite3, os, datetime - -典型用法: - booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST) - books_open = get_books_last_open(config.LOCAL_LIBRARY_DB) -""" import config import plistlib -from collections import defaultdict - -def parse_books_plist(plist_path=config.LOCAL_BOOKS_PLIST): - booksinfo = defaultdict(dict) - with open(plist_path, 'rb') as f: plist_data = plistlib.load(f) - for book in plist_data.get('Books', []): - bk_id = book.get('BKGeneratedItemId') - if not bk_id: continue - booksinfo[bk_id] = { - 'displayname': book.get('BKDisplayName', ''), - 'author': book.get('artistName', ''), - 'type': book.get('BKBookType', ''), - 'bookid': bk_id, - 'itemname': book.get('itemName', ''), - 'path': book.get('path', ''), - 'date': book.get('BKInsertionDate',''), - 'updatedate': book.get('updateDate','') - } - return booksinfo import sqlite3 import os +from collections import defaultdict -def get_books_last_open(db_path=config.LOCAL_LIBRARY_DB): - """ - 从BKLibrary.sqlite获取书籍最近打开时间 - 返回:defaultdict(dict),bk_id为索引,包含最近打开时间 - """ - books_open = defaultdict(dict) - if not os.path.exists(db_path): +class BookListManager: + def __init__(self, plist_path=None, db_path=None): + self.plist_path = plist_path or config.LOCAL_BOOKS_PLIST + self.db_path = db_path or config.LOCAL_LIBRARY_DB + self._booksinfo = None + self._books_open = None + + def get_books_info(self): + if self._booksinfo is not None: + return self._booksinfo + booksinfo = defaultdict(dict) + with open(self.plist_path, 'rb') as f: + plist_data = plistlib.load(f) + for book in plist_data.get('Books', []): + bk_id = book.get('BKGeneratedItemId') + if not bk_id: + continue + booksinfo[bk_id] = { + 'displayname': book.get('BKDisplayName', ''), + 'author': book.get('artistName', ''), + 'type': book.get('BKBookType', ''), + 'bookid': bk_id, + 'itemname': book.get('itemName', ''), + 'path': book.get('path', ''), + 'date': book.get('BKInsertionDate',''), + 'updatedate': book.get('updateDate','') + } + self._booksinfo = booksinfo + return booksinfo + + def get_books_last_open(self): + if self._books_open is not None: + return self._books_open + books_open = defaultdict(dict) + if not os.path.exists(self.db_path): + return books_open + try: + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + cursor.execute(''' SELECT ZASSETID, zlastopendate FROM ZBKLIBRARYASSET WHERE zlastopendate IS NOT NULL ''') + rows = cursor.fetchall() + for row in rows: + asset_id, last_open = row + if asset_id: + books_open[asset_id] = { + 'last_open': last_open + } + conn.close() + except Exception as e: + print(f'警告: 读取BKLibrary.sqlite失败: {e}') + self._books_open = books_open return books_open - try: - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - # ZBKLIBRARYASSET表包含书籍信息 - cursor.execute(''' SELECT ZASSETID, zlastopendate FROM ZBKLIBRARYASSET WHERE zlastopendate IS NOT NULL ''') - rows = cursor.fetchall() - for row in rows: - asset_id, last_open = row - if asset_id: - books_open[asset_id] = { - 'last_open': last_open # 苹果时间戳,基准时间为2001-01-01 - } - conn.close() - except Exception as e: - print(f'警告: 读取BKLibrary.sqlite失败: {e}') - - return books_open - if __name__ == '__main__': - booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST) + manager = BookListManager() + booksinfo = manager.get_books_info() from pprint import pprint print("\n【前三条示例】") for k, v in list(booksinfo.items())[:3]: @@ -77,19 +68,10 @@ if __name__ == '__main__': pprint(v, sort_dicts=False, indent=2) print('-' * 60) - ''' - print("\n【全部内容】") - for k, v in booksinfo.items(): - print(f"{k}:") - pprint(v, sort_dicts=False, indent=2) - print('-' * 60) - ''' - # 测试最近打开时间 print("\n【最近打开时间示例】") - books_open = get_books_last_open() + books_open = manager.get_books_last_open() import datetime for k, v in list(books_open.items())[:3]: ts = v['last_open'] - # 苹果时间戳,基准2001-01-01 dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=ts) print(f"{k}: {dt} (timestamp: {ts})") \ No newline at end of file diff --git a/data/Books.plist b/data/Books.plist index fcb2e39f7d1f7a560d45d01a72201895262ccebb..4966ff0f257e1ce6461860b39d9060ff76413d5f 100644 GIT binary patch delta 17879 zcmaKzcUVi5n@C*V|-b-u4v6mpD)DDhxH>r>@9CUV@VYGZ0U4 zDKyfa?smgSJjUI_$jEXJLnEs^4UJ@547C@QwHn#}k2Q`t>&?k2j#zQL@>-RjYovRp zqG5#>DjAB8RyGvxscI zE}=e`yI#kdFS~Waqa!TzEk}>{>Cqf(u8#=7!sn5%a7AzW+%Ot(5BmpV1NEb4W8v?% z8d&H!XaciPTk(qUNdRCySVr8KIdk>kIwhj|9~@B5fhT& zma{>$Em=W)sUQ|Mn8mXMHiRXzWR}8)_O2N^ATc;@c&#BxQE}R-S<<{br^&79)(NAS zXS8kC(7Ij9epp98iCW&suj^wsc1*dAdvn9ID6DDyW*Qzg`QJ%c^ViuOux9yrXR%QE zz9km+SkMmJ+_>l!9`kw2a`EGXSHt0XY8zvhUu?6R~pqj zZr4w^+uH5fjl)s;Koi`W{s&#KaQxwLEWCB3Ar@9WT8M?!PV|-xc2MoK1J>R22RG6^ zaA7po?R-hat%|<-ab^0DD16xV)?UAZgRuX5a&6)d}&dclf zAiQ`9S@mHF$-QIcwc<$R`0q#`;ydSPa7Q8%+7Ehi<=^`~^WHPUu=D^!0m7-?a2u zBfbtxogE4tRcSNP_C?W5k>EnzsaDOIu4x~gFgUg-eUzPEATK zS|P2JR!h0knwZe;18amAb%_<@+-N-0PP@`|R-0{8`l+GmL_{TA!HKDGDNWy_z7f_k zMo|d#qCIgGmU`1>So+w9GAu1BNAV^p0xM8|tefZu(fxci5lj8)7Bp-`|7z41>&~h{ z*J5eA+SI|oLOogy>t=sJJ+X9eLyGqdaXNrL!jU}Q*hqS$3B^0IP=aU)RNLxuEhrL- zTCHe3gPA5$zM|K#6aCxLHrPgfd+Nn#DRH7BU5B<7F})kb`II;xLaQ3p>(-qbOvkJq zbg029DZF~qo7B<#@qCn{pOAz@U-}dKOm?b2UGceWQZ!xlxvW7f&HY^VU=ZE^xok%~ zJ!W^pE={CXZ9oe35_=PA4r9S}waY0SrCwrV3N4QVdLmHHWV+^y*bV@~|@4Sj)z(YEhsArcBYn>NDL zwVg{RVCmU;Mgc0LQJjy9vf0!Vhpp8@qsA31rZ_(pwR5PzKD=CRr2EAxTFUOo7?4Y0 zKOAsmc$M^1k!d1n9gSmRd@e0u+Vu@I5cPW1jr2XPywMh;0Ho#7a#(j&zR~)3UeGF7 z*Lf%X5=$d~rp{P8ayKl~=I`_g9*@<|(8YM%oI6Jq z?AoIXMi%a0q}8!*#VgR{R{x$bk>#c^Q!_nLHiZ+5sfVejDZTBxP>wk|s zty&4|bt-eM#Wi}_5xRp~L(K9$AaZjif zTgob+ld!b%bBa@F@$E|*`$6}UWL3~N^rgY+L#S`*G3>?7ck~Y$VK;tvKgRDq#P5OW zq3Mz7vFVAa!1T;i*w4kYec}jP>aZcU#9^tv{=T8M=;1@-hQ--ZA(`^)Weh8dH0I@g( zIy83p(xHh%Q->gj<_;}lB0R$X(NlOapbGm(uT(9mwOA?F7vn7Krej$fsRE~hs=>+q z1g?5=HRguL$ch?F!uf)z!yaMXt@RmB{l%IF?2O&GX#uR978t~QW$4{9E?RJJI!{Q= zSOY4y1+aX{FQ)60Sogm3`w2VAvNkq|S)F3K?6ghIxI9fyx|^RYzF`rwnzLl8EeK+f znyDqLiWcwYEm>VWyC!_aTH_X;Zp-i$ldyDP1F-IlPRw9V=XYVZ@O;Vb#=`B!$TO5# z`GE8c(KL)*rlMgeQ|bR7^5W0*c85dWjiKrGBGfUQSskK6r-T+lbX+s{KAkU;da^E5 z#D%jVFd`>hpUv>FI3K}cadrc*lBSion=vRWdy6ke4_>qS`7Qf zzh3`oIis1?IVSy7Y+{$#F`c0gIxHoc>wGCwJ2#Gbh(_a>6BT<#vm+EHYFkpPYtzZ= zBC6va89bgDoWaCI7KEcLe+`ol4f%UhV3k4A*3)5uL(-|=FnsftQMCH(UB4+yZc1JgoV$^D8a1dC#hT-c+;l18yo;z)1E_S^N*bH+trhA*w zIm481bHwqjY$g-on^_vG5Iwmt{)}yty%9DgpIO!DgtlE`#|MiV+gWucR^+p3%y;07 z^oT>YO+*Z8lJz>cBOX)ZcCu|~nP1&%4?{u`a)9~a1tH@QGe|#||H@R{H#>e~t?Wj@ z>tgl6u~ka z5o$|_t=Z0&kSuDMd3`3@6tR1bmY6Q{d%qf9w`(eHW`+|txZ7FpVkAB7#@(@JuiUx8 zE*4$J$ifUSUK(52=*@eX1O9!3Aj{1UK6dUS=-kKVC+4T-0`oKTbMp)H%i^_4&ybqT z)>3LihIF+Cw=&VZ3Rfh}qc%r(@gl4?H@FdVtjC|>eb?~|j_) zZOn&Y-6u_W2$psY;+?Q`Z3}L2)3&P>H@F(O)rR-R7P_|Q1}Ed}j@;n>?QCZlIB1A_ zcH;)iSi=y080(t5^LlvdbdKP+?M(-*QzW;VKTTA%oYgE#`1awQs2Ci{eW)cSa{BN( z!<2|xcn_M@m$%2R-0ja7VCkG_ZjjM$wQ++}UztJt9=6~V&-Y^K0qp`NyN@Cocj;{0M#u>vl}zC1lipF=M$2 zJ3M+kH@LRiH<1@$yL%_|%UHTTgAc}bD^250SlV(1-;JftGx;VgJvNKu>`D~Q;gzti z|2*!4r6E~HwUx`}2A2bD0sjtLSh9$ZQQFvBmz@4_tgUZ4PVb4unK>R-9F5 zlfF%-^>&`I7W&=YSEDiNJZ;e`URJzb$$hC7x`IFTj9z?vZ1vbTcv%iu&EMF~1Ujtc z))HYYlak`WeESL7sSG|()Lh3mQf<&$Uc=fuJuWRSaiC6&4bPUM^_=1cJ8d#rXv%Lf znpx?29KW;`bp&sN?Pl!Yaoo;MsF`+gt5&d=m)6(upru;f?QEXtzlYDJ!haXfp`Ou$ zCXU}Vyc`ac&px9arNsf>5xcSAAWz0p%ddO{mM-~?8(e_AI?4_1ntPq#2B(H?r}#W< zH{uL`iDyxtv)tf}vhF-j!xrjYcr3vH8{FV6Plen3Z=B`Tyw6`? zX`RO$pJ!J;eaf*5MbC_y*69UrjCJR~;sz%Mwl((6b zj$IfcOB=DYnn}8ed#!8<3BRcl4b0ML?ARV>$>2R}X*a1ZuC2SfWU$Ij_mB*BhD1+k zD6Z|Px3m=7U2K(bzPb9UuR$E2R5WUu>MxbSmDQ|joyT^A+DZnuxZdq0qpwIrkB*YT z)lEcaX{galr%PQWgXC#zh}0j)t$es-kcI~Kl#VJWLhk*fj~}Lenk+aDkbXt&TYW3q zAhUdKlYT_1V%s38yxlG7*}@ zH+0f8X+8F<=}gInbXRYgCE1P1f96PVVE@0jI&P$Coy>ht=ZRx;rA}14KS%2589n=P z-?vM$@z&OOo^%zD-YQwrW8{bM%a&v;4Ok%Ij|oMUMbbHJ*J+8QU<(y#*w^i(^!_+}()o8%`# z@}&T(W#vilJ)^@01t&yy!_nTfO)^Lf=Wd6Yj%L`=ozgxmUARj!NU`(wNUgB0ydNeA zvRn5TDFjDw?qMnE^LO}8ze(sjfwM=Y7p#;Sz981WyZ-RiwR`bb1(#1q7eBXf@OR1J z1T*gsnB>T>U(QJ(c87VLKP9UYH9OQ6QT*aEQ~Um+V)*kf~uLYGaNs}B=z1#0gNA1?T_>n%n!U`i}Ki(Xd+pD?$33mW{pGY`k>9<>jWR$!E*Um2ur>KiS}>ak#(ig#)~(s%+4m`>V_T zSod`;8D~i%pssAjQ3-7z-^WX7$40WjiQ;6Sd;zy&dlOg+P>ZLxkPQaf&#h&H%Z%8z zatE9Q?CC5&#ckUaEW2Xq?2pkZ9XtS_OUyb-bTw-S6gC0lryQRXBps_b916?j%Jx+uTB1e z3eOnXnfeYKw`zO=ysC)rT4G40neljpXAFe5X(;CJ;$(w6fbSDzgOg-oqTCHz2u_jh zzX)HFD#J^Rk+;7&6rx0S6Hmv=`BdymmFtzLFzCbD6xFkL6CwuI9h#2aJ~3W4xSa1l zNj5mf-1}N?i5(1_DktOa-#)`g*I}k?kVUdt@-u9KWy<62cHM&cvQ>h&w8gu_e6903 z*-dO%E1UE;@g@8;-%-4rc5<2QEnd%;o!sHRnoR4Ip3&EYH;Uia$*=T;?#L$jI37c* zev}PPPJeEbGqBT%JLSt%|A^#gc@$0!|JWyA!J~Zt0l5fEPaKrLbw%T8@Hu%X?t+IG zV8ukboiE7-AAf~hkqhiC^`iGR+2A^T!VNhJSMltYY;agDaaT6@gz5b~xf2dpuSarC zqjSvnPh^8Eb6SD?8xGZgLb(F=ZSD(s7?zfPEr(%g+F!E4Noe*vqlISAKeE9^$D8-^ zer)#$Q}C6Bm?kR*9X?-GI^*~}E1_8RuaaCGmHHBttzDHW`cNI>u8h>>w>^~BdRfg& zX{wilyp_>OT<9^CN`tPHal}h^ah1HZJ`tP4>Di8HCw~q3SUVc$u zsjBPuZKOQV2XgyBCAV0=iE>?+pJ}1=;a0ECUcO#kNRC$pudZI*^mNcGm@M=1_X_c< zLTy^tuapYf^$fL?HuNi{qc-8DQc@(eQ%W$Qwozi4cJV8vhV|nS<*JLDR}5IPdBu7S zBz|6lyyA+}%Xu9XU%kToE{eDAz^P!RwqDZiN*ldw*Hg*R%j1zsj_!DJU!}APMUdw7yEMz>R#s`yJ*OM+#HC6VMsf>_i4#emK9)nCAC@hitMS1!sMvn5s{O{lIco&>Uznt#Ih8OG&Ifs>#x_szw%3Hl0a!hg1|331h zaz+0gep>lf|9$hUGG6b(=!=TbOYdtR!fd{&4AcKU@s6@yfwI-qJC;;a_39?Ns)zKlou?|m{I;^{S^dxF%B#iRZ}U_4>oX|bU+t%R zbD@fQL;wAAHT5sO+*U(Xbos7Y>NWlM;yP*r{r8!AYD2v&*+4z4dwn`UeX0NbNMqHe z%WF4Pv-R>wka|~d?UvSRsu{{k!D>7Gj>Fo>8bETajjdl=o9KR9o04VLAnP)1YMA=} z-z)SFR~Zv4!_=Rs7S~g)3I;NJs>L5bCPb_1_hf)XBQ1t5elyx;%ThI#$;&lctu|%bKG;RK&-s&GiVjo}^ya z<^CD!-}-YEXQ+*IdE_j$g+8ShWvR<_yUl-4UG*|#k@{5EZ@E;>(|^BOt|sf{z}4z3 zUH*EFI!7<3uUEg-OOH)zkpB1Zt!nX_7?!Wz)cpz&>J;Uxt%S&jfK$M2;0f@C5Jdq> z0zN<$pgs@)L=vKw1*|}Opd&C$@ZIWe)qn6MLIz(3ZV(c`2-r`-NC+@#3i;!&^09M3A;CDi{3*b-SDj_=< z&{<6WO+Cwc-5E{DoiP9?-faj36LJsQa<3{-3($WLCgkBx;20r~;BSwd0T-Y&@GEeW zkjGHz;~fAr;qi6gjmSHumSYi333YA;v;yt{&j@wN0(Jnqf$P8nLR~X~AAr-qCEzk} zgHX48pn%X)2Y@@kLqgr1fVQIgNwt((=^UYn=K;`9`kByV2LS3xDG9g$^?@`IeNO$I z`{zz0G@~Eds}i%z!KK8vqS@ zzXCW6z!UGE5=P8g*X!!e3Z=3LE2R19aBDxuF*J3OGwxz&YR&VS&?u9|&vQ78nd90ds-ngat(aQNRLV z1+WrWOIULoFom!dEr1vx4)_+>AkN){aoQ_1kg(9kKoC$sSeOiK11j`>2y@6&xcQNv|T13M~+$C(pJ;Fx*1iU3I?HB+R zrM)C9-4lRFjGhR9m!tOpzXC^qA!8 z!j^>qYY1CD34rD=hvqMb%2t@QqzCGuUYq+7wz)q5A=v^RZ^JH zM*$ck$6pY3q6|<^9C)N=QUBY}vA3aPZ$ro40c&@lBkz6*^aY{;2*=$_G5(1fOe?b@ zXnYX_swkYW_YmRt?Ffhejpn2d&;aNOOct-7s?Df>l?cMC^aA=3UbQ8VO?b62z!G2u zaG3DwE+X=o+LHT6rV<`G3`i%ucMYHw@QUz0UVs&70rV6HUZ}ez{}sInU(pAMB7CJQ z;0K%r;DMFz2ww#ctg5T6{!0z2G=%WAi2&GN*91r*e0^g89$ycSug?H}6eZuP7V5w2 zOTu@7(k`fMw*=G#3JBj*3h)Hr?|WiI$9Jl=q<_3Sk>X1O7T^Mr5*V-#xCPuN(hxIH zSDU4n2K%QM5h?vWkw!_te~1Ljr!@L7@Bny9q%m$lkSM2`!m0n9LL$w14!j}K+;zZy zB4rK-z6TZodx3Lefyq>h`R6SsQr-$+4UvBA4~!;Kem$T&&|z4;0LO{!ngQej8-SC*L+zcjX>JAUNFrO)h+M8N(1pmp@SJZopf(T!B#BKf zrW@QprXrDJ{D7*!6C&HZfonvLD*^3* z&OkCSS{x{C8cHk7|Cz}1cM&-&21p{Z<^co(&42+wg6L7k6koFQWTJfa9k2}80PF)! z0Dlst?JeLw@Bny9ly>WZXGCfLEAS8~AW8=bs0`Er+6(83rutI%F+@q53SA%3c2@}}c6 ziITMv*a7?koCj_K;GiZ0Wq=AmRiGA78~B?j->bkz;1uu#fC{r+0Y3orvReS%0cb?_ z5+E1I1I_|hiSk1UqAW@zis)X=luOx`c=5ivshNrKCnD$zQ$4v_*m|OdZ2`6df~eh- Xftf@NZv(^uiNG9SqgW6K|A_NH+y=&U delta 17897 zcmai*cUTqI7U*}Ga?nE+3kNxf0*Z>gpi!~+MiE7@0S85pVu@hGUSl+ZPV8dC*iaF- znpmRO7;~+Ov6om9yC8Or`qrTLzC735^ZxjK^UXSI)?RzX&6z%4)3?nw9lfsg)n3G@~`$YA!Q(bR)eh5mQ)dT>Mp1hLj^! zh&yp6zQl>R5HoQkWr-_sBi8(AlZkoSQ^+iJf!b9VZoWrdk%jyuCkJLAp6*g?q&>^+ zrjdA@dpRQ`YswiKSzp1>$YP73_OgmrBisMBCNO8cIyof~D~VTr8=nP6x_?zMEbyYA zq4;=JL-BzCL-CFpca0*lDX`E`T>HL}czxZsM&cdy4Yjv4G!$=W{J_Y_+9rnDE59(T z&VuG|jEpP~HWY7YWmwA%Z46_Q)9yYkX|jKG@%$wnRv8u8rPIuhy8mq6o1NVy)aG)J z8(8ys&z`tVKl;TPoX(1I^t^D%CTCn^pLmikLGw{eD@2p-aeyEzW zQp(|Cwx*?GVc^JGSUB{{T}IWco6!*Kjv8AB3wMrpXU+b1jmR1{gmxfnNhK6j5<&U0PA3L>Y`k%N(x6F*en$|fpan~vMTEdz?&+m*ieX`GCq4I5O zEbP6sBeuD9`3v0d^H*=cj}KiRhabPXVHtjW=9X(l<8E}`Xl&r!_8La5-(+Vc7G$qY zc?0%VZ&TigSa@R|#GCM@{0rWkx8T9~-S#x6vdN8fXsLxPVA=V#g%vvR+zp{Ovu5wD zjXLnpd)FEHFk#>KxCuKR*pEX~_HZ*?vw=rkurTvz1Qy;t)(8s&ju&HL^;6N3!HTN= z=74qce&(Kh0Dh6Nit>;Kmz&;h?O%wbJgK(&sO$KBuhzIHxST zYvp?DRNR*JWAp5M_d5}^eoV;NQQiq8D)Vd7k5u~Rct{r_k~$4}eC>btfvmd^N~~TK zAKI(!X@2X%B$n^|@E*0q*P)#b{E%b)QewmP`bY{7zIE6 z-3rIl^}{G66bmWsj-|z%TCmhpp&hU^%|y*u`c)~~083AqX%%}7qnVwlRlDy(zZ1=! z=@uH65R%lReAvqmmj3A(^S+!KK4N}YY|_y9_ZegCGz3{At@t;#OiQ8Ca=lwoBowvU z(0T@=OQf}%5>uXw! zgo4hejd5`u7SM@UdM?{2K;<=xlXFpVF|B~Z)@GSe<%(8NoVJSEIaFXDp06>|ZLpq} zvD-%mZJ<{1WFswN`pQ~mzE)|doGpC5qo=7u+@NEls%4ysO4n{|p-oT+2-r$LU_Tmf zH;P4iKJ~`B>kEt~#S4N~!@ACU=@(cU`2%&v($V{=(f(2lKS)!scF`d^5K9Lfp*Ud? z;YaCotb6YmjCMPFBW?e2i1Ol-bONU)t<`3hrMa||Br*O3tw_D&A_ImE?H(W9C1!B< z(N9t8Ap(D;k8%I3ewMDl{pkF8s$l;fUNo|B?=r1{b*o&1dfUyQP2r{%dIoKZAQn?^ zQy){Lsjn%@)X&u4G(ZcvMX!pke^8nG_5IH#s7;&Li8rF1M?W<i#2h74#K-Zm=y8>Kl3j zdvWV6{r&$FzdN7ecMsxs-}J!r(Dcak*i>YCVk#~$vtJ!V>GzZ}t>_)~tTZ^JOK5;C z?}O9Gp@VwG9K*fiy#q5!Ld*>B*~EM&W-xnsyRruM_e>7npFDm}EDk;nl^v=$_&WGG zRCTE45FoxT%d8B#wd;GDEo!*4SnBhSmXFE!E9oZo`cQe+&F+W^urRCWR*_kpL{AHQ zNB!a=%SF7?b8{Q12FZRvNa_HA?M|uA+;D5Jt;r;u6NozOA=cedpW&2VeAkejwd=1I z#Jt7YrmR5H=h5;mT1Yg6Bc%yzNVQ!-tdehBkH_)u{TB`pb`oN3d@!?WTUxOw-AK#R zSvQ7ei{>p^D$K>*_u6JmyE-#WGqq*`XfAHqn$^YQWMVtk7T55%4$#`jf~7MXgr&2) zGK0}s(4F1JJ%2+_7GXC=6~drR%k^Sv$sXHxVAhQq+1j_^AS?=F{F=5lgieo0;n9p$ExDd(Wag4h4V|ZUH&JSSQ za5NUhu>M%eZR{zIhCG-}u@?=oI>E@8a}vX6HDX8#GuS;WNn-}NR@6w=#a_YM!7m|b zAsMWWK5C2HXqGlNowXF-k7kQ#z&{qFAuFobi+#>mX5}d^w@UWrV%K=~Emfe`c3wp^ z*M(A+R(?DyCq|EBPB3;e&cr8oj~~~~MmmsMJ(^G16j=ke&(KU}u=Ns?STK&cJQcce-Z!>od&ZD+y!hNLs*Fns!`ZJx_~Xp%TN7v?nd$lH0WGM3g{U^I_3n%A<>lB1HE2eljbr0iVtYR0sd)woARTV&O+ zL?Y&TMZ2RDNiimu8RY*Pzhn6LQFv}PngpD-G8eo41#JWW9`0c2Obpn@=CT@bz1ok> z8L51g6Fa3i@vJS+UZ0y@01uxNY-UNe>YJ+hw3f0(+g)q{ts0xup?iF0$l%!-kw4k; zh#1mr(aVr7xSLJb%XXo~Ah*o{hJ+&YF!RMT#cY-PLzzI zJ0C|;>?yX2ismQSb4UwqGos?d9jEq=N$hI0)Onm?;kdwYzcGV!$MGCfa78L#H0q-r zF0;Pa?zXE8@0>*bb!M;oLN{m-`_V#P~lut&cBnmJ-)Z{ITeo3d)}nYCaFzw0P^zGwBB zZ)BJ)DL$~HEh#mw`@-lKqw4k;fh#%9i5r~mY<4k{{^rKrv0pFTxxw-lTi(dRY!6-* zTiEK!`Wow(YvGk> zUVq*RyK-kBUy7yQ#BzhAe}|15Z1Ktu;dikGr$l}bORo;&cq^ctP2oOjl2ZUw%1^8N ziZ0Zm(s(7YJ(Y*x-r_o(pTp7@BlxF%=09Esio`GZ8LZnSotKi)EEPANo3O)UGr7T; z)sK^S5w?493cre_o2T)i*skA9?u4bSXY>77>O7a{Vd;r^9A{gi_#5trb^Wus7nX)D zGAhk`F*i5~U`zSe*uu)?e4Ju;a&mg-SX*C#oIVhXGjZ0BmbkOztT?OACavyzUMQm1 z^53bCzBH#tJI~%2yW-^d8u732WE_;sU)f#vJ8$GxZRciQQw-b4JFqI@ty5AGqcalH z6Osq(#M$tec)yubTrH zI~TmFBpK5V?&o!cw4V>8fkUHSPYfr4dLENlWo{1=*gY`fUF?uanfpe0h{aY<8F|ZN<{+Cg~P# zwTh)A{H{tgG)rT#V+WiigSV??-K4s>v|jF#!2~y}oMf;lBv+7zic!^6f2lk!EHJ>xF}E6$JJxMjOENea8d%4`F_BYGDmI~ZU;TyD9arr` zbLkFlpy$ETD=huHm1Hnn`?QrVV7tK`B!kmh&rXujXC$I`7s=pEC$gI~+~{S~l^&8o z-n1iB8i?cO6CoL7piTNn#}yPI_W{zUuhagSCpZq0en$1ny&Y?iR6eyyJCUl`HAM2U zyEr`=Dp^I{VNzE~-)I%Wi4I-Dvc#4|X)5)KNp~OfWo$y{i7AWv<27nwn$!~~i_b<$ z|F*l9T+EQHTJSjOym+4>wPI@QfWspdc=1$jykwB@kC$RNyc`e{rA;_BN}Db*T<6I% zrOnu{=5r+*(#_pIPqG`He}4ncp3joH=%=*J>uBGkuxv4Bfz%Z;ga5qCiJkwb|C^PI z@iNymTe^;WalB{xAzkV$~y|i3>S#lR|FG(iq7c)Q17CD$ig_Vz*L;4M<)T8#8 zLOdH(y&_F^MCI;9&GizzRq6jos!8j|g%?M~ z>xaro|M~hWvv)`;5w^VH87mj}s6C_^?hW2WlEF5lZLwr<%s2YEl!aT(J=a(f)d&_aOOoA;h(lRw_SPMw= z_m1+`8%LY%BR5AiJ?A4=#pRlPWrI`4QU0$WdM zcE!@G;qpaP3++gR>|-ib_8XW>%k+1XN?} zD7juKeVH=1)Xt26@2m!llq=F2aijm7^HZo2)pN+l87ihvbTc9b*BzdLD{v}PHaNE* zI9WE>+1#Bfx5lnEnIWg*-mrVNk*>pB*&wlG^W-Pk0?U#!?S^3KLI^>UCU0Of zkjLP3@b@3(Yq*0SIxN4((o;v|uUt_t8hTzHj>G-nB21J>x7!uj;G3_|YjTmj$z4SM zAsd{gPrNC|;3A&fmJRl*rS8ZEpDTU1D|f{K>-$g+G}^{|`&c$eEN2$Uzu-^}DwZo_ z-xfTRM`CH&mvT6kroWaAHbV2?8qF~W-pK~%9Irmehp^paOu;7_Vy3JZH26YQ>4xL; zq?7_*w3vm9qf%djYg<>Pn%-51xhtb}`Jd&Kw)(ZYhtgcX4)IjR>et&|ilcrl_Es+I z&zLr(xbdXm*1{vIJ7)jUGA9_^IMT26|Rpmpl31eS8E zdGYpAn2{!~(zaJqmWkONm4j4VZl^df>!-cRbr&~}IIw5)i1!#wd_9JEB$T9;*`1Y2 zda(uF6;IubGa*WC{YrZ&?e%NNKFT!xdNNAM(cMq&uas5cI^CxHpx?I{qSV#zN2Ms^ zbUVc(6n_caw4e7i38zt1ZJVBP=5=qqIipik~*JiSpH=0g9L7r+2w- zHD3i~n>3%HaxJZBqmm*XjaAlBA!R7-nI$%~Le$Jq%XJrL%XG^OFg(*T%Q9OxoSQ#h zk)hz7la(Ufp>@-gd$hFKvKGo)XE{i1V(|<`VPfqpr4ua|6Y^!$lq;5%mQ`RQ$Fkb8 zrX&jQXDh1yurgQKuGe<=d?iu8_svq=xYg6u(+&RFnB(d0S=O_h{(xtBvf8tPr-!G7 z+O+URP@AZ9WwUTxq734STVTtP+f7D5m8xKR(Q;*_UR0wT zWuSi5)++sFXS3%dusqpwisw{n^PDE6jmiW5pGQUE7G)fFz2-T@b0v5))N_^R#}AOF zY*$X}Ho^;(ApJUbxAHID{m}1~OZqi%zcNXe=l!TW(PNc!R5`87hyS9y(XYc!C=UAX zqfaZ>^!te4l&|#rTj!Kay-{N?D?-0|{_!!!=3B~0{r5?KDVr6z)_SCz)qm&3%6t8J z_WWa~nfXfjMb``aTiM8+&0fpMYOm>D%e`iLt)R97vwD$>$6P%^EB^Ph0BB&ZiC%dm z$7{0JHm~g^jq{VNw%5IOEUk{vOPl1X{-j?!R!{|4-&IjPr~iE3M=f!Fm#=zAA6Oaw z>HyuDi`CSd`uz{p)z|uUS4~yX<@;)>f9Urs>ZlF%`?K}bM*6jML-nZc^lw4xbN%;Y zO;wvNuiac-tY41>t9SIuZf~oOFvHa^MD55e*45U=)*zB&ZEF3(+D!M`+MKMm21Eat z5w2Rr)(G``DmI6!?Em(Jgg$Bj7|80QmVEx06s4Ba3!c(n{ar68au5`3$FR5%t6Fr$ zr#5w#{^0lowOIE!HBsH6-&aXdr|6#Mj!>WI^2MXn@w$HbbhWI04IJ~aAU~w%S;iN6k}P>0^BPB6YQHx8)MmRlkNVR||Fh)~nQf{r<%oHC4Y3 z&Q<5>@|WMK-{{v_o7J!MYq>l%SpPj@hgvdKh8L)}biaaxI$deEju3ed@Gsy3a07S% zye32`4R`_7fcn5QLe%mA=&6?oG0A`mz{`#O>VDOKSO-Febp*N*l57G3fZK$mfKrMX zs15WKcaFecf+|lQL&)T@giNUkv;v9=nOYXG0L_6=(c`G<#QnudLc}THEFrtW)jg$v zLPGYs0pQNwroc2YwE7fLhc;}juUc!60nSr2O+>nAOq0v3xK`A2|^w!gcSY& z>;n!FQZx#fPso!tKs=BLd=0D<`6pCw7Wo^Y&S!y(gt|-sW)SLH6X*ei1EYb7Ko+nB z0DU*mbISy#5?ZD=uz*l^8!!`?3v2`Ki5jQXGOFJ}LX&?4ej+qw9Z*bY>KWh}@S4!H ziohTddtUvO`)^7ibWa6v>}Ry(uGr~49mI*QOU3~&W@ z1DAn62>q1;2Q8joWBMAv~M^R9^L|I13V+FmjkdDxC-1P zEJ6k>fEQ2)NFc2DIp911j`W!aY#=NWf)Y6afS^P|P$Kt<(SNG%X+(w-VHwT<7#rgQ zKp|tx0~LW_ARU+lOa~4SHhv~B3;3F_35|iaz$3ykU4XIx)Hbu7`1c)k0`p(HnXt86 zfE|Rbn*%^0>!FJ4p^EEo5tgd}?X;Bp>Q8-loFr_=Dc~$&`FQ|1vJ>ui76UH{D}Y)S zKrIU500?`*TEceM1OkD2z;nX(Ky&Q@yL-Xz-qJuVptqR!Ky{-2ryx|P;D=N2!>L06 zgz$7WAb=l$>%bdv_@TOp`rm^R@4W)v5q5t)kWbiyXdn}q3giMiMdo8QgjVI|T7;Wx z0}X)Jggg2J4+wX10?Gi5fI;GAq1uA_*KALC%??0U!UN%BO+xe3JOibP=f*Y9X$<-qU5;ejG!~=;$?d=P+0^Sg{j~7rGXa~fIrA^>( GIR6Lc-MJ+I diff --git a/detaildesign.md b/detaildesign.md index 92acacf..5dc875e 100644 --- a/detaildesign.md +++ b/detaildesign.md @@ -158,31 +158,43 @@ answer = inquirer.fuzzy( ## 9.1 主要代码文件说明(细化) + - `exportbooknotes.py` + - 采用 OOP 设计,核心类为 `BookNotesExporter`: + - `build_booksnote(bookid=None)`:构建结构化笔记数据。 + - `export_booksnote_to_md(booksnote, booksinfo, out_path=None)`:导出为 Markdown。 + - `find_file_by_ext`、`get_toc_tree` 等辅助方法。 - 数据同步:自动复制 iBooks 数据库和元数据到本地。 - 菜单交互:按最近打开时间戳排序,显示“书名 [时间戳]”,支持模糊搜索。 - 只处理用户选中书籍的笔记,按章节分组导出 Markdown。 - 依赖核心解析模块,负责主流程调度。 - `annotationdata.py` + - OOP 设计,核心类为 `AnnotationManager`: + - `get_annotations(bookid=None)`:返回所有或指定 assetid 的笔记。 + - `parse_location(location)`:静态方法,解析定位信息。 - 解析 AEAnnotation.sqlite,提取所有或指定 assetid 的笔记。 - 支持苹果时间戳转换,结构化输出。 - - parse_location 辅助函数,统一解析笔记定位信息。 - `booklist_parse.py` + - OOP 设计,核心类为 `BookListManager`: + - `get_books_info()`:获取书籍元数据。 + - `get_books_last_open()`:获取每本书的最近打开时间。 - 解析 Books.plist,获取书籍元数据(书名、作者、路径、时间等)。 - - 解析 BKLibrary.sqlite,获取每本书的最近打开时间(zlastopendate,苹果时间戳)。 - - 提供统一数据接口,便于主流程排序和展示。 + - 解析 BKLibrary.sqlite,获取每本书的最近打开时间。 - `opf_parse.py` + - OOP 设计,核心类为 `OPFParser`: + - `parse_opf(filepath)`:静态方法,返回 id->href 映射。 - 解析 epub 的 OPF 文件,获取章节与文件映射关系(idref -> href)。 - - 支持多种 epub 目录结构。 - `toc_parse.py` + - OOP 设计,核心类为 `TOCParser`: + - `parse_navpoints(navpoints)`:递归解析 navPoint 节点。 + - `find_label_path(node, ref, filepos, path)`:查找章节路径。 + - `find_section_by_selectedtext(html_path, selectedtext)`:通过选中文本定位章节标题。 + - `parse_html_title(html_path)`:解析 html 文件标题。 - 解析 NCX 目录文件,递归构建章节树结构。 - - find_label_path:支持通过 ref 和 filepos 查找完整 label 路径。 - - find_section_by_selectedtext:通过选中文本在 html 文件中定位章节标题。 - - parse_html_title:解析 html 文件标题。 - `backup/booksnote.py` - 历史/备份脚本,辅助数据迁移或格式转换。 diff --git a/export_notes/notes_export_B18FCD9F90FD43C2373AE52BAEF9A77C.md b/export_notes/notes_export_B18FCD9F90FD43C2373AE52BAEF9A77C.md index d414ade..8a33fc7 100644 --- a/export_notes/notes_export_B18FCD9F90FD43C2373AE52BAEF9A77C.md +++ b/export_notes/notes_export_B18FCD9F90FD43C2373AE52BAEF9A77C.md @@ -1,4 +1,4 @@ -# 笔记导出 2025-08-15 13:25 +# 笔记导出 2025-08-15 17:20 ## 传统十论 diff --git a/exportbooknotes.py b/exportbooknotes.py index e9f8dd3..90ad1b8 100644 --- a/exportbooknotes.py +++ b/exportbooknotes.py @@ -1,31 +1,17 @@ """ -exportbooknotes.py ------------------- +exportbooknotes.py (OOP版) +------------------------- 功能: - 自动同步iBooks数据库和元数据文件到本地data目录。 - 解析AEAnnotation.sqlite、Books.plist、BKLibrary.sqlite,构建结构化笔记数据。 - 解析epub目录和章节信息,定位每条笔记所属章节。 - 命令行菜单按最近打开时间降序展示书籍列表,供用户选择导出。 - 仅导出选中书籍的所有笔记,按章节分组,生成Markdown文件。 - 依赖:config.py 统一管理路径和配置项。 - -主要数据流: - 1. 数据同步到data目录 - 2. 解析Books.plist获取书籍元数据 - 3. 解析BKLibrary.sqlite获取最近打开时间 - 4. 菜单排序与显示(书名+时间戳) - 5. 解析AEAnnotation.sqlite获取笔记 - 6. 解析epub目录,定位章节 - 7. 导出Markdown文件 - -依赖:Python 3, InquirerPy, bs4, shutil, os, datetime, sqlite3 - -主要数据流: - -典型用法: - python exportbooknotes.py - # 按提示选择书籍,自动导出笔记到export_notes目录 +主要接口:BookNotesExporter + - run():命令行交互式导出主流程 + - build_booksnote(bookid=None):构建结构化笔记数据 + - export_booksnote_to_md(booksnote, booksinfo, out_path=None):导出为Markdown """ import config """ @@ -40,117 +26,113 @@ booksnote = { }}} } """ -from collections import defaultdict import os -from annotationdata import get_annotations -from booklist_parse import parse_books_plist +from collections import defaultdict +from annotationdata import AnnotationManager +from booklist_parse import BookListManager from opf_parse import parse_opf -from toc_parse import parse_navpoints, find_label_path +from toc_parse import TOCParser from bs4 import BeautifulSoup -from pprint import pprint -def find_file_by_ext(root, exts): - """在root下递归查找第一个指定后缀的文件""" - for dirpath, _, files in os.walk(root): - for f in files: - for ext in exts: - if f.lower().endswith(ext): - return os.path.join(dirpath, f) - return None -def get_toc_tree(toc_path): - with open(toc_path, 'r', encoding='utf-8') as f: - soup = BeautifulSoup(f, 'xml') - nav_map = soup.find('navMap') +class BookNotesExporter: + def __init__(self, config_module=config): + self.config = config_module + self.annotation_db = config_module.LOCAL_ANNOTATION_DB + self.books_plist = config_module.LOCAL_BOOKS_PLIST + self.library_db = config_module.LOCAL_LIBRARY_DB - nav_points = nav_map.find_all('navPoint', recursive=False) - toc_tree = parse_navpoints(nav_points) - #pprint(toc_tree, indent=2, depth=5) - return toc_tree + @staticmethod + def find_file_by_ext(root, exts): + for dirpath, _, files in os.walk(root): + for f in files: + for ext in exts: + if f.lower().endswith(ext): + return os.path.join(dirpath, f) + return None -def build_booksnote(annotation_db=config.LOCAL_ANNOTATION_DB, books_plist=config.LOCAL_BOOKS_PLIST, bookid=None): - # 支持只处理特定 assetid 的笔记 - annotations = get_annotations(annotation_db, bookid=bookid) - booksinfo = parse_books_plist(books_plist) - booksnote = defaultdict(lambda: defaultdict(dict)) - for assetid, notes in annotations.items(): - # 获取epub路径 - bookinfo = booksinfo.get(assetid) - if not bookinfo: - continue - epub_path = bookinfo.get('path') - if not epub_path or not os.path.isdir(epub_path): - continue - # 查找opf和ncx - opf_path = find_file_by_ext(epub_path, ['.opf']) - ncx_path = find_file_by_ext(epub_path, ['.ncx']) - if not opf_path or not ncx_path: - continue - id2href = parse_opf(opf_path) - toc_tree = get_toc_tree(ncx_path) - for uuid, ann in notes.items(): - idref = ann['idref'] - filepos = ann['filepos'] - href = id2href.get(idref, idref) - chapter = find_label_path(toc_tree, href, filepos) - if chapter is None: - # 直接从html文件获取章节信息 - html_path = os.path.join(epub_path, href.split('#')[0]) - selectedtext = ann.get('selectedtext') - if os.path.exists(html_path) and selectedtext: - from toc_parse import find_section_by_selectedtext - section = find_section_by_selectedtext(html_path, selectedtext) - if section: - chapter = section + @staticmethod + def get_toc_tree(toc_path): + with open(toc_path, 'r', encoding='utf-8') as f: + soup = BeautifulSoup(f, 'xml') + nav_map = soup.find('navMap') + nav_points = nav_map.find_all('navPoint', recursive=False) + toc_tree = TOCParser.parse_navpoints(nav_points) + return toc_tree + + def build_booksnote(self, bookid=None): + manager = AnnotationManager(self.annotation_db) + annotations = manager.get_annotations(bookid=bookid) + bl_manager = BookListManager(plist_path=self.books_plist) + booksinfo = bl_manager.get_books_info() + booksnote = defaultdict(lambda: defaultdict(dict)) + for assetid, notes in annotations.items(): + bookinfo = booksinfo.get(assetid) + if not bookinfo: + continue + epub_path = bookinfo.get('path') + if not epub_path or not os.path.isdir(epub_path): + continue + opf_path = self.find_file_by_ext(epub_path, ['.opf']) + ncx_path = self.find_file_by_ext(epub_path, ['.ncx']) + if not opf_path or not ncx_path: + continue + id2href = parse_opf(opf_path) + toc_tree = self.get_toc_tree(ncx_path) + for uuid, ann in notes.items(): + idref = ann['idref'] + filepos = ann['filepos'] + href = id2href.get(idref, idref) + chapter = TOCParser.find_label_path(toc_tree, href, filepos) + if chapter is None: + html_path = os.path.join(epub_path, href.split('#')[0]) + selectedtext = ann.get('selectedtext') + if os.path.exists(html_path) and selectedtext: + section = TOCParser.find_section_by_selectedtext(html_path, selectedtext) + if section: + chapter = section + else: + chapter = "(未找到章节)" else: chapter = "(未找到章节)" - else: - chapter = "(未找到章节)" - booksnote[assetid][chapter][uuid] = { - 'creationdate': ann['creationdate'], - 'filepos': filepos, - 'idref': href, - 'note': ann['note'], - 'selectedtext': ann['selectedtext'] - } - return booksnote + booksnote[assetid][chapter][uuid] = { + 'creationdate': ann['creationdate'], + 'filepos': filepos, + 'idref': href, + 'note': ann['note'], + 'selectedtext': ann['selectedtext'] + } + return booksnote -import datetime - -def export_booksnote_to_md(booksnote, booksinfo, out_path=None): - """ - 依据booksnote结构导出markdown文件,格式: - # “笔记导出”+导出时间 - ## 书名 - ### chapter - selectedtext - > note (如果存在) - """ - now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') - lines = [f'# 笔记导出 {now}\n'] - for assetid, chapters in booksnote.items(): - bookname = booksinfo.get(assetid, {}).get('itemname', assetid) - lines.append(f'\n## {bookname}\n') - for chapter, notes in chapters.items(): - lines.append(f'### {chapter}') - for uuid, ann in notes.items(): - sel = ann.get('selectedtext') - note = ann.get('note') - if sel: - lines.append(sel) - if note: - lines.append(f'> {note}') - lines.append('') - md = '\n'.join(lines) - if out_path: - with open(out_path, 'w', encoding='utf-8') as f: - f.write(md) - return md + def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None): + import datetime + now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + lines = [f'# 笔记导出 {now}\n'] + for assetid, chapters in booksnote.items(): + bookname = booksinfo.get(assetid, {}).get('itemname', assetid) + lines.append(f'\n## {bookname}\n') + for chapter, notes in chapters.items(): + lines.append(f'### {chapter}') + for uuid, ann in notes.items(): + sel = ann.get('selectedtext') + note = ann.get('note') + if sel: + lines.append(sel) + if note: + lines.append(f'> {note}') + lines.append('') + md = '\n'.join(lines) + if out_path: + with open(out_path, 'w', encoding='utf-8') as f: + f.write(md) + return md if __name__ == '__main__': import shutil import os.path + from InquirerPy import inquirer # type: ignore + exporter = BookNotesExporter(config) # 自动覆盖 ./data 下的数据库和plist文件,源为iBooks真实路径 src_files = [ (config.IBOOKS_ANNOTATION_DB, config.LOCAL_ANNOTATION_DB), @@ -166,31 +148,19 @@ if __name__ == '__main__': else: print(f'file not found: {src} ') - from booklist_parse import parse_books_plist - from InquirerPy import inquirer # type: ignore - # 先获取所有书籍元数据 - booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST) - - # 构建书名列表(优先displayname, 其次itemname, 否则assetid),按parse_books_plist中的date字段排序 + manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB) + booksinfo = manager.get_books_info() assetid2name = {} assetid2lastopen = {} - from booklist_parse import get_books_last_open - - # 获取所有书籍的最后打开时间(字典,值为{'last_open': 时间戳}) - last_open_times = get_books_last_open(config.LOCAL_LIBRARY_DB) - + last_open_times = manager.get_books_last_open() for assetid, info in booksinfo.items(): name = info.get('displayname') or info.get('itemname') or assetid - # 如果书名中包含“-”,只取“-”前面的部分 if '-' in name: name = name.split('-', 1)[0].strip() assetid2name[assetid] = name - # 用 get_books_last_open 返回的时间戳排序,如无则为0 ts = last_open_times.get(assetid, {}).get('last_open', 0) assetid2lastopen[assetid] = ts - - # 按last_open时间戳降序排列 sorted_assetids = sorted(assetid2name.keys(), key=lambda aid: assetid2lastopen[aid], reverse=True) choices = [f"{assetid2name[aid]} [{assetid2lastopen[aid]}]" for aid in sorted_assetids] if not choices: @@ -202,8 +172,6 @@ if __name__ == '__main__': multiselect=False, instruction="上下键选择,输入可模糊筛选,回车确定" ).execute() - - # 解析选中assetid for aid, name in assetid2name.items(): if answer.startswith(name): selected_assetid = aid @@ -211,10 +179,8 @@ if __name__ == '__main__': else: print("未找到选中书籍") exit(1) - - # 只导出选中书的笔记 - selected_booksnote = build_booksnote(bookid=selected_assetid) + selected_booksnote = exporter.build_booksnote(bookid=selected_assetid) selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})} out_path = f'export_notes/notes_export_{selected_assetid}.md' - export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path) + exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path) print(f'《{selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid}》 导出笔记 {out_path}') diff --git a/opf_parse.py b/opf_parse.py index b711311..08aece5 100644 --- a/opf_parse.py +++ b/opf_parse.py @@ -1,38 +1,46 @@ - -# parseopf.py -# ----------------------------- -# 用于解析EPUB电子书的OPF文件,提取manifest部分所有id对应的html文件href。 -# 支持批量测试和通过id快速查找href。 -# 依赖:BeautifulSoup4 -# ----------------------------- - -from collections import defaultdict -from bs4 import BeautifulSoup -import pprint - - def parse_opf(filepath): """ - 解析OPF文件,返回{id: href}的defaultdict(dict)结构。 - 仅保留href以.html结尾的项。 - - 参数: - filepath (str): OPF文件路径 - 返回: - defaultdict(dict): id到href的映射(仅html文件) + 兼容旧代码的顶层函数,实际调用 OPFParser.parse_opf。 """ - result = defaultdict(dict) - with open(filepath, 'r', encoding='utf-8') as f: - soup = BeautifulSoup(f, 'xml') - # 查找manifest部分,遍历所有item,筛选html结尾的href - manifest = soup.find('manifest') - if manifest: - for item in manifest.find_all('item'): - id_ = item.get('id') - href = item.get('href') - if id_ and href and href.strip().lower().endswith('html'): - result[id_] = href - return result + return OPFParser.parse_opf(filepath) + +""" +opf_parse.py (OOP版) +------------------- +功能: + - 解析EPUB电子书的OPF文件,提取manifest部分所有id对应的html文件href。 + - 支持通过id快速查找href。 + - 支持批量测试。 +依赖:BeautifulSoup4 +主要接口:OPFParser + - parse_opf(filepath):静态方法,返回id->href映射(仅html文件)。 +""" +from collections import defaultdict +from bs4 import BeautifulSoup + +class OPFParser: + @staticmethod + def parse_opf(filepath): + """ + 解析OPF文件,返回{id: href}的defaultdict(dict)结构。 + 仅保留href以.html结尾的项。 + 参数: + filepath (str): OPF文件路径 + 返回: + defaultdict(dict): id到href的映射(仅html文件) + """ + result = defaultdict(dict) + with open(filepath, 'r', encoding='utf-8') as f: + soup = BeautifulSoup(f, 'xml') + manifest = soup.find('manifest') + if manifest: + for item in manifest.find_all('item'): + id_ = item.get('id') + href = item.get('href') + if id_ and href and href.strip().lower().endswith('html'): + result[id_] = href + return result + if __name__ == "__main__": test_files = [ @@ -44,8 +52,7 @@ if __name__ == "__main__": for file in test_files: print(f"\n==== 测试文件: {file} ====") try: - result = parse_opf(file) - pprint.pprint(result, indent=2, width=120, sort_dicts=False) + result = OPFParser.parse_opf(file) # 增加通过id快速打印href的测试 test_ids = list(result.keys())[:3] # 取前三个id做演示 diff --git a/toc_parse.py b/toc_parse.py index 426e619..c1a7a55 100644 --- a/toc_parse.py +++ b/toc_parse.py @@ -1,6 +1,7 @@ + """ -toc_parse.py ------------- +toc_parse.py (OOP版) +------------------- 功能: - 解析EPUB电子书的toc.ncx目录文件,递归构建章节树结构。 - 支持通过ref和filepos查找完整label路径。 @@ -8,166 +9,120 @@ toc_parse.py - 兼容多种EPUB格式,支持批量测试。 依赖:config.py 统一管理路径和配置项。 -主要接口: - parse_navpoints(navpoints) # 递归解析navPoint节点,返回章节树结构。 - find_label_path(node, ref, filepos, path) # 查找指定ref和filepos的章节label路径。 - find_section_by_selectedtext(html_path, selectedtext) # 通过选中文本定位章节标题。 - parse_html_title(html_path) # 解析html文件标题。 +主要接口:TOCParser + - parse_navpoints(navpoints):递归解析navPoint节点,返回章节树结构。 + - find_label_path(node, ref, filepos, path):查找指定ref和filepos的章节label路径。 + - find_section_by_selectedtext(html_path, selectedtext):通过选中文本定位章节标题。 + - parse_html_title(html_path):解析html文件标题。 依赖:BeautifulSoup4, pprint, os, typing """ import config - - from bs4 import BeautifulSoup -from typing import Dict, Optional, List, Any -import pprint +import os -# ==== 辅助函数:根据selectedtext在html文件中的位置推断所在章节 ==== -def find_section_by_selectedtext(html_path, selectedtext): - """ - 在html文件中查找selectedtext出现的位置,向上回溯最近的h1-h6标题,返回该标题文本。 - 若未找到标题,则返回None。 - """ - try: - with open(html_path, 'r', encoding='utf-8') as f: - soup = BeautifulSoup(f, 'html.parser') - # 在所有文本节点中查找selectedtext - for elem in soup.find_all(string=True): - if selectedtext and selectedtext.strip() and selectedtext.strip() in elem: - # 回溯父节点,查找最近的h1-h6 - parent = elem.parent - while parent: - prev = parent.previous_sibling - # 向上查找同级前面的h1-h6 - while prev: - if prev.name and prev.name.lower() in ['h1','h2','h3','h4','h5','h6']: - return prev.get_text(strip=True) - prev = prev.previous_sibling - parent = parent.parent - # 若未找到,尝试全局第一个h1-h6 - for tag in ['h1','h2','h3','h4','h5','h6']: - h = soup.find(tag) - if h and h.get_text(strip=True): - return h.get_text(strip=True) - except Exception: +class TOCParser: + def __init__(self): pass - return None -def parse_html_title(html_path): - """ - 解析html文件,优先返回,否则返回body第一个h1/h2/h3/h4/h5/h6或None。 - """ - try: - with open(html_path, 'r', encoding='utf-8') as f: - soup = BeautifulSoup(f, 'html.parser') - # 优先<title> - if soup.title and soup.title.string: - return soup.title.string.strip() - # 其次正文第一个h1-h6 - for tag in ['h1','h2','h3','h4','h5','h6']: - h = soup.find(tag) - if h and h.get_text(strip=True): - return h.get_text(strip=True) - except Exception: - pass - return None + @staticmethod + def find_section_by_selectedtext(html_path, selectedtext): + try: + with open(html_path, 'r', encoding='utf-8') as f: + soup = BeautifulSoup(f, 'html.parser') + for elem in soup.find_all(string=True): + if selectedtext and selectedtext.strip() and selectedtext.strip() in elem: + parent = elem.parent + while parent: + prev = parent.previous_sibling + while prev: + if prev.name and prev.name.lower() in ['h1','h2','h3','h4','h5','h6']: + return prev.get_text(strip=True) + prev = prev.previous_sibling + parent = parent.parent + for tag in ['h1','h2','h3','h4','h5','h6']: + h = soup.find(tag) + if h and h.get_text(strip=True): + return h.get_text(strip=True) + except Exception: + pass + return None -def parse_navpoints(navpoints) -> Dict[str, dict]: - """ - 递归解析 navpoints 节点,返回嵌套 dict 结构。 - :param navpoints: BeautifulSoup 查找到的 navPoint 节点列表 - :return: 章节树结构 - """ - result = {} - for navpoint in navpoints: - label = navpoint.navLabel.text.strip().strip('"“”') - src = navpoint.content["src"] - if "#" in src: - ref, filepos = src.split("#", 1) - else: - ref, filepos = src, None - entry = { - "label": label, - "ref": ref, - "filepos": filepos, - "children": parse_navpoints(navpoint.find_all("navPoint", recursive=False)) - } - result[navpoint.get("id")] = entry + @staticmethod + def parse_html_title(html_path): + try: + with open(html_path, 'r', encoding='utf-8') as f: + soup = BeautifulSoup(f, 'html.parser') + if soup.title and soup.title.string: + return soup.title.string.strip() + for tag in ['h1','h2','h3','h4','h5','h6']: + h = soup.find(tag) + if h and h.get_text(strip=True): + return h.get_text(strip=True) + except Exception: + pass + return None - #pprint.pprint(result) # 格式化打印result + @staticmethod + def parse_navpoints(navpoints): + result = {} + for navpoint in navpoints: + label = navpoint.navLabel.text.strip().strip('"“”') + src = navpoint.content["src"] + if "#" in src: + ref, filepos = src.split("#", 1) + else: + ref, filepos = src, None + entry = { + "label": label, + "ref": ref, + "filepos": filepos, + "children": TOCParser.parse_navpoints(navpoint.find_all("navPoint", recursive=False)) + } + result[navpoint.get("id")] = entry + return result - return result - -def find_label_path( - node: Any, - ref: str, - filepos: Optional[str] = None, - path: Optional[List[str]] = None -) -> Optional[str]: - """ - 在嵌套 dict 结构中查找指定 ref 和 filepos 的 label 路径。 - :param node: 当前节点(dict 或 dict集合) - :param ref: html文件名 - :param filepos: 文件位置,可为 None - :param path: label 路径累积 - :return: 以 / 分隔的完整 label 路径,未找到返回 None - """ - if path is None: - path = [] - if isinstance(node, dict): - nodes = node.values() if "label" not in node else [node] - # 1. 优先精确匹配ref和filepos - for v in nodes: - if "label" in v: - new_path = path + [v["label"]] - if v["ref"] == ref and (filepos is None or v["filepos"] == filepos): - title = " / ".join(new_path) - #print(f'title ref={ref} filepos={filepos} -> {title}') #DBG - return title - title = find_label_path(v["children"], ref, filepos, new_path) - if title: - #print(f'title1 ref={ref} filepos={filepos} -> {title}') #DBG - return title - - # 2. 如果带filepos查找失败,回退到同ref下第一个章节(即只要ref匹配就返回) - if filepos is not None: + @staticmethod + def find_label_path(node, ref, filepos=None, path=None): + if path is None: + path = [] + if isinstance(node, dict): + nodes = node.values() if "label" not in node else [node] for v in nodes: if "label" in v: new_path = path + [v["label"]] - # print(f"对比 {v['ref']} == {ref}") - if v["ref"].split("#", 1)[0] == ref.split("#", 1)[0]: + if v["ref"] == ref and (filepos is None or v["filepos"] == filepos): title = " / ".join(new_path) - #print(f'title3 ref={ref} filepos={filepos} -> {title}') #DBG return title - title = find_label_path(v["children"], ref, None, new_path) - if title: - #print(f'title4 ref={ref} filepos={filepos} -> {title}') #DBG - return title - - # 3. 若完全未找到,尝试直接解析idref所指html文件标题,获取章节label信息 - # 仅在顶层调用时执行此逻辑 - if path == [] and ref and ref.endswith('.html'): - import os - # 自动在常见目录下查找html文件(以toc文件目录为基准) - caller_dir = os.path.dirname(os.path.abspath(__file__)) - search_dirs = [caller_dir, os.getcwd()] - for d in search_dirs: - html_path = os.path.join(d, ref) - #print(f"查找 {html_path}") - if os.path.isfile(html_path): - title = parse_html_title(html_path) - if title: - return title - # 递归查找(以toc文件目录为根) - for d in search_dirs: - for root, _, files in os.walk(d): - if ref in files: - html_path = os.path.join(root, ref) - #print(f"2 查找 {html_path}") - title = parse_html_title(html_path) + title = TOCParser.find_label_path(v["children"], ref, filepos, new_path) if title: return title - return None + if filepos is not None: + for v in nodes: + if "label" in v: + new_path = path + [v["label"]] + if v["ref"].split("#", 1)[0] == ref.split("#", 1)[0]: + title = " / ".join(new_path) + return title + title = TOCParser.find_label_path(v["children"], ref, None, new_path) + if title: + return title + if path == [] and ref and ref.endswith('.html'): + caller_dir = os.path.dirname(os.path.abspath(__file__)) + search_dirs = [caller_dir, os.getcwd()] + for d in search_dirs: + html_path = os.path.join(d, ref) + if os.path.isfile(html_path): + title = TOCParser.parse_html_title(html_path) + if title: + return title + for d in search_dirs: + for root, _, files in os.walk(d): + if ref in files: + html_path = os.path.join(root, ref) + title = TOCParser.parse_html_title(html_path) + if title: + return title + return None if __name__ == "__main__": # ==== 批量测试指定toc/html/filepos列表 ==== @@ -182,8 +137,6 @@ if __name__ == "__main__": [config.EXAMPLES_DIR + "/政治哲學的12堂Podcast", "ch1.xhtml#_idParaDest-4", ""], ] for epub_dir, html_file, filepos in test_cases: - # 自动查找epub目录下的toc.ncx - import os toc_path = None for root, _, files in os.walk(epub_dir): for f in files: @@ -200,39 +153,32 @@ if __name__ == "__main__": with open(toc_path, "r", encoding="utf-8") as f: soup = BeautifulSoup(f, "xml") nav_map = soup.find("navMap") - toc_tree = parse_navpoints(nav_map.find_all("navPoint", recursive=False)) - label_path = find_label_path(toc_tree, html_file, filepos) + toc_tree = TOCParser.parse_navpoints(nav_map.find_all("navPoint", recursive=False)) + label_path = TOCParser.find_label_path(toc_tree, html_file, filepos) print(f"find_label_path: {label_path if label_path else '未找到章节/标题'}") - - # tocb中不存在html,直接测试parse_html_title html_path = os.path.join(epub_dir, html_file.split('#')[0]) if os.path.exists(html_path): - title = parse_html_title(html_path) + title = TOCParser.parse_html_title(html_path) print(f"解析html标题: {html_path} => {title if title else '未找到标题'}") - # 新增:根据selectedtext定位章节标题 selectedtext = '从变法思想看,王安石变法最大的魅力是“民不加赋而国用足”:老百姓上缴的税率不增,国库的总收入仍可以' - section = find_section_by_selectedtext(html_path, selectedtext) + section = TOCParser.find_section_by_selectedtext(html_path, selectedtext) print(f"selectedtext定位到的章节标题: {section if section else '未找到相关标题'}") else: print(f"未找到html文件: {html_path}") except Exception as e: print(f"测试失败: {e}") - # ==== 新增:测试变宋笔记章节定位和html标题解析 ==== print("\n==== 测试: 变宋笔记章节定位和html标题解析 ====") - # 假设笔记数据如下 note_idref = 'text/part0002_split_003.html' note_filepos = None - # 变宋toc.ncx路径 bian_song_toc = config.EXAMPLES_DIR + "/变宋/toc.ncx" - import os if os.path.exists(bian_song_toc): with open(bian_song_toc, "r", encoding="utf-8") as f: soup = BeautifulSoup(f, "xml") nav_map = soup.find("navMap") - toc_tree = parse_navpoints(nav_map.find_all("navPoint", recursive=False)) - # 先尝试用find_label_path查找章节 - label_path = find_label_path(toc_tree, note_idref, note_filepos) + toc_tree = TOCParser.parse_navpoints(nav_map.find_all("navPoint", recursive=False)) + label_path = TOCParser.find_label_path(toc_tree, note_idref, note_filepos) print(f"查找 {note_idref}: ", label_path if label_path else "未找到章节,尝试解析html标题") else: print(f"未找到toc.ncx: {bian_song_toc}") +