From 19e45676a8c2da7ff08712f030fa8d0c6e0268b5 Mon Sep 17 00:00:00 2001 From: Andrea Brancaleoni Date: Fri, 8 Mar 2024 12:32:33 +0100 Subject: [PATCH] Move scripttagextractor to AI --- assets/scripttagextractor.pkl | Bin 0 -> 5773 bytes assets/scripttagextractor.py | 122 +++------------------------------- 2 files changed, 8 insertions(+), 114 deletions(-) create mode 100644 assets/scripttagextractor.pkl diff --git a/assets/scripttagextractor.pkl b/assets/scripttagextractor.pkl new file mode 100644 index 0000000000000000000000000000000000000000..17af809b0bbf903e1bd8fb3ce98805b08c6b3f7c GIT binary patch literal 5773 zcma)A%aWT|6797=VAhV_*p7*D01_Uv=!L)~Ko49(C0;uLDxd_2wuB`3`Z@Q31Z8`= zBB}xjU2f*hJbCgYQCMgBDJzffcIR#W?xVN=*B^h7k01ZsU3byP4_Q$iew^Ni+vD!b z_`|37had0h5A4S0@XI*fMTdCz@k3C~&f!|!`<|Wp(lw1nRvtBK6?*D9DpyIMPX*t# zj=pELS`jp%BqV`!J^NX24f&o_MN}v!Q%{A^z0{FDZ6e8OOg%qEy3)j&YB~?%a>MU; zdxfKAvUSBvIGPEj?$mLS!^Ze%8fqHr$(;MwZVgA+x|iNs>l_(*v30L`w^uaE+n&8P z*6lnh&2A52=fUS9y6^FDt)kItt`+Qj=7W6>OM4X+c^zx%*)OWxAFZnI$ssYd31ZKuEkYf0wm<7~d(CyVvuswIax!_6{z^728B=MU-TX>(n{=Qi-nVH~H*GkmXNT)Ca2 zsU41{ZXpK&@@7B#Qkq8PEDy@%7gGmLr3;DgCMunLzMh;OGxa-Sop<@6@iQzj(w%0m z?Q2g|5X)Tf#IFZmQh+U|fxVY4!C>JOQ7gXSc`2YVwGQyJ3EZGX&S<{*ek5MCKXS-3 zHJ(R3M8pqjo5h*LZju1+#~_nie>AHi!){IF=Sk1oczPPP2X^^XY3`cPuWl4ox zQ9mh;^)w9&`xrP%j_XghIF1-j9^`XaY~E&*t!3wWg{f%dT!4ead@bM38sN<2Y;)@C zet=u#3D~>loB;T8*uM^bkqyZJ2+^c=NHv0jt9L)-?jFuw_ksm*w-exbMGTW_PwU>DR$-R5;F-pkoadm(kHPwaxH$zr zj|rXv-3H#+ci`p>ZfUk|jJlZ~@%uh1^Jmms{kRFz5b*@xaV}DUQS^!oIC~J~_0+bW zrrkYYjd9WlTzN2c1+f?-e+sz8LOz2k)HfAk4!*wIMe2?B_q(LQF`1TiL(cl=VEIp985 z%usth+Z|^yT=E)A=B<#J`L#B)$=}G&KA0+W|LD@=M2>mRb3BHVACK!v_Pv>8rgkKL z(LD7r%((8izERtQ^Ra#Ez>A;hIlLw9r*t3Y1A4A~!9T|~u&DdlZe-#9>;>FL^}(}u zYGXGjwqB~3oCE0|h6=39!d#n8ci>g%9B2%=G&9u55Uc1-Ga+nD^e07(4OK)RpnIc6 z==IUX{SJ4`H-e=)96>`FGua!>#PFq$BNqQy%7v{4tws9nPkT?YxG{O1_=CFW#h_S} zT$31_e>mQV_Fypw7qVP-aR9F=&-ePd4E9EUfObO7d3FUYMtW6YmZu?)c;bI)u11y^ z6hXB^UkpX%3@uY4hTsYz>*qe=ddS>U-9~g+M!4@aH@&ircvG%Wuud5CWy0^ee1@*P zanvt(`6V2wGUL6UdGM7j(jVtr=q2a|@Qs*5Hzbcwq#h6}!lk&HT@LTMmgqpn8NBvy z+86Z+E-BgfO})Zr)FJ6BohCZ;V`Sz$f0{})M&PAFR7_uxs|J|t)vme^Jib=X&*6)Z z+1*-WnNY1mv@~!PWRtHQk3lbC-x!?BbUfv3j$d#Puwj9#!O5E3LMt6(eSU-J*Bj<$vhuSkw3y)(+ zBlQI>R7}OtSSe;E=UrU}uHE(OdRs0Db`I#(m&f(&>87`!>w>k~duB!TaoDgLg`cb+ z8TMi8EpL60)`srvp_k71KVXkLj(L304^P3`eA{`@g91(DwU(S>{p+b$Ykl9p5q$Q0 zAjf+`v>^2hSlB{CVm2Db$Y~@Y-`V6qV_!_O(0~pvp_yIGM?_OJ@*&p6a(ef(23T8a=6accbw7Hi7!oWkPkiJy(wO8pV#K*cOA0XfW}k($`u^!>oRH!e{hh41FT zFVUUAkT=VbKZ3^^_&b415o3~{4vTL+x83a4B5aP|>d!p(ECKT~?QN)4%r(e?o>m0^ z;L#&=F7KPG?2Yk(o}7sR?SF)i80SELBWBKXY~Z&wsogtHNM9wj?3YeS=FFf_8gMB3 zs{##6>dX?rzfF|AB@Q!=Ct^dUbNA!g{G0UYt?nG7Z{RSwoFB7|V6E4Vy4gHTH*0McFSJSaYdgz2Hd4XN_FQvJk0G0vDK{{E?z96RxF+uS zqgz9?>F;a+GiiV!WzhS`KD>^5@d|%h!|b8=2j?h*E&;ZncVES8Sw|!FxW#}CEjwO* zo7`}I*G)`+k$#Vvoy@>3(_&qYetmYY(>nvL$M*7ua|F*d<)NXzVpxT)O$9n5GOi-; zA4+g$8R`nDb<8qED_d1uY!rvq7-qt5hUa)E?-BYj0(R+4fh%S|&NY4Qe%DQybMgMk zuxL-56RraOx>>N}1%_=p8&fUYt^2>)54;%`(7nHoE14;Nr$ejG?(N;kU8C#dT_4dJ z;L}UT{ogaf3A|_hN{jc-%b%ydg@posVrRy@i?e~-(E~dC4lsWA;d@U@4E!KmMEk}Q z`aXCM?;QvDzV)Ay@vOS<$xa++GOi_KBr3L9( zlw-=Szh0OIZRCHN^*hdMb`IXxx?|Y>>#q-=K6kH*>(ldpzJB=h>CaCm+u0S>sd-lk H?>_$zz$2|( literal 0 HcmV?d00001 diff --git a/assets/scripttagextractor.py b/assets/scripttagextractor.py index abcae28e..68cd430e 100644 --- a/assets/scripttagextractor.py +++ b/assets/scripttagextractor.py @@ -1,117 +1,11 @@ -from dataclasses import dataclass -from glob import glob -from html.parser import HTMLParser -from os import environ, path -from shutil import copyfile -from sys import stderr -from typing import List +#!/usr/bin/env python3 -class DontPrint(object): - def write(*args): pass -out = DontPrint() +# load pickle file from first argument, then exit +import pickle +import os -@dataclass -class FoundScript: - line_number: int - start_offset: int - data: str +# get the directory of your script +localdir = os.path.dirname(os.path.realpath(__file__)) - def new_lines(self) -> int: - return len(self.data.split("\n")) - 1 - - -class MyHTMLParser(HTMLParser): - def __init__(self, *, convert_charrefs: bool = True) -> None: - super().__init__(convert_charrefs=convert_charrefs) - self.current_tag = None - self.scripts : List[FoundScript] = [] - - def handle_starttag(self, tag, attrs): - self.current_tag = tag.lower() - - def handle_endtag(self, tag: str) -> None: - self.current_tag = None - return super().handle_endtag(tag) - - def handle_data(self, data: str) -> None: - if self.lasttag.lower() == "script" and self.current_tag == "script": - self.scripts.append(FoundScript(self.lineno, self.offset, data)) - return super().handle_data(data) - - -def main(source_file, suffix, add_suffix_to_original, dry_run=False): - parser = MyHTMLParser() - with open(source_file) as original_file: - original_file_data = original_file.read() - parser.feed(original_file_data) - - if parser.scripts: - current_line_number = 1 - script_data = "" - for s in parser.scripts: - add_lines = s.line_number - current_line_number - script_data += "\n" * add_lines - script_data += "; " + s.data - current_line_number += add_lines + s.new_lines() - - output_file = f"{source_file}{suffix}" - print("Extracting", source_file, "to", output_file, file=out) - if not dry_run: - with open(output_file, "w") as f: - f.write(script_data) - - if add_suffix_to_original: - destination = f"{source_file}{add_suffix_to_original}" - print("Copying", source_file, destination, file=out) - if not dry_run: - copyfile(source_file, destination) - - -if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser( - "Script tag extractor", - description="Extracts content of