#!/usr/local/bin/gawk -f
##public script mkfilters filter addligat code BEGIN{anf="^(([^<]*(<([^\042\047>]|\042[^\042]*\042|\047[^\047]*\047)*>))*[^<&]*((&[a-zA-Z#][a-zA-Z0-9]+[; ])[^<&]*)*)";antlig=split("st;st;ffl;ffi;fl;fi;ff",tkn,";");bn="\134\133• ";uta=bn "(([1234]?[[:digit:]])|(50))\134\135";utb=bn "\\.((1?[[:digit:]])|(20))\134\135"} {while((nmr=gensub(anf uta,"\\7|","1"))!=$0){nmr+=0;$0=gensub(anf uta,"\\1\\" nmr+((nmr>35)?12941:((nmr>20)?12860:((nmr<1)?9450:9311))) ";","1")};while((nmr=gensub(anf utb,"\\7|","1"))!=$0){nmr+=0;$0=gensub(anf utb,"\\1\\" nmr+((nmr>10)?9440:((nmr<1)?9471:10101))";","1")};$0=gensub("\134\133•\134\135","\\◉","g");for(i=97;i<123;i++){$0=gensub(anf bn sprintf("%c",i-32) "\134\135","\\1\\" i+9301 ";","g");$0=gensub(anf bn sprintf("%c",i) "\134\135","\\1\\" i+9327 ";","g")}; for(i=1;i<=antlig;i++){while($0~(tlf=anf tkn[i])){$0=gensub(tlf,"\\1\\" 64263-i ";","1")}};print}
##public script mkfilters filter addligat creation : 2006-07-20
##public script mkfilters filter addligat debuging 1 command : echo "möäbla� [\0225\024048] blaffmflm [\0225\024049] bl[\0225\0240a]affmflm
\042 blastbla bl=\047a\047fibfila>fimffi[\0225\0240G]m\042st\047mffl[\0225\0240.0]mfa mflm
nstnånctmn [\0225\024032]mn¿n [\0225\02402]m¡n[\0225\024048]nm \0227 m[\0225\02400]nm[\0225\02401]nst¿m[\0225\02402]n¿stm[\0225\02403]nstm[\0225\02409]mstn[\0225\024010]n[\0225\0240A]stm[\0225\024020]n[\0225\0240B][\0225\0240Z]stm[\0225\024021]nstm[\0225\024035]nm[\0225\024036]nm[\0225\024048]nm[\0225\024049]0nm[\0225\024050]nm[\0225\0240.1][\0225\024051][\0225\0240.10]nm \0227 bla1 [\0225\02401] b[\0225\0240z]l[\0140]a[{]a[@][[]a[\0225\0240.11] [\0225\0240.19]2 [\0225\0240.20]2 [\0225\02402] bla 3 [\0225\02403] \0227 � ngt" | cp2htmlz.awk | addligat.awk
##public script mkfilters filter addligat debuging 1 output : möäbla� ㊽ blaffmflm ㊾ blⓐaffmflmfimffiⒼm"st'mffl⓿mfa mflm
nstnånctmn ㉜mn¿n ②m¡n㊽nm — m⓪nm①nst¿m②n¿stm③nstm⑨mstn⑩nⒶstm⑳nⒷⓏstm㉑nstm㉟nm㊱nm㊽nm㊾0nm㊿nm❶[• 51]❿nm — bla1 ① bⓩl[`]a[{]a[@][[]a⓫ ⓳2 ⓴2 ② bla 3 ③ — � ngt
##public script mkfilters filter addligat filter : addligat.awk
##public script mkfilters filter addligat last-modified : 2008-08-11
##public script mkfilters filter addligat purpose : Adds HTML code for ligatures to UTF-8 encoded HTML or text files without changing the content of HTML tags and entities. Also works with ASCII encoded HTML files, where some extra conversions occur, for example to add circled digits and letters as follows: digits 0x24ea (0), 0x2460-0x2473 (1-20), 0x3251-0x325f (21-35), 0x32b1-0x32bf (36-50); reversed 0x24ff (0), 0x2776-0x277f (1-10), 0x24eb-0x24f4 (11-20); double 0x24f5-0x24fe (1-10); latin 0x24b6-0x24cf (A-Z), 0x24d0-0x24e9 (a-z). Read the code for details.
##public script mkfilters filter addligat run-example 1 command : cat filename.txt | cp2htmlz.awk | addligat.awk
##public script mkfilters filter addligat run-example 1 output :
# addligat -- gawk filter that rewrites every input line and prints it.
#
# Conversions, applied in this order:
#   1. "[" bullet separator N "]"   with N = 0..50  -> circled digit
#   2. "[" bullet separator .N "]"  with N = 0..20  -> reversed circled digit
#   3. "[" bullet "]"                               -> fisheye character
#   4. "[" bullet separator X "]"   with X = A-Z or a-z -> circled letter
#   5. the letter groups st, ffl, ffi, fl, fi, ff   -> ligature
# Steps 1, 2, 4 and 5 emit numeric character references ("&#N;") and only
# touch text that the "anf" prefix pattern accepts, which keeps them out of
# tags and entities.  Step 3 is a plain global substitution.
#
# Changes against the previous one-line version:
#   * The replacement used to be built as  "\\1\\" N ";" .  gensub() reads a
#     backslash followed by a digit as a back-reference, so the first digit
#     of N was swallowed and no character reference was produced.  The
#     reference is now spliced in with match()/substr(), which needs no
#     replacement-string escaping at all.
#   * The letter markers were replaced with gensub(..., "g") on a pattern
#     anchored with "^"; an anchored pattern can match only once, so only one
#     marker per letter and line was converted.  They are now looped like the
#     other markers.
#
# Requires gawk (gensub and the three-argument match).
# NOTE(review): the copy of this program stored in the "code" metadata
# comment near the top of the file is not touched by this change -- resync it
# if the script is regenerated from that comment.

BEGIN {
    # Line prefix that may precede a marker: any number of complete tags
    # (with quoted attribute values), then text made of plain characters and
    # complete entities.  Group 1 is the whole prefix.  The octal escapes
    # \042 and \047 are the double and the single quote.
    anf = "^(([^<]*(<([^\042\047>]|\042[^\042]*\042|\047[^\047]*\047)*>))*[^<&]*((&[a-zA-Z#][a-zA-Z0-9]+[; ])[^<&]*)*)"

    # Ligature sources; entry i is replaced by code point 64263 - i.
    # NOTE(review): "st" is listed twice, so entry 2 (which maps to 64261)
    # can never match after entry 1 has run -- confirm what was intended.
    antlig = split("st;st;ffl;ffi;fl;fi;ff", tkn, ";")

    # Marker opener as a regex: "\[" (written \134\133), the bullet, and one
    # separator character, all copied verbatim from the original.
    # NOTE(review): the separator may be a no-break space rather than a
    # plain space -- confirm against the real input.
    bn = "\134\133• "

    # Number markers; group 7 of "anf uta" / "anf utb" is the number.
    uta = bn "(([1234]?[[:digit:]])|(50))\134\135"
    utb = bn "\\.((1?[[:digit:]])|(20))\134\135"
}

# Code point of the circled digit for n in 0..50.
function circled_code(n) {
    if (n > 35) return n + 12941
    if (n > 20) return n + 12860
    if (n < 1)  return n + 9450
    return n + 9311
}

# Code point of the reversed (1..20: negative, 0: negative zero) circled digit.
function reversed_code(n) {
    if (n > 10) return n + 9440
    if (n < 1)  return n + 9471
    return n + 10101
}

# Rebuilds line after a successful match(line, anf X, m): keeps the protected
# prefix m[1], puts "&#code;" where the marker was, then appends the rest.
# The match is anchored at the start, so the rest begins right after m[0].
function put_ncr(line, m, code) {
    return m[1] "&#" code ";" substr(line, length(m[0]) + 1)
}

# Returns line with all conversions applied.  m, n, i and re are locals.
function addligat(line,    m, n, i, re) {
    # Circled digits.  Each pass converts one marker; the loop ends when no
    # marker is left.
    re = anf uta
    while (match(line, re, m)) {
        n = m[7] + 0
        line = put_ncr(line, m, circled_code(n))
    }

    # Reversed circled digits.
    re = anf utb
    while (match(line, re, m)) {
        n = m[7] + 0
        line = put_ncr(line, m, reversed_code(n))
    }

    # Bullet alone in brackets.  This substitution is not guarded by anf and
    # emits the character itself, exactly as before.
    # NOTE(review): the other conversions emit "&#N;" -- confirm whether a
    # character reference is wanted here as well.
    line = gensub("\134\133•\134\135", "\\◉", "g", line)

    # Circled letters: i runs over the ASCII codes of a..z, i - 32 is the
    # matching capital.
    for (i = 97; i < 123; i++) {
        re = anf bn sprintf("%c", i - 32) "\134\135"
        while (match(line, re, m))
            line = put_ncr(line, m, i + 9301)

        re = anf bn sprintf("%c", i) "\134\135"
        while (match(line, re, m))
            line = put_ncr(line, m, i + 9327)
    }

    # Ligatures, longest letter groups first as ordered in tkn.
    for (i = 1; i <= antlig; i++) {
        re = anf tkn[i]
        while (match(line, re, m))
            line = put_ncr(line, m, 64263 - i)
    }

    return line
}

{
    print addligat($0)
}