;
; Author: FiberOPtics - mirc.fiberoptics@gmail.com
;
; Usage: * $htmlconv(<string>)[.rem]
;
; -> strips html tags from a string and converts html entities to plain text.
; -> if the .rem property is specified, the entities are not translated; they are
; simply removed.
;
; * $_htmlconv(<infile> [,outfile])
;
; -> to convert an entire file like a html source file
; -> if no outfile is specified, the infile will be used as outfile
; -> the outfile is overwritten if it exists, otherwise it is created
; -> if no folderpath is given (only filename), then the snippet assumes it is in $mircdir
; -> returns 1 if the conversion was successful, returns 0 in case of an error
; -> this code treats the entire file as one entity, very different from parsing
; the file line by line with $htmlconv, where chunking would exist.
;
; Notes: The COM code in $htmlconv() is only executed when there are still html entities left in the
; string after stripping tags, and after substitution of common entities like &quot; &lt; etc.
; (and if you didn't use the .rem property)
;
; The reason for the custom replacing is that it is faster in native mIRC script than
; calling the COM object. When calling the COM a few hundred times in a row, it is considerably
; slower than native scripting; with fewer than 100 consecutive calls you wouldn't really
; notice much difference.
;
; The reason for the COM is so I wouldn't have to include a ridiculously large list to take
; into account any existing html entity, some of which are rarely used, but should still be
; converted when present in the string. In most of the cases, since common entities are
; replaced with mIRC scripting, the COM code won't be called. Yet, if there are still entities
; left, the COM can and will take care of it.
;
; Because of this, $htmlconv() offers a mix of both speed and completeness.
;
; Reqs: $htmlconv: mIRC 5.91. The COM code in $htmlconv() needs 6.14, although you can use the alias
; on lower versions just fine, as it simply won't use the COM code then.
;
; $_htmlconv: mIRC 5.91, Windows ME or higher.
;
; Install: The aliases go into your remotes: alt+r -> tab "Remote" -> paste
;
; Examples: $htmlconv(foo&nbsp;, bar and &copy; &quot; and &amp; <br>)
; $htmlconv(foo&nbsp;, bar and &copy; &quot; and &amp; <br>).rem
; $htmlconv(<A href="http://www.mirc.com/index.html">Home</A>)
; $_htmlconv(mysource.html,output.txt)
;
; $htmlconv(<string>)[.rem]
; Strips html tags from the string and converts html entities to plain text.
; With the .rem property the entities are removed instead of translated.
; Common entities are replaced natively. Anything still left is handed to the
; "htmlfile" COM object (mIRC 6.14 or later), whose body innertext is returned.
; If the COM step cannot be completed, the natively converted text is returned.
alias htmlconv {
  ; Turn <br> into a line break, then drop complete tags and any tag fragment
  ; cut off at the start or the end of the string. %u only absorbs the
  ; substitution count returned by $regsub.
  var %t, %u = $regsub($replace($1,<br>,$crlf),/^[^<]*>|<[^>]*>|<[^>]*$/g,,%t)
  ; Nothing that looks like an entity is left, so we are done.
  if (!$regex(%t,/&\S+?;/)) return %t
  if ($prop == rem) { !.echo -q $regsub(%t,/&\S+?;/g,,%t) | return %t }
  ; Native table of common entities: each entity name is followed by its replacement.
  %t = $replace(%t,&nbsp;,$chr(160),&quot;,",&lt;,<,&gt;,>,&amp;,&,&hellip;,...,&reg;,®, $&
    &copy;,©,&trade;,™,&frac14;,¼,&frac12;,½,&frac34;,¾,&sup3;,³,&sup2;,²,&deg;,°,&bull;,•, $&
    &rsaquo;,›,&lsaquo;,‹,&raquo;,»,&laquo;,«,&rsquo;,’,&lsquo;,‘,&rdquo;,”,&ldquo;,“,&ndash;, $&
    –,&mdash;,—,&#39;,',&apos;,',&tilde;,~,&times;,×,&divide;,÷,&para;,¶,&eacute;,é,&Eacute;,É,&cent;, $&
    ¢,&pound;,£,&yen;,¥,&sect;,§)
  if ($version < 6.14) || (!$regex(%t,/&\S+?;/)) return %t
  ; $ticks is appended so the COM object names differ between calls.
  var %html = html $+ $ticks, %body = body $+ $ticks
  .comopen %html htmlfile
  if ($comerr) return %t
  ; Only overwrite %t once every COM call has succeeded, so that a failing
  ; call leaves the natively converted text intact instead of a status value.
  if ($com(%html,write,1,bstr*,$+(<html><body>,%t,</body></html>))) {
    if ($com(%html,body,2,dispatch* %body)) {
      if ($com(%body,innertext,2)) %t = $com(%body).result
    }
  }
  :error
  ; Reached both on normal fall-through and after a script error. Clearing the
  ; error keeps it from propagating to the calling script.
  reseterror
  if ($com(%body)) .comclose %body
  if ($com(%html)) .comclose %html
  return %t
}
; $_htmlconv(<infile> [,outfile])
; Converts a whole html file to plain text. A small VBScript is run through the
; MSScriptControl COM object: it reads the input file, writes it into an
; "htmlfile" document and saves the body innertext to the output file.
; When no outfile is given, the input file is overwritten.
; Returns the result of the executestatement call on the normal path, and 0
; when the input file is missing, the COM object cannot be opened, or a script
; error is caught.
alias _htmlconv {
  ; Bail out when the input is not a file or when $os is found in the string 9598.
  if (!$isfile($1)) || ($os isin 9598) return 0
  var %in = $shortfn($1), %out = $shortfn($2), %mss = mss $+ $ticks
  ; No second parameter: write the result back over the input file.
  if (* !iswm $2) %out = %in
  .comopen %mss MSScriptControl.ScriptControl
  if ($comerr) return 0
  ; %n separates the VBScript statements assembled below.
  var %t, %n = $crlf
  %t = $com(%mss,language,4,bstr*,vbscript)
  %t = $&
    set html = createobject("htmlfile") %n $&
    set fso = createobject("scripting.filesystemobject") %n $&
    set inf = fso.opentextfile( $+(",%in,") ) %n $&
    src = inf.readall %n $&
    inf.close %n $&
    html.write "<html><body>" & src & "</body></html>" %n $&
    set outf = fso.createtextfile( $+(",%out,") ,true) %n $&
    outf.write html.body.innertext %n $&
    outf.close
  %t = $com(%mss,executestatement,1,bstr*,%t)
  .comclose %mss
  return %t
  :error
  ; Clear the caught error so it does not propagate to the calling script.
  ; Without this the caller would be halted instead of receiving 0.
  reseterror
  if ($com(%mss)) .comclose %mss
  return 0
}