Welcome

User: Pass:   register


;
; Author:    FiberOPtics - mirc.fiberoptics@gmail.com
;
; Usage:     * $htmlconv(<string>)[.rem] 
;
;            -> strips html tags from a string and converts html entities to plain text.
;            -> if the .rem property is specified, it doesn't translate the entities, instead
;               it just removes them.
;
;            * $_htmlconv(<infile> [,outfile])
;
;            -> to convert an entire file like a html source file
;            -> if no outfile is specified, the infile will be used as outfile
;            -> the outfile is overwritten if it exists, otherwise it is created
;            -> if no folderpath is given (only filename), then the snippet assumes it is in $mircdir
;            -> returns 1 if the conversion was successful, returns 0 in case of an error
;            -> this code treats the entire file as one entity, very different from parsing
;               the file line by line with $htmlconv, where chunking would exist.
;
; Notes:     The COM code in $htmlconv() is only executed when there are still html entities left in the 
;            string after stripping tags, and after substitution of common entities like &quot; &lt; etc.
;            (and if you didn't use the .rem property)
;
;            The reason to do custom replacing, is because it goes faster in native mIRC script, than
;            calling the COM object. When calling the COM a few hundred times in a row, it is considerably 
;            slower than with native scripting, on less than 100 consecutive calls you wouldn't really
;            notice that much difference.
; 
;            The reason for the COM is so I wouldn't have to include a ridiculously large list to take
;            into account any existing html entity, some of which are rarely used, but should still be
;            converted when present in the string. In most of the cases, since common entities are 
;            replaced with mIRC scripting, the COM code won't be called. Yet, if there are still entities
;            left, the COM can and will take care of it.
;
;            Because of this, $htmlconv() offers a mix of both speed and completeness.
;
; Reqs:      $htmlconv:  mIRC 5.91. The COM code in $htmlconv() needs 6.14, although you can use the alias
;                        on lower versions just fine, as it simply won't use the COM code then.
;
;            $_htmlconv: mIRC 5.91, Windows ME or higher.
;
; Install:   The aliases go into your remotes: alt+r -> tab "Remote" -> paste
;      
; Examples:  $htmlconv(foo &#44; bar and &copy; &quot; and &amp; <br>)
;            $htmlconv(foo &#44; bar and &copy; &quot; and &amp; <br>).rem
;            $htmlconv(<A href="http://www.mirc.com/index.html">Home</A>)
;            $_htmlconv(mysource.html,output.txt)
;        

; $htmlconv(<string>)[.rem]
;   Strips html tags from $1 and converts html entities to plain text.
;   With the .rem property the entities are deleted instead of converted.
;   Returns the resulting text.
alias htmlconv {
  ; <br> becomes a line break. Every complete tag is removed, as well as a tag
  ; fragment that is cut off at the start or at the end of the string.
  ; %u only receives the substitution count of $regsub and is not used again.
  var %t, %u = $regsub($replace($1,<br>,$crlf),/^[^<]*>|<[^>]*>|<[^>]*$/g,,%t)
  ; Nothing that looks like an entity is left: done.
  if (!$regex(%t,/&\S+?;/)) return %t
  ; .rem property: delete the entities instead of translating them.
  if ($prop == rem) { !.echo -q $regsub(%t,/&\S+?;/g,,%t) | return %t }
  ; Native translation of the common entities, written as entity,replacement
  ; pairs. &nbsp; uses $chr(160) because a literal would be lost as whitespace.
  ; Accented letters such as &eacute; and &Eacute; are deliberately not listed:
  ; $replace matches case-insensitively, so both would collapse to one letter.
  ; They are left to the COM pass below.
  %t = $replace(%t,&nbsp;,$chr(160),&quot;,",&lt;,<,&gt;,>,&amp;,&,&hellip;,...,&reg;,®, $&
    &copy;,©,&trade;,™,&frac14;,¼,&frac12;,½,&frac34;,¾,&sup3;,³,&sup2;,²,&deg;,°,&bull;,•, $&
    &rsaquo;,›,&lsaquo;,‹,&raquo;,»,&laquo;,«,&rsquo;,’,&lsquo;,‘,&rdquo;,”,&ldquo;,“, $&
    &ndash;,–,&mdash;,—,&apos;,',&tilde;,~,&times;,×,&divide;,÷,&para;,¶, $&
    &cent;,¢,&pound;,£,&yen;,¥,&sect;,§)
  ; The COM pass needs mIRC 6.14, and is skipped when the table handled everything.
  if ($version < 6.14) || (!$regex(%t,/&\S+?;/)) return %t
  ; $ticks keeps the connection names distinct from other open connections.
  var %html = html $+ $ticks, %body = body $+ $ticks
  .comopen %html htmlfile
  if (!$comerr) {
    ; Let the htmlfile object parse the text, then read body.innerText back.
    ; %t is only replaced once all three calls succeeded, so a failing COM call
    ; still returns the natively converted text instead of a status value.
    if ($com(%html,write,1,bstr*,$+(<html><body>,%t,</body></html>))) {
      if ($com(%html,body,2,dispatch* %body)) {
        if ($com(%body,innertext,2)) %t = $com(%body).result
      }
    }
  }
  ; Reached on normal completion and on a script error alike, so the COM
  ; connections never stay open. No /reseterror is issued here, so a trapped
  ; error is still reported by mIRC after this cleanup.
  :error
  if ($com(%body)) .comclose %body
  if ($com(%html)) .comclose %html
  return %t
}

; $_htmlconv(<infile> [,outfile])
;   Converts a whole html file to plain text through a VBScript that is run by
;   the MSScriptControl COM object. The file is handled as one single document.
;   Without an outfile the infile itself is overwritten.
;   Returns 1 if the conversion was successful, 0 in case of an error.
alias _htmlconv {
  ; The infile has to exist, and Windows 95 and 98 are not supported.
  if (!$isfile($1)) || ($os isin 9598) return 0
  ; Short file names are used for the paths handed to the VBScript.
  ; $ticks keeps the connection name distinct from other open connections.
  var %in = $shortfn($1), %out = $shortfn($2), %mss = mss $+ $ticks
  ; No outfile given: write the result back to the infile.
  if (* !iswm $2) %out = %in
  .comopen %mss MSScriptControl.ScriptControl
  if ($comerr) {
    ; Close the connection in case mIRC registered the name despite the failure.
    if ($com(%mss)) .comclose %mss
    return 0
  }
  var %t, %n = $crlf
  %t = $com(%mss,language,4,bstr*,vbscript)
  ; The VBScript: read the whole infile, let an htmlfile object parse it, and
  ; write body.innertext to the outfile, which is created or overwritten.
  %t = $&
    set html = createobject("htmlfile") %n $&
    set fso = createobject("scripting.filesystemobject") %n $&
    set inf = fso.opentextfile( $+(",%in,") ) %n $&
    src = inf.readall %n $&
    inf.close %n $&
    html.write "<html><body>" & src & "</body></html>" %n $&
    set outf = fso.createtextfile( $+(",%out,") ,true) %n $&
    outf.write html.body.innertext %n $&
    outf.close 
  ; The value of $com for the executestatement call is the return value.
  %t = $com(%mss,executestatement,1,bstr*,%t)
  .comclose %mss
  return %t
  ; Only reached through a script error. The error is reset first: otherwise it
  ; would keep propagating and the caller would never receive the 0.
  :error
  reseterror
  if ($com(%mss)) .comclose %mss
  return 0
}






© 1996-2010 hdesk.org and dalnethelpdesk.com