1 # Copyright 2009 Interchange Development Group and others
3 # This program is free software; you can redistribute it and/or modify
4 # it under the terms of the GNU General Public License as published by
5 # the Free Software Foundation; either version 2 of the License, or
6 # (at your option) any later version. See the LICENSE file for details.
8 CodeDef strip_html Filter
9 CodeDef strip_html Description Strip HTML
10 CodeDef strip_html Routine <<EOR
14 # get rid of HTML comments
15 $val =~ s/<!(?:--(?:[^-]*|-[^-]+)*--\s*)>//s;
17 # replace these container tags with a space
18 $val =~ s{</?(?:p|ol|ul|li|div|h[123456]|pre|dl|dd|dt|form|option|textarea|blockquote)(?:\s[^>]*)?>}{ }ig;
20 # replace these self-closing tags with a space
21 $val =~ s{<[bh]r(?:\s*/|\s[^>]*)?>}{ }ig;
23 # remove all remaining tags and leave no space
24 $val =~ s{</?\w[^>]*>}{}g;
26 # collapse all whitespace, as HTML does when rendering anyway,
27 # to facilitate truncating at a certain number of characters