HTML Eingaben intelligent filtern

Ich stehe bei einem Projekt vor dem Problem, dass der Text einer Eingabe nur begrenzt HTML-formatiert sein darf. Dabei möchte ich Tags wie strong, em usw. erlauben, div-Container zum Beispiel aber nicht.

Außerdem sollen eigene XML-Tags erlaubt sein, die ich selbst definiere. Da reguläre Ausdrücke zum Parsen von XHTML sehr ungeeignet sind, habe ich einen Filter geschrieben, der auf den DOM-Klassen von PHP basiert.

Hier stelle ich die Klasse öffentlich zur Verfügung. Sie lässt sich auch ganz leicht ins Zend Framework einbinden.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
<?php
/**
 * class XHTMLFilter
 * needs PHP's built in DOM classes
 *
 * Copyright 2010 Jan Oliver Oelerich
 *
 * Filters markup from any input source by a set of rules. Each rule names
 * an element and the parent elements it may appear in. An element that has
 * a rule is always accepted at the top level of the input. Everything that
 * is not accepted is turned into escaped text instead of being dropped.
 *
 * Usage:
 *      $filter = new XHTMLFilter();
 *      $filter->allowTag('em')
 *             ->allowTag('strong', array('em'));
 *      echo $filter->filter($htmlSource);
 *
 *      This allows em and strong elements. em is accepted at the top level
 *      only; strong is accepted at the top level and directly inside em.
 *
 * SECURITY: unless an attribute allow-list is passed to allowTag(), accepted
 * elements keep ALL of their attributes (including on* event handlers).
 * Pass an allow-list when the input is untrusted.
 */
class XHTMLFilter
{
    /**
     * List of rules. Each rule is an array with the keys
     * 'name' (element name), 'in' (list of accepted parent element names)
     * and 'attributes' (null = keep all, array = allow-list).
     *
     * @var array
     */
    private $_allowed = array();


    /**
     * Adds a rule that accepts element $name at the top level of the input
     * and directly inside every element listed in $allowedIn.
     *
     * Example: $this->allowTag('div', array('strong'));
     * Accepts 'div' at the top level and within 'strong' (but not, e.g.,
     * within 'em').
     *
     * @param string     $name              element name (case-sensitive)
     * @param array      $allowedIn         names of accepted parent elements
     * @param array|null $allowedAttributes attribute names to keep on the
     *                                      element; null keeps every attribute
     * @return XHTMLFilter
     */
    public function allowTag($name, $allowedIn = array(), $allowedAttributes = null) {
        $this->_allowed[] = array(
            'name'       => (string) $name,
            'in'         => (array) $allowedIn,
            'attributes' => $allowedAttributes,
        );
        return $this;
    }

    /**
     * Filters a markup fragment. Every node that is not accepted by the
     * rules is replaced by its escaped source text.
     *
     * The fragment is wrapped in a synthetic root element and parsed as XML.
     * If it is not well-formed XML (unclosed tags, undefined entities such
     * as &nbsp;, ...) the whole input is returned HTML-escaped instead.
     *
     * @param string $value
     * @return string
     */
    public function filter($value)
    {
        $value = (string) $value;

        // Collect parser errors internally instead of emitting PHP warnings
        // for every piece of malformed user input.
        $previous = libxml_use_internal_errors(true);

        $doc = new DOMDocument();
        // The explicit declaration pins the document encoding to UTF-8.
        $loaded = $doc->loadXML(
            '<?xml version="1.0" encoding="UTF-8"?><div>' . $value . '</div>',
            LIBXML_NONET
        );

        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if (!$loaded || $doc->documentElement === null) {
            // Not parseable: fail closed by escaping everything.
            return htmlspecialchars(trim($value), ENT_QUOTES, 'UTF-8');
        }

        $root = $doc->documentElement;
        $this->traverse($root);

        // Serialize the children of the wrapper one by one. This leaves out
        // the XML declaration and the wrapper itself without touching any
        // <div> elements that are part of the filtered content.
        $result = '';
        foreach ($root->childNodes as $child) {
            $result .= $doc->saveXML($child);
        }

        return trim($result);
    }

    /**
     * Walks the tree below $nodeObj (children before parents), replaces
     * nodes that are not accepted and applies attribute allow-lists to the
     * accepted elements.
     *
     * @param DOMNode $nodeObj
     * @return DOMNode the same node that was passed in
     */
    private function traverse($nodeObj) {
        // childNodes is a live list; iterate over a snapshot because nodes
        // are replaced inside the loop.
        foreach (iterator_to_array($nodeObj->childNodes, false) as $node) {
            if ($node->hasChildNodes()) {
                $this->traverse($node);
            }

            // Plain text is always accepted.
            if ($node->nodeType === XML_TEXT_NODE) {
                continue;
            }

            $rule = $this->findRule($node);
            if ($rule === null) {
                $nodeObj->replaceChild($this->replaceNode($node), $node);
            } elseif (is_array($rule['attributes'])) {
                $this->stripAttributes($node, $rule['attributes']);
            }
        }
        return $nodeObj;
    }

    /**
     * Returns the first rule that accepts $node in its current context, or
     * null if there is none. Only element nodes can match a rule; comments,
     * CDATA sections and processing instructions never do.
     *
     * @param DOMNode $node
     * @return array|null
     */
    private function findRule($node) {
        if ($node->nodeType !== XML_ELEMENT_NODE) {
            return null;
        }

        $parent = $node->parentNode;
        // Compare against the wrapper node itself, not against the name
        // 'div', so that a <div> supplied in the input is not mistaken for
        // the top level.
        $atTopLevel = $parent->isSameNode($node->ownerDocument->documentElement);

        foreach ($this->_allowed as $rule) {
            if ($rule['name'] !== $node->localName) {
                continue;
            }
            if ($atTopLevel || in_array($parent->localName, $rule['in'], true)) {
                return $rule;
            }
        }
        return null;
    }

    /**
     * Removes every attribute from $element whose name is not contained in
     * $allowedAttributes.
     *
     * @param DOMElement $element
     * @param array      $allowedAttributes
     * @return void
     */
    private function stripAttributes($element, $allowedAttributes) {
        // Snapshot without keys: the map is live, and keyed by name two
        // attributes with the same local name would overwrite each other.
        foreach (iterator_to_array($element->attributes, false) as $attr) {
            if (!in_array($attr->nodeName, $allowedAttributes, true)) {
                $element->removeAttributeNode($attr);
            }
        }
    }

    /**
     * Builds the text node that replaces $node. The children are converted
     * first, so the resulting text contains the source of the whole subtree.
     * The text is escaped when the document is serialized.
     *
     * @param DOMNode $node
     * @return DOMText
     */
    private function replaceNode($node) {
        if ($node->hasChildNodes()) {
            foreach (iterator_to_array($node->childNodes, false) as $childnode) {
                $node->replaceChild($this->replaceNode($childnode), $childnode);
            }
        }

        // Create the text node in the document it is going to be inserted in.
        return $node->ownerDocument->createTextNode($this->getHtmlString($node));
    }

    /**
     * Returns the source-like string of a node. For elements this is the
     * start tag with its attributes, the text content and the end tag; the
     * children are expected to have been converted to text already. For all
     * other node types it is just the text content.
     *
     * @param DOMNode $node
     * @return string
     */
    private function getHtmlString($node) {
        if ($node->nodeType !== XML_ELEMENT_NODE) {
            return $node->textContent;
        }

        $str = '<' . $node->localName;
        foreach ($node->attributes as $attr) {
            $str .= ' ' . $attr->name . '="' . $attr->value . '"';
        }

        $str .= '>' . $node->textContent . '</';
        $str .= $node->localName . '>';
        return $str;
    }
}

0 Responses to “HTML Eingaben intelligent filtern”


  • No Comments

Leave a Reply