Ich stehe bei einem Projekt vor dem Problem, dass der Text einer Eingabe nur begrenzt HTML-formatiert sein darf. Dabei möchte ich Tags wie strong, em usw. erlauben, div-Container zum Beispiel aber nicht.
Außerdem sollen eigene XML-Tags erlaubt sein, die ich selbst definiere. Da reguläre Ausdrücke zum Parsen von XHTML sehr ungeeignet sind, habe ich einen Filter basierend auf den DOM-Klassen von PHP geschrieben.
Hier stelle ich die Klasse öffentlich zur Verfügung. Sie lässt sich auch ganz leicht ins Zend Framework einbinden.
<?php
/**
 * class XHTMLFilter
 * needs PHP's built in DOM classes
 *
 * Copyright 2010 Jan Oliver Oelerich
 *
 * Filters markup from any input source by a whitelist of rules. Every
 * element that is not allowed in its context is turned into escaped text
 * (together with its whole subtree), so it is displayed instead of rendered.
 *
 * Usage:
 *   $filter = new XHTMLFilter();
 *   $filter->allowTag('em')
 *          ->allowTag('strong', array('em'));
 *   echo $filter->filter($htmlSource);
 *
 * This allows em and strong at the top level of the input, and additionally
 * allows strong directly inside em. Any other element, and em/strong in any
 * other context, is escaped.
 *
 * NOTE: only element names and their parent context are checked. Attributes
 * of allowed elements are passed through unchanged (including e.g. onclick
 * or href), so this class alone is not a complete XSS filter.
 */
class XHTMLFilter
{
    /**
     * List of rules; each rule is array('name' => string, 'in' => string[]).
     *
     * @var array
     */
    private $_allowed = array();

    /**
     * Registers a rule: element $name is allowed at the top level of the
     * input and directly inside any of the elements listed in $allowedIn.
     *
     * Example: $this->allowTag('div', array('strong'));
     * allows 'div' at the top level and within 'strong' (but not, e.g.,
     * within 'em').
     *
     * @param string       $name      local name of the allowed element
     * @param array|string $allowedIn parent element name(s) the element may
     *                                additionally appear in
     * @return XHTMLFilter            $this, for chaining
     */
    public function allowTag($name, $allowedIn = array())
    {
        $this->_allowed[] = array(
            'name' => (string) $name,
            // A single parent passed as a string is accepted as a one-element list.
            'in'   => array_map('strval', (array) $allowedIn),
        );
        return $this;
    }

    /**
     * Filters $value and returns the result.
     *
     * The input is wrapped in a synthetic <div> root and parsed as XML.
     * Elements that no rule allows in their context are replaced by text
     * nodes holding their markup, which is escaped on serialization.
     * CDATA sections are merged into plain text while parsing.
     *
     * If the wrapped input is not well-formed XML (e.g. unclosed tags), the
     * complete input is returned HTML-escaped instead.
     *
     * @param string $value
     * @return string
     */
    public function filter($value)
    {
        $value = (string) $value;
        $doc = new DOMDocument('1.0', 'UTF-8');

        // Collect parser errors internally instead of emitting PHP warnings,
        // and restore the caller's setting afterwards.
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadXML(
            '<div>' . $value . '</div>',
            LIBXML_NOCDATA | LIBXML_NONET
        );
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $root = $loaded ? $doc->documentElement : null;
        if ($root === null) {
            // Nothing can be selectively allowed in unparseable input.
            return trim(htmlspecialchars($value, ENT_QUOTES, 'UTF-8'));
        }

        $this->traverse($root);

        // Serialize only the children of the synthetic root. This drops the
        // XML declaration and the wrapper without touching <div> elements
        // that are part of the content.
        $result = '';
        foreach ($root->childNodes as $child) {
            $result .= (string) $doc->saveXML($child);
        }
        return trim($result);
    }

    /**
     * Walks the tree below $nodeObj top-down. A child that is not allowed is
     * replaced by its escaped markup; an allowed child is descended into.
     *
     * @param DOMNode $nodeObj
     * @return DOMNode the same node, after filtering its subtree
     */
    private function traverse($nodeObj)
    {
        // childNodes is a live list; take a snapshot because children are
        // replaced while walking.
        $children = array();
        foreach ($nodeObj->childNodes as $child) {
            $children[] = $child;
        }

        foreach ($children as $child) {
            if (!$this->isAllowed($child)) {
                $nodeObj->replaceChild($this->replaceNode($child), $child);
            } elseif ($child->hasChildNodes()) {
                $this->traverse($child);
            }
        }
        return $nodeObj;
    }

    /**
     * Checks whether a node may stay in the tree as it is.
     *
     * Plain text is always allowed. An element is allowed if a rule with its
     * local name exists and the element is either a direct child of the
     * synthetic root or a direct child of one of the rule's parent elements.
     * Everything else (comments, processing instructions, CDATA, ...) is not
     * allowed.
     *
     * @param DOMNode $node
     * @return boolean
     */
    private function isAllowed($node)
    {
        if ($node instanceof DOMCdataSection) {
            // Should not occur with LIBXML_NOCDATA; never emit raw CDATA.
            return false;
        }
        if ($node instanceof DOMText) {
            return true;
        }
        if (!($node instanceof DOMElement)) {
            return false;
        }

        $parent = $node->parentNode;
        // Compare by node identity, not by name: a <div> supplied in the
        // input must not be mistaken for the synthetic wrapper.
        $atTopLevel = $parent->isSameNode($node->ownerDocument->documentElement);

        foreach ($this->_allowed as $rule) {
            if ($rule['name'] !== $node->localName) {
                continue;
            }
            if ($atTopLevel || in_array($parent->localName, $rule['in'], true)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the text node that replaces a not-allowed node. The text holds
     * the markup of the node and its complete subtree.
     *
     * @param DOMNode $node
     * @return DOMText
     */
    private function replaceNode($node)
    {
        return $node->ownerDocument->createTextNode($this->getHtmlString($node));
    }

    /**
     * Rebuilds the markup string of a node including all of its descendants.
     *
     * @param DOMNode $node
     * @return string
     */
    private function getHtmlString($node)
    {
        if ($node instanceof DOMText) {
            return $node->textContent;
        }
        if (!($node instanceof DOMElement)) {
            // Comments, processing instructions etc.: keep their source form.
            return (string) $node->ownerDocument->saveXML($node);
        }

        $str = '<' . $node->localName;
        foreach ($node->attributes as $attr) {
            $str .= ' ' . $attr->name . '="' . $attr->value . '"';
        }
        $str .= '>';
        foreach ($node->childNodes as $child) {
            $str .= $this->getHtmlString($child);
        }
        return $str . '</' . $node->localName . '>';
    }
}
0 Responses to “HTML Eingaben intelligent filtern”