PHP Cross Reference of WordPress Subversion HEAD

[ Index ]     [ Classes ]     [ Functions ]     [ Variables ]     [ Constants ]

title

Body

[close]

/wp-includes/ -> formatting.php (source)

   1  <?php
   2  
   3  function wptexturize($text) {
   4      global $wp_cockneyreplace;
   5      $next = true;
   6      $output = '';
   7      $curl = '';
   8      $textarr = preg_split('/(<.*>)/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
   9      $stop = count($textarr);
  10  
  11      // if a plugin has provided an autocorrect array, use it
  12      if ( isset($wp_cockneyreplace) ) {
  13          $cockney = array_keys($wp_cockneyreplace);
  14          $cockneyreplace = array_values($wp_cockneyreplace);
  15      } else {
  16          $cockney = array("'tain't","'twere","'twas","'tis","'twill","'til","'bout","'nuff","'round","'cause");
  17          $cockneyreplace = array("&#8217;tain&#8217;t","&#8217;twere","&#8217;twas","&#8217;tis","&#8217;twill","&#8217;til","&#8217;bout","&#8217;nuff","&#8217;round","&#8217;cause");
  18      }
  19  
  20      $static_characters = array_merge(array('---', ' -- ', '--', 'xn&#8211;', '...', '``', '\'s', '\'\'', ' (tm)'), $cockney);
  21      $static_replacements = array_merge(array('&#8212;', ' &#8212; ', '&#8211;', 'xn--', '&#8230;', '&#8220;', '&#8217;s', '&#8221;', ' &#8482;'), $cockneyreplace);
  22  
  23      $dynamic_characters = array('/\'(\d\d(?:&#8217;|\')?s)/', '/(\s|\A|")\'/', '/(\d+)"/', '/(\d+)\'/', '/(\S)\'([^\'\s])/', '/(\s|\A)"(?!\s)/', '/"(\s|\S|\Z)/', '/\'([\s.]|\Z)/', '/(\d+)x(\d+)/');
  24      $dynamic_replacements = array('&#8217;$1','$1&#8216;', '$1&#8243;', '$1&#8242;', '$1&#8217;$2', '$1&#8220;$2', '&#8221;$1', '&#8217;$1', '$1&#215;$2');
  25  
  26      for ( $i = 0; $i < $stop; $i++ ) {
  27           $curl = $textarr[$i];
  28  
  29          if (isset($curl{0}) && '<' != $curl{0} && $next) { // If it's not a tag
  30              // static strings
  31              $curl = str_replace($static_characters, $static_replacements, $curl);
  32              // regular expressions
  33              $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
  34          } elseif (strpos($curl, '<code') !== false || strpos($curl, '<pre') !== false || strpos($curl, '<kbd') !== false || strpos($curl, '<style') !== false || strpos($curl, '<script') !== false) {
  35              $next = false;
  36          } else {
  37              $next = true;
  38          }
  39  
  40          $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
  41          $output .= $curl;
  42      }
  43  
  44        return $output;
  45  }
  46  
  47  // Accepts matches array from preg_replace_callback in wpautop()
  48  // or a string
  49  function clean_pre($matches) {
  50      if ( is_array($matches) )
  51          $text = $matches[1] . $matches[2] . "</pre>";
  52      else
  53          $text = $matches;
  54  
  55      $text = str_replace('<br />', '', $text);
  56      $text = str_replace('<p>', "\n", $text);
  57      $text = str_replace('</p>', '', $text);
  58  
  59      return $text;
  60  }
  61  
  62  function wpautop($pee, $br = 1) {
  63      $pee = $pee . "\n"; // just to make things a little easier, pad the end
  64      $pee = preg_replace('|<br />\s*<br />|', "\n\n", $pee);
  65      // Space things out a little
  66      $allblocks = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|map|area|blockquote|address|math|style|input|p|h[1-6]|hr)';
  67      $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee);
  68      $pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);
  69      $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines
  70      $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates
  71      $pee = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n", $pee); // make paragraphs, including one at the end
  72      $pee = preg_replace('|<p>\s*?</p>|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace
  73      $pee = preg_replace('!<p>([^<]+)\s*?(</(?:div|address|form)[^>]*>)!', "<p>$1</p>$2", $pee);
  74      $pee = preg_replace( '|<p>|', "$1<p>", $pee );
  75      $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee); // don't pee all over a tag
  76      $pee = preg_replace("|<p>(<li.+?)</p>|", "$1", $pee); // problem with nested lists
  77      $pee = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee);
  78      $pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee);
  79      $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)!', "$1", $pee);
  80      $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee);
  81      if ($br) {
  82          $pee = preg_replace('/<(script|style).*?<\/\\1>/se', 'str_replace("\n", "<WPPreserveNewline />", "\\0")', $pee);
  83          $pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee); // optionally make line breaks
  84          $pee = str_replace('<WPPreserveNewline />', "\n", $pee);
  85      }
  86      $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $pee);
  87      $pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee);
  88      if (strpos($pee, '<pre') !== false)
  89          $pee = preg_replace_callback('!(<pre.*?>)(.*?)</pre>!is', 'clean_pre', $pee );
  90      $pee = preg_replace( "|\n</p>$|", '</p>', $pee );
  91  
  92      return $pee;
  93  }
  94  
  95  
  96  function seems_utf8($Str) { # by bmorel at ssi dot fr
  97      $length = strlen($Str);
  98      for ($i=0; $i < $length; $i++) {
  99          if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb
 100          elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb
 101          elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb
 102          elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb
 103          elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb
 104          elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b
 105          else return false; # Does not match any model
 106          for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
 107              if ((++$i == $length) || ((ord($Str[$i]) & 0xC0) != 0x80))
 108              return false;
 109          }
 110      }
 111      return true;
 112  }
 113  
 114  function wp_specialchars( $text, $quotes = 0 ) {
 115      // Like htmlspecialchars except don't double-encode HTML entities
 116      $text = str_replace('&&', '&#038;&', $text);
 117      $text = str_replace('&&', '&#038;&', $text);
 118      $text = preg_replace('/&(?:$|([^#])(?![a-z1-4]{1,8};))/', '&#038;$1', $text);
 119      $text = str_replace('<', '&lt;', $text);
 120      $text = str_replace('>', '&gt;', $text);
 121      if ( 'double' === $quotes ) {
 122          $text = str_replace('"', '&quot;', $text);
 123      } elseif ( 'single' === $quotes ) {
 124          $text = str_replace("'", '&#039;', $text);
 125      } elseif ( $quotes ) {
 126          $text = str_replace('"', '&quot;', $text);
 127          $text = str_replace("'", '&#039;', $text);
 128      }
 129      return $text;
 130  }
 131  
 132  function utf8_uri_encode( $utf8_string, $length = 0 ) {
 133      $unicode = '';
 134      $values = array();
 135      $num_octets = 1;
 136      $unicode_length = 0;
 137  
 138      $string_length = strlen( $utf8_string );
 139      for ($i = 0; $i < $string_length; $i++ ) {
 140  
 141          $value = ord( $utf8_string[ $i ] );
 142  
 143          if ( $value < 128 ) {
 144              if ( $length && ( $unicode_length >= $length ) )
 145                  break;
 146              $unicode .= chr($value);
 147              $unicode_length++;
 148          } else {
 149              if ( count( $values ) == 0 ) $num_octets = ( $value < 224 ) ? 2 : 3;
 150  
 151              $values[] = $value;
 152  
 153              if ( $length && ( $unicode_length + ($num_octets * 3) ) > $length )
 154                  break;
 155              if ( count( $values ) == $num_octets ) {
 156                  if ($num_octets == 3) {
 157                      $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]) . '%' . dechex($values[2]);
 158                      $unicode_length += 9;
 159                  } else {
 160                      $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]);
 161                      $unicode_length += 6;
 162                  }
 163  
 164                  $values = array();
 165                  $num_octets = 1;
 166              }
 167          }
 168      }
 169  
 170      return $unicode;
 171  }
 172  
 173  function remove_accents($string) {
 174      if ( !preg_match('/[\x80-\xff]/', $string) )
 175          return $string;
 176  
 177      if (seems_utf8($string)) {
 178          $chars = array(
 179          // Decompositions for Latin-1 Supplement
 180          chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
 181          chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
 182          chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
 183          chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
 184          chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
 185          chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
 186          chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
 187          chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
 188          chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
 189          chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
 190          chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
 191          chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
 192          chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
 193          chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
 194          chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
 195          chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
 196          chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
 197          chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
 198          chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
 199          chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
 200          chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
 201          chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
 202          chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
 203          chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
 204          chr(195).chr(182) => 'o', chr(195).chr(185) => 'u',
 205          chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
 206          chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
 207          chr(195).chr(191) => 'y',
 208          // Decompositions for Latin Extended-A
 209          chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
 210          chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
 211          chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
 212          chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
 213          chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
 214          chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
 215          chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
 216          chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
 217          chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
 218          chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
 219          chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
 220          chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
 221          chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
 222          chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
 223          chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
 224          chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
 225          chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
 226          chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
 227          chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
 228          chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
 229          chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
 230          chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
 231          chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
 232          chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
 233          chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
 234          chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
 235          chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
 236          chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
 237          chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
 238          chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
 239          chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
 240          chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
 241          chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
 242          chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
 243          chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
 244          chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
 245          chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
 246          chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
 247          chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
 248          chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
 249          chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
 250          chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
 251          chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
 252          chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
 253          chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
 254          chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
 255          chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
 256          chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
 257          chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
 258          chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
 259          chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
 260          chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
 261          chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
 262          chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
 263          chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
 264          chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
 265          chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
 266          chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
 267          chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
 268          chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
 269          chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
 270          chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
 271          chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
 272          chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
 273          // Euro Sign
 274          chr(226).chr(130).chr(172) => 'E',
 275          // GBP (Pound) Sign
 276          chr(194).chr(163) => '');
 277  
 278          $string = strtr($string, $chars);
 279      } else {
 280          // Assume ISO-8859-1 if not UTF-8
 281          $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
 282              .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).ch