html.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  // Parses a CSS declaration string (for example the value of a style
  // attribute) into a map of lower-cased property name -> trimmed value.
  // Each raw value is passed through _toHex and then _trim.
  // A ';' inside parentheses or inside a quoted string does not end the
  // declaration, so values such as url(data:image/png;base64,...) stay whole.
  // Unbalanced '(' or quotes fall back to the plain "stop at ';'" rule.
  // @param {string} css - declarations separated by ';'
  // @return {Object} property name -> value; a repeated property keeps its last value
  function _getCssList(css) {
    var list = {},
      // 1: property name, 2: raw value, 3: terminator (';' or end of input)
      reg = /\s*([\w\-]+)\s*:((?:\([^)]*\)|"[^"]*"|'[^']*'|[^;])*)(;|$)/g,
      match;
    while ((match = reg.exec(css))) {
      var key = _trim(match[1].toLowerCase()),
        val = _trim(_toHex(match[2]));
      list[key] = val;
    }
    return list;
  }
  // Extracts the attributes of a single tag string into a map of
  // lower-cased attribute name -> value.
  // Recognised forms: bare (name), unquoted (name=value), double-quoted
  // (name="value") and single-quoted (name='value'). An attribute is only
  // accepted when it is followed by whitespace, '/' or '>'.
  // Bare attributes get the empty string as their value.
  // @param {string} tag - tag markup such as '<img src="a.png" alt=x />'
  // @return {Object} attribute name -> value; a repeated name keeps its last value
  function _getAttrList(tag) {
    var attrs = {};
    // Groups: 1 bare name | 2,3 unquoted name/value | 4,5 double-quoted | 6,7 single-quoted
    var pattern = /\s+(?:([\w\-:]+)|(?:([\w\-:]+)=([^\s"'<>]+))|(?:([\w\-:"]+)="([^"]*)")|(?:([\w\-:"]+)='([^']*)'))(?=(?:\s|\/|>)+)/g;
    var found = pattern.exec(tag);
    while (found) {
      var name = found[1] || found[2] || found[4] || found[6];
      var value;
      if (found[2]) {
        value = found[3];
      } else if (found[4]) {
        value = found[5];
      } else {
        value = found[7];
      }
      attrs[name.toLowerCase()] = value || '';
      found = pattern.exec(tag);
    }
    return attrs;
  }
  // Returns the tag markup with className added to its class attribute.
  // - If the tag already has a class attribute, className is appended to the
  //   existing value unless it is already one of the space-separated names.
  //   Whitespace around '=' is tolerated, matching the detection test.
  // - Otherwise a class="..." attribute is inserted before the closing '>'
  //   (or before '/>' for self-closing tags, so '<br />' stays well-formed).
  // @param {string} tag - a single opening tag, expected to end with '>'
  // @param {string} className - class name to add
  // @return {string} the updated tag markup
  function _addClassToTag(tag, className) {
    if (/\s+class\s*=/.test(tag)) {
      // $1: ' class=' plus optional opening quote, $2: current value,
      // $3: optional closing quote plus the following whitespace or '>'
      tag = tag.replace(/(\s+class\s*=\s*["']?)([^"']*)(["']?[\s>])/, function($0, $1, $2, $3) {
        if ((' ' + $2 + ' ').indexOf(' ' + className + ' ') < 0) {
          return $2 === '' ? $1 + className + $3 : $1 + $2 + ' ' + className + $3;
        }
        return $0;
      });
    } else if (/\/>$/.test(tag)) {
      // Self-closing tag: keep the slash after the new attribute.
      tag = tag.replace(/\s*\/>$/, '') + ' class="' + className + '" />';
    } else {
      tag = tag.substr(0, tag.length - 1) + ' class="' + className + '">';
    }
    return tag;
  }
  // Re-serialises a CSS declaration string: the input is parsed with
  // _getCssList and every entry is written back as "name:value;" with no
  // spaces, in the order _each visits the parsed map.
  // @param {string} css - declarations separated by ';'
  // @return {string} the normalised declaration string ('' when nothing parsed)
  function _formatCss(css) {
    var declarations = [];
    _each(_getCssList(css), function(name, value) {
      declarations.push(name + ':' + value + ';');
    });
    return declarations.join('');
  }
  // Normalises a URL and optionally converts it to another form.
  //
  // Runs of slashes are always collapsed to a single '/', except the '//'
  // that follows a ':' and anything in a "data:" URL.
  // When mode is one of the values below, URLs that belong to `host` are
  // then rewritten:
  //   'domain'   - scheme + host + path  (http://host/a/b.png)
  //   'absolute' - path from the site root (/a/b.png)
  //   'relative' - path relative to `pathname` (../b.png)
  // URLs on another host, URLs with a non-hierarchical scheme (mailto:,
  // data:, ...), protocol-relative URLs (//cdn/x), and empty, '#fragment' or
  // '?query' references are returned without conversion.
  //
  // @param {string} url - the URL to format
  // @param {string} [mode] - 'absolute', 'relative' or 'domain' (case-insensitive);
  //   any other value only collapses slashes
  // @param {string} [host] - scheme and authority without trailing slash;
  //   defaults to location.protocol + '//' + location.host
  // @param {string} [pathname] - directory the relative form is based on;
  //   defaults to the directory part of location.pathname
  // @return {string} the formatted URL
  function _formatUrl(url, mode, host, pathname) {
    mode = _undef(mode, '').toLowerCase();
    // Collapse consecutive slashes, e.g.
    // http://localhost/upload/file/201205//maincus.swf
    // (base64 data URLs are left alone).
    if (url.substr(0, 5) != 'data:') {
      url = url.replace(/([^:])\/{2,}/g, '$1/');
    }
    if (_inArray(mode, ['absolute', 'relative', 'domain']) < 0) {
      return url;
    }
    // These references do not name a path on `host`: resolving them against
    // the directory would change what they point to, so leave them untouched.
    if (url === '' || /^(?:[#?]|\/\/)/.test(url)) {
      return url;
    }
    host = host || location.protocol + '//' + location.host;
    if (pathname === undefined) {
      var m = location.pathname.match(/^(\/.*)\//);
      pathname = m ? m[1] : '';
    }
    var match;
    if ((match = /^(\w+:\/\/[^\/]*)/.exec(url))) {
      // Fully qualified URL on a different host: nothing to convert.
      if (match[1] !== host) {
        return url;
      }
    } else if (/^\w+:/.test(url)) {
      // Scheme without '//' (mailto:, data:, javascript:, ...).
      return url;
    }
    // Resolves '.', '..' and empty segments; always returns a path starting with '/'.
    function getRealPath(path) {
      var parts = path.split('/'), paths = [];
      for (var i = 0, len = parts.length; i < len; i++) {
        var part = parts[i];
        if (part == '..') {
          if (paths.length > 0) {
            paths.pop();
          }
        } else if (part !== '' && part != '.') {
          paths.push(part);
        }
      }
      return '/' + paths.join('/');
    }
    // Bring url into the 'domain' form first.
    if (/^\//.test(url)) {
      url = host + getRealPath(url.substr(1));
    } else if (!/^\w+:\/\//.test(url)) {
      url = host + getRealPath(pathname + '/' + url);
    }
    // True when url lies inside the directory `base`. The prefix must end on
    // a path-segment boundary so that '/ab' is not taken as a parent of '/abc/x'.
    function isInside(base) {
      if (url.substr(0, base.length) !== base) {
        return false;
      }
      var next = url.charAt(base.length);
      return next === '' || next === '/' || base.charAt(base.length - 1) === '/';
    }
    // Walks up from `path` one directory at a time until url is inside it and
    // returns './' + one '..' per level climbed + the remainder of url.
    // Returns undefined when no common base exists.
    function getRelativePath(path, depth) {
      if (isInside(path)) {
        var arr = [];
        for (var i = 0; i < depth; i++) {
          arr.push('..');
        }
        var prefix = '.';
        if (arr.length > 0) {
          prefix += '/' + arr.join('/');
        }
        if (pathname == '/') {
          prefix += '/';
        }
        return prefix + url.substr(path.length);
      }
      if ((match = /^(.*)\//.exec(path))) {
        return getRelativePath(match[1], depth + 1);
      }
    }
    if (mode === 'relative') {
      var relative = getRelativePath(host + pathname, 0);
      // Drop the leading './'. Without a common base the URL stays in 'domain' form.
      if (relative !== undefined) {
        url = relative.substr(2);
      }
    } else if (mode === 'absolute') {
      if (url.substr(0, host.length) === host) {
        url = url.substr(host.length);
      }
    }
    return url;
  }
  // Cleans up and re-serialises an HTML string.
  //
  // What it does, in order:
  //   1. normalises <br> inside <pre>, empty paragraphs and a few special
  //      characters (zero-width space, (c), (r), wide spaces);
  //   2. when htmlTags is given, drops the content of <script>/<style>
  //      unless those tags are whitelisted;
  //   3. rewrites every tag: removes tags and attributes/styles that are not
  //      whitelisted, lower-cases tag names, self-closes single tags, turns
  //      <font> into <span style="...">, formats src/href through _formatUrl,
  //      and (when wellFormatted) puts block tags on their own indented lines;
  //   4. collapses blank lines outside <pre> and trims the result.
  //
  // @param {string} html - markup to format; null/undefined is treated as ''
  // @param {Object} [htmlTags] - whitelist: key is a comma-separated list of
  //   tag names, value is handed to _toMap to obtain the allowed attributes.
  //   In the resulting map '*' allows every attribute, 'style' allows every
  //   style property and '.name' allows the style property `name`.
  //   When omitted nothing is filtered.
  // @param {string} [urlType] - mode passed to _formatUrl for src/href values
  // @param {boolean} [wellFormatted=false] - indent block tags and break after <br>
  // @param {string} [indentChar='\t'] - one level of indentation
  // @return {string} the formatted markup
  function _formatHtml(html, htmlTags, urlType, wellFormatted, indentChar) {
    // null or undefined: object == null
    if (html == null) {
      html = '';
    }
    urlType = urlType || '';
    wellFormatted = _undef(wellFormatted, false);
    indentChar = _undef(indentChar, '\t');
    var fontSizeList = 'xx-small,x-small,small,medium,large,x-large,xx-large'.split(',');
    var hasOwn = Object.prototype.hasOwnProperty;
    // Convert <br> inside <pre> to \n. All spellings are accepted
    // (<br>, <br/>, <br />, <br attr>), like the "<br/></p>" rule below.
    html = html.replace(/(<(?:pre|pre\s[^>]*)>)([\s\S]*?)(<\/pre>)/ig, function($0, $1, $2, $3) {
      return $1 + $2.replace(/<(?:br|br\s[^>]*)\s*\/?>/ig, '\n') + $3;
    });
    // <br/></p> to </p>
    html = html.replace(/<(?:br|br\s[^>]*)\s*\/?>\s*<\/p>/ig, '</p>');
    // <p></p> to <p><br /></p>
    html = html.replace(/(<(?:p|p\s[^>]*)>)\s*(<\/p>)/ig, '$1<br />$2');
    // empty char
    html = html.replace(/\u200B/g, '');
    // &copy;
    html = html.replace(/\u00A9/g, '&copy;');
    // &reg;
    html = html.replace(/\u00AE/g, '&reg;');
    // Bugfix:
    // https://github.com/kindsoft/kindeditor/issues/147
    html = html.replace(/\u2003/g, '&emsp;');
    html = html.replace(/\u3000/g, '&emsp;');
    // Bugfix:
    // https://github.com/kindsoft/kindeditor/issues/116
    // https://github.com/kindsoft/kindeditor/issues/145
    // Collapse whitespace runs inside tags so the tag pattern below can match.
    html = html.replace(/<[^>]+/g, function($0) {
      return $0.replace(/\s+/g, ' ');
    });
    var htmlTagMap = {};
    if (htmlTags) {
      // Expand the comma-separated keys of htmlTags into one entry per tag.
      _each(htmlTags, function(key, val) {
        var arr = key.split(',');
        for (var i = 0, len = arr.length; i < len; i++) {
          htmlTagMap[arr[i]] = _toMap(val);
        }
      });
      // Remove script and style elements together with their content.
      if (!htmlTagMap.script) {
        html = html.replace(/(<(?:script|script\s[^>]*)>)([\s\S]*?)(<\/script>)/ig, '');
      }
      if (!htmlTagMap.style) {
        html = html.replace(/(<(?:style|style\s[^>]*)>)([\s\S]*?)(<\/style>)/ig, '');
      }
    }
    // 1: leading whitespace, 2: '/' of a closing tag, 3: tag name,
    // 4: attribute text, 5: '/' of a self-closing tag, 6: trailing whitespace
    var re = /(\s*)<(\/)?([\w\-:]+)((?:\s+|(?:\s+[\w\-:]+)|(?:\s+[\w\-:]+=[^\s"'<>]+)|(?:\s+[\w\-:"]+="[^"]*")|(?:\s+[\w\-:"]+='[^']*'))*)(\/)?>(\s*)/g;
    // Open block tags, used to compute the indentation depth.
    var tagStack = [];
    html = html.replace(re, function($0, $1, $2, $3, $4, $5, $6) {
      var full = $0,
        startNewline = $1 || '',
        startSlash = $2 || '',
        tagName = $3.toLowerCase(),
        attr = $4 || '',
        endSlash = $5 ? ' ' + $5 : '',
        endNewline = $6 || '';
      // Drop tags that are not whitelisted. The own-property check keeps
      // names inherited from Object.prototype (constructor, toString, ...)
      // from passing as whitelisted tags.
      if (htmlTags && !(hasOwn.call(htmlTagMap, tagName) && htmlTagMap[tagName])) {
        return '';
      }
      // Add the closing slash to tags that have no end tag.
      if (endSlash === '' && _SINGLE_TAG_MAP[tagName]) {
        endSlash = ' /';
      }
      // Around inline tags, collapse surrounding whitespace to one space.
      if (_INLINE_TAG_MAP[tagName]) {
        if (startNewline) {
          startNewline = ' ';
        }
        if (endNewline) {
          endNewline = ' ';
        }
      }
      // pre, style, script tags: newline before the opening tag and after
      // the closing tag.
      if (_PRE_TAG_MAP[tagName]) {
        if (startSlash) {
          endNewline = '\n';
        } else {
          startNewline = '\n';
        }
      }
      // br tag
      if (wellFormatted && tagName == 'br') {
        endNewline = '\n';
      }
      // Block tags: own line plus indentation when wellFormatted, otherwise
      // no surrounding whitespace at all.
      if (_BLOCK_TAG_MAP[tagName] && !_PRE_TAG_MAP[tagName]) {
        if (wellFormatted) {
          if (startSlash && tagStack.length > 0 && tagStack[tagStack.length - 1] === tagName) {
            tagStack.pop();
          } else {
            tagStack.push(tagName);
          }
          startNewline = '\n';
          endNewline = '\n';
          for (var i = 0, len = startSlash ? tagStack.length : tagStack.length - 1; i < len; i++) {
            startNewline += indentChar;
            if (!startSlash) {
              endNewline += indentChar;
            }
          }
          if (endSlash) {
            tagStack.pop();
          } else if (!startSlash) {
            endNewline += indentChar;
          }
        } else {
          startNewline = endNewline = '';
        }
      }
      if (attr !== '') {
        var attrMap = _getAttrList(full);
        // Convert the font tag's color/size/face attributes into a style value.
        if (tagName === 'font') {
          var fontStyleMap = {}, fontStyle = '';
          _each(attrMap, function(key, val) {
            if (key === 'color') {
              fontStyleMap.color = val;
              delete attrMap[key];
            }
            if (key === 'size') {
              fontStyleMap['font-size'] = fontSizeList[parseInt(val, 10) - 1] || '';
              delete attrMap[key];
            }
            if (key === 'face') {
              fontStyleMap['font-family'] = val;
              delete attrMap[key];
            }
            if (key === 'style') {
              fontStyle = val;
            }
          });
          if (fontStyle && !/;$/.test(fontStyle)) {
            fontStyle += ';';
          }
          _each(fontStyleMap, function(key, val) {
            if (val === '') {
              return;
            }
            if (/\s/.test(val)) {
              val = "'" + val + "'";
            }
            fontStyle += key + ':' + val + ';';
          });
          attrMap.style = fontStyle;
        }
        // Process attributes and styles.
        _each(attrMap, function(key, val) {
          // Give value-less attributes their own name as value.
          if (_FILL_ATTR_MAP[key]) {
            attrMap[key] = key;
          }
          // Format URLs.
          if (_inArray(key, ['src', 'href']) >= 0) {
            attrMap[key] = _formatUrl(val, urlType);
          }
          // Filter attributes.
          if (htmlTags && key !== 'style' && !htmlTagMap[tagName]['*'] && !htmlTagMap[tagName][key] ||
            tagName === 'body' && key === 'contenteditable' ||
            /^kindeditor_\d+$/.test(key)) {
            delete attrMap[key];
          }
          if (key === 'style' && val !== '') {
            var styleMap = _getCssList(val);
            _each(styleMap, function(k, v) {
              // Filter style properties.
              if (htmlTags && !htmlTagMap[tagName].style && !htmlTagMap[tagName]['.' + k]) {
                delete styleMap[k];
              }
            });
            var style = '';
            _each(styleMap, function(k, v) {
              style += k + ':' + v + ';';
            });
            attrMap.style = style;
          }
        });
        attr = '';
        _each(attrMap, function(key, val) {
          if (key === 'style' && val === '') {
            return;
          }
          val = val.replace(/"/g, '&quot;');
          attr += ' ' + key + '="' + val + '"';
        });
      }
      if (tagName === 'font') {
        tagName = 'span';
      }
      return startNewline + '<' + startSlash + tagName + attr + endSlash + '>' + endNewline;
    });
    // Inside <pre>, prefix every \n with a temporary marker so the
    // blank-line collapsing below leaves it alone.
    html = html.replace(/(<(?:pre|pre\s[^>]*)>)([\s\S]*?)(<\/pre>)/ig, function($0, $1, $2, $3) {
      return $1 + $2.replace(/\n/g, '<span id="__kindeditor_pre_newline__">\n') + $3;
    });
    html = html.replace(/\n\s*\n/g, '\n');
    // Remove the temporary markers.
    html = html.replace(/<span id="__kindeditor_pre_newline__">\n/g, '\n');
    return _trim(html);
  }
  // Strips MS Word specific markup from pasted HTML and then formats the
  // result with _formatHtml.
  // Removed: <meta> tags, '<!...>' declarations/comments, <style> and
  // <script> elements, <w:...> and <o:...> elements and <xml> blocks.
  // In <table>/<td> tags a 'border-bottom:' declaration is renamed to 'border:'.
  // @param {string} html - markup pasted from Word
  // @param {Object} [htmlTags] - whitelist forwarded to _formatHtml
  // @return {string} the cleaned markup
  function _clearMsWord(html, htmlTags) {
    // Applied one after another, in this order.
    var wordJunk = [
      /<meta[\s\S]*?>/ig,
      /<![\s\S]*?>/ig,
      /<style[^>]*>[\s\S]*?<\/style>/ig,
      /<script[^>]*>[\s\S]*?<\/script>/ig,
      /<w:[^>]+>[\s\S]*?<\/w:[^>]+>/ig,
      /<o:[^>]+>[\s\S]*?<\/o:[^>]+>/ig,
      /<xml>[\s\S]*?<\/xml>/ig
    ];
    var cleaned = html;
    for (var i = 0; i < wordJunk.length; i++) {
      cleaned = cleaned.replace(wordJunk[i], '');
    }
    cleaned = cleaned.replace(/<(?:table|td)[^>]*>/ig, function(cellTag) {
      return cellTag.replace(/border-bottom:([#\w\s]+)/ig, 'border:$1');
    });
    return _formatHtml(cleaned, htmlTags);
  }
  // Determines the plugin media type from the file extension of a URL.
  // The extension must be followed by '?', '#' or the end of the string, so
  // query strings and fragments do not hide it. Matching is case-insensitive.
  // @param {string} src - media URL
  // @return {string} 'audio/x-pn-realaudio-plugin' for .rm/.rmvb,
  //   'application/x-shockwave-flash' for .swf/.flv,
  //   'video/x-ms-asf-plugin' for everything else
  function _mediaType(src) {
    if (/\.(rm|rmvb)(\?|#|$)/i.test(src)) {
      return 'audio/x-pn-realaudio-plugin';
    }
    if (/\.(swf|flv)(\?|#|$)/i.test(src)) {
      return 'application/x-shockwave-flash';
    }
    return 'video/x-ms-asf-plugin';
  }
  // Maps a media type string to the CSS class used for its placeholder.
  // The check is a case-insensitive substring test, 'realaudio' first.
  // @param {string} type - media type, e.g. 'application/x-shockwave-flash'
  // @return {string} 'ke-rm', 'ke-flash' or (by default) 'ke-media'
  function _mediaClass(type) {
    return /realaudio/i.test(type) ? 'ke-rm'
      : /flash/i.test(type) ? 'ke-flash'
      : 'ke-media';
  }
  // Decodes an escape()-encoded tag string with unescape() and returns its
  // attributes as parsed by _getAttrList.
  // @param {string} srcTag - encoded tag markup (the counterpart of the
  //   escape() call in _mediaImg)
  // @return {Object} attribute name -> value
  function _mediaAttrs(srcTag) {
    var decodedTag = unescape(srcTag);
    return _getAttrList(decodedTag);
  }
  // Builds the markup of a self-closing <embed> tag from an attribute map.
  // Double quotes inside a value are written as &quot; so that a value can
  // never terminate its own attribute (same escaping as _formatHtml).
  // @param {Object} attrs - attribute name -> value; values are converted to strings
  // @return {string} '<embed name="value" ... />'
  function _mediaEmbed(attrs) {
    var html = '<embed ';
    _each(attrs, function(key, val) {
      html += key + '="' + String(val).replace(/"/g, '&quot;') + '" ';
    });
    html += '/>';
    return html;
  }
  // Builds the <img> placeholder markup that stands in for a media element.
  // The image gets the class from _mediaClass, blankPath as its src, an
  // optional width/height style, and a data-ke-tag attribute holding the
  // escape()-encoded <embed> markup produced by _mediaEmbed(attrs).
  // @param {string} blankPath - URL written into the src attribute
  // @param {Object} attrs - embed attributes; reads width, height, type and src.
  //   A purely numeric width/height greater than 0 gets a 'px' unit, a value
  //   containing non-digits (e.g. '50%') is used as is, and a missing or
  //   empty value adds no style.
  // @return {string} the <img ... /> markup
  function _mediaImg(blankPath, attrs) {
    var width = attrs.width,
      height = attrs.height,
      type = attrs.type || _mediaType(attrs.src),
      srcTag = _mediaEmbed(attrs),
      style = '';
    // Returns 'name:value;' for a usable size, '' otherwise. The explicit
    // null/undefined check matters because /\D/.test(undefined) is true.
    function sizeStyle(name, value) {
      if (value === undefined || value === null) {
        return '';
      }
      if (/\D/.test(value)) {
        return name + ':' + value + ';';
      }
      if (value > 0) {
        return name + ':' + value + 'px;';
      }
      return '';
    }
    style += sizeStyle('width', width);
    style += sizeStyle('height', height);
    var html = '<img class="' + _mediaClass(type) + '" src="' + blankPath + '" ';
    if (style !== '') {
      html += 'style="' + style + '" ';
    }
    html += 'data-ke-tag="' + escape(srcTag) + '" alt="" />';
    return html;
  }
  // Simple JavaScript Templating
  // Based on John Resig's micro-templating - http://ejohn.org/ - MIT Licensed
  // http://ejohn.org/blog/javascript-micro-templating/
  //
  // Template syntax:
  //   <% code %>   - JavaScript inserted as is (loops, conditions, print(...))
  //   <%= expr %>  - the value of expr is appended to the output
  // Everything else is literal text. CR, LF and TAB characters anywhere in the
  // template are replaced by a space. Inside the template the properties of
  // the data object are available as variables (via with(){}).
  //
  // Literal text is emitted as a properly escaped string literal, so any
  // number of single quotes and backslashes in the text is safe.
  //
  // NOTE(security): the template is compiled with new Function, i.e. it is
  // executed as code. Only pass templates from trusted sources.
  //
  // @param {string} str - the template text
  // @param {Object} [data] - values for the template
  // @return {string|Function} the rendered string when data is truthy,
  //   otherwise the compiled function (obj) -> string. Nothing is cached:
  //   every call compiles the template again.
  function _tmpl(str, data) {
    // Turns literal template text into a single-quoted JavaScript string literal.
    var quote = function(text) {
      return "'" + text.replace(/\\/g, '\\\\').replace(/'/g, "\\'") + "'";
    };
    // chunks[0] is leading text; every later chunk is "code%>text".
    var chunks = str.replace(/[\r\t\n]/g, ' ').split('<%'),
      body = 'var p=[],print=function(){p.push.apply(p,arguments);};' +
        // Introduce the data as local variables using with(){}
        'with(obj){p.push(' + quote(chunks[0]) + ');';
    for (var i = 1, len = chunks.length; i < len; i++) {
      var chunk = chunks[i],
        end = chunk.indexOf('%>'),
        code = end < 0 ? chunk : chunk.substr(0, end),
        text = end < 0 ? '' : chunk.substr(end + 2);
      if (code.charAt(0) === '=') {
        body += 'p.push(' + code.substr(1) + ');';
      } else {
        // No separator is added: the code may be an unfinished statement
        // such as "if (x)" or "for (...) {" that governs the next push.
        body += code;
      }
      body += 'p.push(' + quote(text) + ');';
    }
    body += "}return p.join('');";
    var fn = new Function('obj', body);
    // Provide some basic currying to the user
    return data ? fn(data) : fn;
  }
  // Attach the helpers defined above to the K object.
  // URL and markup formatting
  K.formatUrl = _formatUrl;
  K.formatHtml = _formatHtml;
  K.clearMsWord = _clearMsWord;
  // CSS and attribute parsing
  K.getCssList = _getCssList;
  K.getAttrList = _getAttrList;
  // Media placeholder helpers
  K.mediaType = _mediaType;
  K.mediaAttrs = _mediaAttrs;
  K.mediaEmbed = _mediaEmbed;
  K.mediaImg = _mediaImg;
  // Templating
  K.tmpl = _tmpl;