'use strict' ;
var obsidian = require ( 'obsidian' ) ;
var path = require ( 'path' ) ;
/**
 * Makes a CommonJS export readable through a `default` property.
 * Objects that already expose a `default` key are returned unchanged; every
 * other value (primitives, functions, null, plain objects) is boxed as
 * `{ default: value }`.
 */
function _interopDefaultLegacy(e) {
    var alreadyWrapped = e && typeof e === 'object' && 'default' in e;
    if (alreadyWrapped) {
        return e;
    }
    return { 'default': e };
}
var path _ _default = /*#__PURE__*/ _interopDefaultLegacy ( path ) ;
/ * ! * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Copyright ( c ) Microsoft Corporation .
Permission to use , copy , modify , and / or distribute this software for any
purpose with or without fee is hereby granted .
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS . IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL , DIRECT ,
INDIRECT , OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE , DATA OR PROFITS , WHETHER IN AN ACTION OF CONTRACT , NEGLIGENCE OR
OTHER TORTIOUS ACTION , ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * /
// Drives a generator function as if it were an async function and returns a
// promise for its final value.
//   thisArg    - `this` value the generator function is applied with
//   _arguments - argument list for the generator function (defaults to [])
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - generator function; replaced below by the iterator it returns
function _ _awaiter ( thisArg , _arguments , P , generator ) {
// Wrap a yielded value in a P promise unless it already is an instance of P.
function adopt ( value ) { return value instanceof P ? value : new P ( function ( resolve ) { resolve ( value ) ; } ) ; }
return new ( P || ( P = Promise ) ) ( function ( resolve , reject ) {
// Resume the generator with the settled value; anything it throws rejects the outer promise.
function fulfilled ( value ) { try { step ( generator . next ( value ) ) ; } catch ( e ) { reject ( e ) ; } }
// Throw the rejection reason into the generator so its own try/catch can handle it.
function rejected ( value ) { try { step ( generator [ "throw" ] ( value ) ) ; } catch ( e ) { reject ( e ) ; } }
// Resolve when the generator is done, otherwise wait for the yielded value and continue.
function step ( result ) { result . done ? resolve ( result . value ) : adopt ( result . value ) . then ( fulfilled , rejected ) ; }
// Instantiate the generator and run it up to its first yield.
step ( ( generator = generator . apply ( thisArg , _arguments || [ ] ) ) . next ( ) ) ;
} ) ;
}
/**
 * Returns the extension of the last path segment of a URL, leading dot
 * included (e.g. ".png"). Query string and fragment are ignored, and '' is
 * returned when the segment has no extension.
 *
 * @param {string} url absolute or relative URL
 * @returns {string} extension such as ".png", or ''
 */
function getUrlFileExtension(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname;
    }
    catch (e) {
        // Not an absolute URL: strip query and fragment by hand.
        pathname = String(url).split(/[?#]/)[0];
    }
    const lastSegment = pathname.slice(pathname.lastIndexOf('/') + 1);
    const dotIndex = lastSegment.lastIndexOf('.');
    // Require a name before the dot and at least one character after it.
    if (dotIndex <= 0 || dotIndex === lastSegment.length - 1) {
        return '';
    }
    return lastSegment.slice(dotIndex);
}
/**
 * Downloads a remote file and reports its content together with the file
 * extension derived from the URL path.
 *
 * The extension keeps its leading dot, as before. It is now taken from the
 * path component only, so "pic.png?size=large" yields ".png" and a URL
 * without an extension yields '' instead of a slice of the host name.
 *
 * @param {string} url address of the image
 * @returns {Promise<{fileContent: ArrayBuffer, fileExtension: string}>}
 * @throws {Error} when the server answers with a non-2xx status, so an error
 *         page is never stored as if it were the image
 */
async function downloadImage(url) {
    const res = await fetch(url);
    if (!res.ok) {
        throw new Error(`Failed to download image (HTTP ${res.status}): ${url}`);
    }
    return {
        fileContent: await res.arrayBuffer(),
        fileExtension: getUrlFileExtension(url),
    };
}
/**
 * Makes sure a folder exists in the vault, creating it when necessary.
 *
 * Several callers may run this concurrently for the same path (one per image
 * in a note). If creation fails but the folder is present afterwards, another
 * caller is presumed to have created it in the meantime and the error is
 * ignored. Any other failure is rethrown unchanged.
 * NOTE(review): this relies on `getAbstractFileByPath` reflecting the folder
 * once the competing `createFolder` has finished; if it does not, the error
 * is rethrown exactly as before.
 *
 * @param vault object providing getAbstractFileByPath() and createFolder()
 * @param folderpath folder path; normalised with obsidian.normalizePath
 * @returns {Promise<void>}
 */
async function checkAndCreateFolder(vault, folderpath) {
    folderpath = obsidian.normalizePath(folderpath);
    const folderExists = () => {
        const entry = vault.getAbstractFileByPath(folderpath);
        return Boolean(entry) && entry instanceof obsidian.TFolder;
    };
    if (folderExists()) {
        return;
    }
    try {
        await vault.createFolder(folderpath);
    }
    catch (error) {
        if (!folderExists()) {
            throw error;
        }
    }
}
/**
 * Tells whether a string can be parsed by the WHATWG URL constructor.
 *
 * @param {string} url candidate URL
 * @returns {boolean} true when `new URL(url)` does not throw
 */
function isValidUrl(url) {
    let parsedOk = true;
    try {
        void new URL(url);
    }
    catch (e) {
        parsedOk = false;
    }
    return parsedOk;
}
/**
 * Reduces a URL to "scheme://host".
 *
 * - URLs whose first "/" belongs to "://" keep their first three
 *   "/"-separated parts; when `prefix` is given it replaces a leading
 *   http:// or https:// scheme.
 * - Scheme-less inputs containing a dot (not in first position) get `prefix`
 *   (default "https://") in front of everything before the first "/".
 * - Anything else yields undefined.
 *
 * @param {string} url
 * @param {string} [prefix] scheme to use, including "://"
 * @returns {string|undefined}
 */
function getBaseUrl(url, prefix) {
    const SEPARATOR = '/';
    const segments = url.split(SEPARATOR);
    const schemeAt = url.indexOf('://');
    const firstSlashAt = url.indexOf(SEPARATOR);
    const hasScheme = schemeAt !== -1 && schemeAt === firstSlashAt - 1;
    if (!hasScheme) {
        const dotAt = url.indexOf('.');
        if (dotAt === -1 || dotAt === 0) {
            return undefined;
        }
        const scheme = prefix !== undefined ? prefix : 'https://';
        return scheme + segments[0];
    }
    // "scheme:", "" and "host" are always present once "://" was found.
    const origin = segments.slice(0, 3).join(SEPARATOR);
    if (prefix === undefined) {
        return origin;
    }
    return origin.replace(/http:\/\/|https:\/\//, prefix);
}
/**
 * Removes every character that is not allowed in a file name
 * (: # / \ | ? * < > ").
 *
 * The previous version called String.prototype.replace with a string
 * pattern, which removes only the first occurrence of each symbol, and
 * relied on the non-standard `String.prototype.contains`.
 *
 * @param {string} fileName
 * @returns {string} the name without any illegal symbol
 */
function normalizeFilename(fileName) {
    return fileName.replace(/[:#\/\\|?*<>"]/g, '');
}
/**
 * Joins a directory and a sub-path into a vault path.
 *
 * @param {string} dir
 * @param {string} subpath
 * @returns {string} joined path, passed through obsidian.normalizePath
 */
function pathJoin(dir, subpath) {
    const joined = path__default["default"].join(dir, subpath);
    // Obsidian does not seem to understand backslash-separated paths on
    // Windows, so every backslash becomes a forward slash first.
    const forwardSlashed = joined.replace(/\\/g, '/');
    return obsidian.normalizePath(forwardSlashed);
}
// Reference to the global object: the first of globalThis, window, global or
// self that is defined, or an empty object when none of them is.
var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : { } ;
var sparkMd5 = { exports : { } } ;
( function ( module , exports ) {
( function ( factory ) {
{
// Node/CommonJS
module . exports = factory ( ) ;
}
} ( function ( undefined $1 ) {
/*
 * Fastest md5 implementation around (JKM md5).
 * Credits: Joseph Myers
 *
 * @see http://www.myersdaily.org/joseph/javascript/md5-text.html
 * @see http://jsperf.com/md5-shootout/7
 */
/* this function is much faster,
so if possible we use it. Some IEs
are the only ones I know of that
need the idiotic second function,
generated by an if clause. */
// Lower-case hexadecimal digits, indexed by nibble value (0-15).
var hex _chr = [ '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' , 'a' , 'b' , 'c' , 'd' , 'e' , 'f' ] ;
// Per-step additive constants of the MD5 compression function, stored as
// signed 32-bit integers in step order (16 per round).
var MD5_ADDENDS = [
    -680876936, -389564586, 606105819, -1044525330, -176418897, 1200080426, -1473231341, -45705983,
    1770035416, -1958414417, -42063, -1990404162, 1804603682, -40341101, -1502002290, 1236535329,
    -165796510, -1069501632, 643717713, -373897302, -701558691, 38016083, -660478335, -405537848,
    568446438, -1019803690, -187363961, 1163531501, -1444681467, -51403784, 1735328473, -1926607734,
    -378558, -2022574463, 1839030562, -35309556, -1530992060, 1272893353, -155497632, -1094730640,
    681279174, -358537222, -722521979, 76029189, -640364487, -421815835, 530742520, -995338651,
    -198630844, 1126891415, -1416354905, -57434055, 1700485571, -1894986606, -1051523, -2054922799,
    1873313359, -30611744, -1560198380, 1309151649, -145523070, -1120210379, 718787259, -343485551
];
// Left-rotation amounts: four per round, cycled within the round.
var MD5_ROTATIONS = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21];
/**
 * Runs the 64-step MD5 compression function over one 16-word block and adds
 * the result into the running state.
 *
 * @param {Array} x four-word state, updated in place (32-bit wrapped)
 * @param {Array} k sixteen 32-bit words of the message block
 */
function md5cycle(x, k) {
    var a = x[0],
        b = x[1],
        c = x[2],
        d = x[3],
        step,
        round,
        mix,
        wordIndex,
        shift,
        acc;
    for (step = 0; step < 64; step += 1) {
        round = step >> 4;
        if (round === 0) {
            mix = b & c | ~b & d;
            wordIndex = step;
        } else if (round === 1) {
            mix = b & d | c & ~d;
            wordIndex = (5 * step + 1) & 15;
        } else if (round === 2) {
            mix = b ^ c ^ d;
            wordIndex = (3 * step + 5) & 15;
        } else {
            mix = c ^ (b | ~d);
            wordIndex = (7 * step) & 15;
        }
        acc = a + (mix + k[wordIndex] + MD5_ADDENDS[step] | 0);
        shift = MD5_ROTATIONS[(round << 2) | (step & 3)];
        acc = (acc << shift | acc >>> (32 - shift)) + b | 0;
        // Rotate the roles of the four working registers for the next step.
        a = d;
        d = c;
        c = b;
        b = acc;
    }
    x[0] = a + x[0] | 0;
    x[1] = b + x[1] | 0;
    x[2] = c + x[2] | 0;
    x[3] = d + x[3] | 0;
}
/**
 * Packs the first 64 characters of a binary string into sixteen
 * little-endian words (one character code per byte).
 *
 * @param {String} s binary string of at least 64 characters
 * @return {Array} sixteen numbers
 */
function md5blk(s) {
    var words = [],
        offset;
    for (offset = 0; offset < 64; offset += 4) {
        words.push(
            s.charCodeAt(offset) +
            (s.charCodeAt(offset + 1) << 8) +
            (s.charCodeAt(offset + 2) << 16) +
            (s.charCodeAt(offset + 3) << 24)
        );
    }
    return words;
}
/**
 * Packs the first 64 entries of a byte array into sixteen little-endian
 * words.
 *
 * @param {Uint8Array} a at least 64 bytes
 * @return {Array} sixteen numbers
 */
function md5blk_array(a) {
    var words = [],
        offset;
    for (offset = 0; offset < 64; offset += 4) {
        words.push(
            a[offset] +
            (a[offset + 1] << 8) +
            (a[offset + 2] << 16) +
            (a[offset + 3] << 24)
        );
    }
    return words;
}
/**
 * Computes the MD5 state of a binary string: full 64-character blocks are
 * fed to md5cycle, then the padded tail carrying the bit length.
 *
 * @param {String} s binary string (one byte per character)
 * @return {Array} the four-word MD5 state
 */
function md51(s) {
    var totalLength = s.length,
        state = [1732584193, -271733879, -1732584194, 271733878],
        tail = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        offset,
        rest,
        pos,
        bitCount;
    for (offset = 64; offset <= totalLength; offset += 64) {
        md5cycle(state, md5blk(s.substring(offset - 64, offset)));
    }
    rest = s.substring(offset - 64);
    for (pos = 0; pos < rest.length; pos += 1) {
        tail[pos >> 2] |= rest.charCodeAt(pos) << ((pos % 4) << 3);
    }
    // Mandatory 0x80 marker right after the last message byte.
    tail[pos >> 2] |= 0x80 << ((pos % 4) << 3);
    if (pos > 55) {
        // No room left for the 64-bit length: flush and start a fresh block.
        md5cycle(state, tail);
        for (pos = 0; pos < 16; pos += 1) {
            tail[pos] = 0;
        }
    }
    // The bit length may not fit in 32 bits, so it is split into two words.
    bitCount = totalLength * 8;
    tail[14] = bitCount % 4294967296;
    tail[15] = Math.floor(bitCount / 4294967296) || 0;
    md5cycle(state, tail);
    return state;
}
/**
 * Computes the MD5 state of a byte array: full 64-byte blocks are fed to
 * md5cycle, then the padded tail carrying the bit length.
 *
 * @param {Uint8Array} a message bytes
 * @return {Array} the four-word MD5 state
 */
function md51_array(a) {
    var totalLength = a.length,
        state = [1732584193, -271733879, -1732584194, 271733878],
        tail = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        offset,
        rest,
        pos,
        bitCount;
    for (offset = 64; offset <= totalLength; offset += 64) {
        md5cycle(state, md5blk_array(a.subarray(offset - 64, offset)));
    }
    // Not sure if it is a bug, however IE10 will always produce a sub array of length 1
    // containing the last element of the parent array if the sub array specified starts
    // beyond the length of the parent array - weird.
    // https://connect.microsoft.com/IE/feedback/details/771452/typed-array-subarray-issue
    rest = (offset - 64) < totalLength ? a.subarray(offset - 64) : new Uint8Array(0);
    for (pos = 0; pos < rest.length; pos += 1) {
        tail[pos >> 2] |= rest[pos] << ((pos % 4) << 3);
    }
    // Mandatory 0x80 marker right after the last message byte.
    tail[pos >> 2] |= 0x80 << ((pos % 4) << 3);
    if (pos > 55) {
        // No room left for the 64-bit length: flush and start a fresh block.
        md5cycle(state, tail);
        for (pos = 0; pos < 16; pos += 1) {
            tail[pos] = 0;
        }
    }
    // The bit length may not fit in 32 bits, so it is split into two words.
    bitCount = totalLength * 8;
    tail[14] = bitCount % 4294967296;
    tail[15] = Math.floor(bitCount / 4294967296) || 0;
    md5cycle(state, tail);
    return state;
}
/**
 * Formats a 32-bit word as eight hex digits, least-significant byte first.
 *
 * @param {Number} n the word
 * @return {String} eight lower-case hex digits
 */
function rhex(n) {
    var out = '',
        byteIndex,
        shift;
    for (byteIndex = 0; byteIndex < 4; byteIndex += 1) {
        shift = byteIndex * 8;
        out += hex_chr[(n >> (shift + 4)) & 0x0F] + hex_chr[(n >> shift) & 0x0F];
    }
    return out;
}
/**
 * Converts an array of 32-bit words to one hex string.
 * The array is modified in place: each word is replaced by its hex text.
 *
 * @param {Array} x words to convert
 * @return {String} concatenated hex digits
 */
function hex(x) {
    var index = 0;
    while (index < x.length) {
        x[index] = rhex(x[index]);
        index += 1;
    }
    return x.join('');
}
// In some cases the fast add32 function cannot be used..
// The check below hashes 'hello' once when this code loads and compares the
// result with the expected digest. Its body is an empty statement, so
// nothing happens when the comparison fails.
if ( hex ( md51 ( 'hello' ) ) !== '5d41402abc4b2a76b9719d911017c592' ) ;
// ---------------------------------------------------
/**
 * ArrayBuffer slice polyfill, installed only when the runtime has
 * ArrayBuffer but no native ArrayBuffer.prototype.slice.
 *
 * @see https://github.com/ttaubert/node-arraybuffer-slice
 */
if (typeof ArrayBuffer !== 'undefined' && !ArrayBuffer.prototype.slice) {
    (function () {
        // Coerces an index to an integer in [0, length]; negative values
        // count from the end.
        function clamp(val, length) {
            val = (val | 0) || 0;
            return val < 0 ? Math.max(val + length, 0) : Math.min(val, length);
        }
        ArrayBuffer.prototype.slice = function (from, to) {
            var total = this.byteLength;
            var begin = clamp(from, total);
            var end = to !== undefined$1 ? clamp(to, total) : total;
            if (begin > end) {
                return new ArrayBuffer(0);
            }
            var count = end - begin;
            var copy = new ArrayBuffer(count);
            new Uint8Array(copy).set(new Uint8Array(this, begin, count));
            return copy;
        };
    })();
}
// ---------------------------------------------------
/**
 * Helpers.
 */
/**
 * Returns the UTF-8 byte string (one character per byte) of `str` when it
 * contains a character in the \u0080-\uFFFF range; otherwise returns `str`
 * unchanged.
 */
function toUtf8(str) {
    var hasNonAscii = /[\u0080-\uFFFF]/.test(str);
    return hasNonAscii ? unescape(encodeURIComponent(str)) : str;
}
/**
 * Copies the character codes of a string into a new byte buffer (each code
 * is stored in one Uint8Array element).
 *
 * @param {String} str source string
 * @param {Boolean} returnUInt8Array true to get the Uint8Array view,
 *        otherwise the underlying ArrayBuffer is returned
 */
function utf8Str2ArrayBuffer(str, returnUInt8Array) {
    var size = str.length,
        bytes = new Uint8Array(new ArrayBuffer(size)),
        pos = 0;
    while (pos < size) {
        bytes[pos] = str.charCodeAt(pos);
        pos += 1;
    }
    if (returnUInt8Array) {
        return bytes;
    }
    return bytes.buffer;
}
/**
 * Builds a string with one character per byte of the given buffer.
 *
 * @param {ArrayBuffer|Uint8Array} buff bytes to convert
 * @return {String}
 */
function arrayBuffer2Utf8Str(buff) {
    var bytes = new Uint8Array(buff);
    return String.fromCharCode.apply(null, bytes);
}
/**
 * Concatenates two array buffers into a freshly allocated one.
 *
 * @param {ArrayBuffer} first
 * @param {ArrayBuffer} second
 * @param {Boolean} returnUInt8Array true to get a Uint8Array, otherwise the
 *        ArrayBuffer behind it
 */
function concatenateArrayBuffers(first, second, returnUInt8Array) {
    var headSize = first.byteLength,
        joined = new Uint8Array(headSize + second.byteLength);
    joined.set(new Uint8Array(first));
    joined.set(new Uint8Array(second), headSize);
    if (returnUInt8Array) {
        return joined;
    }
    return joined.buffer;
}
/**
 * Decodes a hex string into a binary string, one character per pair of hex
 * digits. A trailing unpaired digit is ignored.
 *
 * @param {String} hex
 * @return {String}
 */
function hexToBinaryString(hex) {
    var codes = [],
        lastPairStart = hex.length - 1,
        pos = 0;
    while (pos < lastPairStart) {
        codes.push(parseInt(hex.substring(pos, pos + 2), 16));
        pos += 2;
    }
    return String.fromCharCode.apply(String, codes);
}
// ---------------------------------------------------
/**
 * SparkMD5 OOP implementation.
 *
 * Use this class to perform an incremental md5, otherwise use the
 * static methods instead.
 */
function SparkMD5() {
    // call reset to init the instance
    this.reset();
}
/**
 * Appends a string.
 * A conversion will be applied if an utf8 string is detected.
 *
 * @param {String} str The string to be appended
 *
 * @return {SparkMD5} The instance itself
 */
SparkMD5.prototype.append = function (str) {
    // Convert to utf8 bytes if necessary, then append as binary
    this.appendBinary(toUtf8(str));
    return this;
};
/**
 * Appends a binary string.
 *
 * @param {String} contents The binary string to be appended
 *
 * @return {SparkMD5} The instance itself
 */
SparkMD5.prototype.appendBinary = function (contents) {
    var pending = this._buff + contents,
        total = pending.length,
        offset;
    this._length += contents.length;
    // Hash every complete 64-character block; keep the remainder buffered.
    for (offset = 64; offset <= total; offset += 64) {
        md5cycle(this._hash, md5blk(pending.substring(offset - 64, offset)));
    }
    this._buff = pending.substring(offset - 64);
    return this;
};
/**
 * Finishes the incremental computation, resetting the internal state and
 * returning the result.
 *
 * @param {Boolean} raw True to get the raw string, false to get the hex string
 *
 * @return {String} The result
 */
SparkMD5.prototype.end = function (raw) {
    var remainder = this._buff,
        size = remainder.length,
        tail = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        pos,
        digest;
    for (pos = 0; pos < size; pos += 1) {
        tail[pos >> 2] |= remainder.charCodeAt(pos) << ((pos % 4) << 3);
    }
    this._finish(tail, size);
    // hex() rewrites this._hash in place; reset() below installs a new array.
    digest = hex(this._hash);
    if (raw) {
        digest = hexToBinaryString(digest);
    }
    this.reset();
    return digest;
};
/**
 * Resets the internal state of the computation.
 *
 * @return {SparkMD5} The instance itself
 */
SparkMD5.prototype.reset = function () {
    this._buff = '';
    this._length = 0;
    this._hash = [1732584193, -271733879, -1732584194, 271733878];
    return this;
};
/**
 * Gets the internal state of the computation.
 *
 * @return {Object} The state (the hash array is a copy)
 */
SparkMD5.prototype.getState = function () {
    return {
        buff: this._buff,
        length: this._length,
        hash: this._hash.slice()
    };
};
/**
 * Sets the internal state of the computation.
 *
 * The hash array is copied: end() rewrites the instance's hash array in
 * place, so sharing the caller's array would corrupt a saved state and make
 * it unusable for a second restore.
 *
 * @param {Object} state The state
 *
 * @return {SparkMD5} The instance itself
 */
SparkMD5.prototype.setState = function (state) {
    this._buff = state.buff;
    this._length = state.length;
    this._hash = state.hash.slice();
    return this;
};
/**
 * Releases memory used by the incremental buffer and other additional
 * resources. If you plan to use the instance again, use reset instead.
 */
SparkMD5.prototype.destroy = function () {
    delete this._hash;
    delete this._buff;
    delete this._length;
};
/**
 * Finish the final calculation based on the tail.
 *
 * @param {Array}  tail   The tail (will be modified)
 * @param {Number} length The length of the remaining buffer
 */
SparkMD5.prototype._finish = function (tail, length) {
    var pos = length,
        parts,
        slot;
    // Mandatory 0x80 marker right after the last message byte.
    tail[pos >> 2] |= 0x80 << ((pos % 4) << 3);
    if (pos > 55) {
        // No room left for the 64-bit length: flush and start a fresh block.
        md5cycle(this._hash, tail);
        for (slot = 0; slot < 16; slot += 1) {
            tail[slot] = 0;
        }
    }
    // The bit length may not fit in 32 bits, so split its hex text into a
    // low word (last 8 digits) and a high word (whatever precedes them).
    parts = (this._length * 8).toString(16).match(/(.*?)(.{0,8})$/);
    tail[14] = parseInt(parts[2], 16);
    tail[15] = parseInt(parts[1], 16) || 0;
    md5cycle(this._hash, tail);
};
/**
 * Performs the md5 hash on a string.
 * A conversion will be applied if utf8 string is detected.
 *
 * @param {String}  str The string
 * @param {Boolean} [raw] True to get the raw string, false to get the hex string
 *
 * @return {String} The result
 */
SparkMD5.hash = function (str, raw) {
    // Convert to utf8 bytes if necessary, then hash as binary
    return SparkMD5.hashBinary(toUtf8(str), raw);
};
/**
 * Performs the md5 hash on a binary string.
 *
 * @param {String}  content The binary string
 * @param {Boolean} [raw]     True to get the raw string, false to get the hex string
 *
 * @return {String} The result
 */
SparkMD5.hashBinary = function (content, raw) {
    var digest = hex(md51(content));
    return raw ? hexToBinaryString(digest) : digest;
};
// ---------------------------------------------------
/**
 * SparkMD5 OOP implementation for array buffers.
 *
 * Use this class to perform an incremental md5 ONLY for array buffers.
 */
SparkMD5.ArrayBuffer = function () {
    // call reset to init the instance
    this.reset();
};
/**
 * Appends an array buffer.
 *
 * @param {ArrayBuffer} arr The array to be appended
 *
 * @return {SparkMD5.ArrayBuffer} The instance itself
 */
SparkMD5.ArrayBuffer.prototype.append = function (arr) {
    var merged = concatenateArrayBuffers(this._buff.buffer, arr, true),
        total = merged.length,
        offset;
    this._length += arr.byteLength;
    // Hash every complete 64-byte block; keep the remainder buffered.
    for (offset = 64; offset <= total; offset += 64) {
        md5cycle(this._hash, md5blk_array(merged.subarray(offset - 64, offset)));
    }
    this._buff = (offset - 64) < total
        ? new Uint8Array(merged.buffer.slice(offset - 64))
        : new Uint8Array(0);
    return this;
};
/**
 * Finishes the incremental computation, resetting the internal state and
 * returning the result.
 *
 * @param {Boolean} raw True to get the raw string, false to get the hex string
 *
 * @return {String} The result
 */
SparkMD5.ArrayBuffer.prototype.end = function (raw) {
    var remainder = this._buff,
        size = remainder.length,
        tail = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        pos,
        digest;
    for (pos = 0; pos < size; pos += 1) {
        tail[pos >> 2] |= remainder[pos] << ((pos % 4) << 3);
    }
    this._finish(tail, size);
    // hex() rewrites this._hash in place; reset() below installs a new array.
    digest = hex(this._hash);
    if (raw) {
        digest = hexToBinaryString(digest);
    }
    this.reset();
    return digest;
};
/**
 * Resets the internal state of the computation.
 *
 * @return {SparkMD5.ArrayBuffer} The instance itself
 */
SparkMD5.ArrayBuffer.prototype.reset = function () {
    this._buff = new Uint8Array(0);
    this._length = 0;
    this._hash = [1732584193, -271733879, -1732584194, 271733878];
    return this;
};
/**
 * Gets the internal state of the computation.
 *
 * @return {Object} The state, with the pending bytes encoded as a string
 */
SparkMD5.ArrayBuffer.prototype.getState = function () {
    var state = SparkMD5.prototype.getState.call(this);
    // Convert buffer to a string
    state.buff = arrayBuffer2Utf8Str(state.buff);
    return state;
};
/**
 * Sets the internal state of the computation.
 *
 * The caller's state object is left untouched. Previously its `buff` was
 * overwritten with a Uint8Array, so restoring the same saved state a second
 * time failed, and its `hash` array was shared with the instance and
 * rewritten by end().
 *
 * @param {Object} state The state
 *
 * @return {SparkMD5.ArrayBuffer} The instance itself
 */
SparkMD5.ArrayBuffer.prototype.setState = function (state) {
    return SparkMD5.prototype.setState.call(this, {
        // Convert string to buffer
        buff: utf8Str2ArrayBuffer(state.buff, true),
        length: state.length,
        hash: state.hash.slice()
    });
};
SparkMD5.ArrayBuffer.prototype.destroy = SparkMD5.prototype.destroy;
SparkMD5.ArrayBuffer.prototype._finish = SparkMD5.prototype._finish;
/**
 * Performs the md5 hash on an array buffer.
 *
 * @param {ArrayBuffer} arr The array buffer
 * @param {Boolean}     [raw] True to get the raw string, false to get the hex one
 *
 * @return {String} The result
 */
SparkMD5.ArrayBuffer.hash = function (arr, raw) {
    var digest = hex(md51_array(new Uint8Array(arr)));
    return raw ? hexToBinaryString(digest) : digest;
};
return SparkMD5 ;
} ) ) ;
} ( sparkMd5 ) ) ;
/**
 * Remembers the MD5 hash of the content downloaded for each link.
 */
class LinkHashes {
    constructor() {
        // link -> hex MD5 of the data first seen for that link
        this.linksInfo = {};
    }
    /**
     * Stores the hash of `data` for `link` unless a hash is already stored.
     */
    ensureHashGenerated(link, data) {
        if (this.linksInfo[link]) {
            return;
        }
        this.linksInfo[link] = sparkMd5.exports.ArrayBuffer.hash(data);
    }
    /**
     * Tells whether `data` hashes to the value stored for `link`.
     */
    isSame(link, data) {
        const knownHash = this.linksInfo[link];
        const candidateHash = sparkMd5.exports.ArrayBuffer.hash(data);
        return knownHash === candidateHash;
    }
}
const linkHashes = new LinkHashes();
// Markdown image tag: ![anchor](link)
const EXTERNAL_MEDIA_LINK_PATTERN = /\!\[(?<anchor>.*?)\]\((?<link>.+?)\)/g;
/**
 * Runs every markdown image tag found in `content` through the processor
 * built by imageTagProcessor and returns the rewritten content.
 *
 * @param app passed through to imageTagProcessor
 * @param {string} content markdown text
 * @param {string} assetsDir passed through to imageTagProcessor
 * @returns {Promise<string>}
 */
async function replaceImages(app, content, assetsDir) {
    const processTag = imageTagProcessor(app, assetsDir);
    return await replaceAsync(content, EXTERNAL_MEDIA_LINK_PATTERN, processTag);
}
/**
 * String.prototype.replace with support for replacer functions that return
 * promises.
 *
 * With a function replacer the work happens in two passes: a first `replace`
 * run only collects what the replacer returns for each match, and once all
 * of those have resolved a second run substitutes the resolved values in
 * match order. With any other replacer the native result is wrapped in a
 * resolved promise. Synchronous errors become a rejected promise.
 *
 * @param {string} string text to search
 * @param {string|RegExp} searchValue
 * @param {string|Function} replacer
 * @returns {Promise<string>}
 */
function replaceAsync(string, searchValue, replacer) {
    const nativeReplace = String.prototype.replace;
    try {
        if (typeof replacer !== 'function') {
            return Promise.resolve(nativeReplace.call(string, searchValue, replacer));
        }
        const pending = [];
        nativeReplace.call(string, searchValue, (...matchArgs) => {
            pending.push(replacer.apply(undefined, matchArgs));
            return '';
        });
        return Promise.all(pending).then((replacements) => {
            let cursor = 0;
            return nativeReplace.call(string, searchValue, () => replacements[cursor++]);
        });
    }
    catch (error) {
        return Promise.reject(error);
    }
}
// How many times a "File already exists." collision is retried per image.
const FILENAME_ATTEMPTS = 5;
/**
 * Builds the replacer used for markdown image tags. The returned function
 * downloads the linked image into `mediaDir` and resolves to a tag pointing
 * at the local file.
 *
 * Processing is best effort: whenever something fails (folder creation,
 * download, file name selection, writing) a warning is logged and the
 * original tag is returned unchanged. Folder creation used to run outside
 * the error handling, so a single failure there rejected the replacement of
 * the whole note.
 *
 * @param app object exposing `vault` (createBinary, adapter)
 * @param {string} mediaDir folder the images are stored in
 * @returns {function(string, string, string): Promise<string>} replacer
 *          taking (match, anchor, link)
 */
function imageTagProcessor(app, mediaDir) {
    return async function processImageTag(match, anchor, link) {
        if (!isValidUrl(link)) {
            return match;
        }
        try {
            await checkAndCreateFolder(app.vault, mediaDir);
            const { fileContent, fileExtension } = await downloadImage(link);
            for (let attempt = 0; attempt < FILENAME_ATTEMPTS; attempt++) {
                try {
                    const { fileName, needWrite } = await chooseFileName(app.vault.adapter, mediaDir, anchor, link, fileContent, fileExtension);
                    if (!fileName) {
                        return match;
                    }
                    if (needWrite) {
                        await app.vault.createBinary(fileName, fileContent);
                    }
                    const maskedFilename = fileName.replace(/\s/g, '%20');
                    return `![${anchor}](${maskedFilename})`;
                }
                catch (error) {
                    // Another writer took the name between the existence
                    // check and the write: pick a new name and retry.
                    if (error.message !== 'File already exists.') {
                        throw error;
                    }
                }
            }
            return match;
        }
        catch (error) {
            console.warn('Image processing failed: ', error);
            return match;
        }
    };
}
// Base name used when neither the anchor nor the URL provides one.
const FILENAME_TEMPLATE = 'media';
// Upper bound for the "-<index>" suffixes tried before giving up.
const MAX_FILENAME_INDEX = 1000;
/**
 * Picks the vault path under which a downloaded media file should be stored.
 *
 * Candidates are "<dir>/<base>.<ext>", then "<dir>/<base>-1.<ext>", and so
 * on. A candidate that already exists is reused (needWrite = false) when its
 * content hash equals the hash recorded for `link`; otherwise the next index
 * is tried.
 *
 * The extension is accepted with or without a leading dot and is cut at the
 * first non-alphanumeric character. Previously a ".png" extension produced
 * "name..png" and the "strip existing extension" step never matched. A base
 * name left empty by normalisation falls back to FILENAME_TEMPLATE.
 *
 * @param adapter object providing exists() and readBinary()
 * @param {string} dir target folder
 * @param {string} baseName preferred name (image anchor); may be empty
 * @param {string} link source URL of the media
 * @param {ArrayBuffer} contentData downloaded content
 * @param {string} fileExtension e.g. ".png" or "png"
 * @returns {Promise<{fileName: string, needWrite: boolean}>} fileName is ''
 *          when no usable extension was supplied
 * @throws {Error} when no free or matching name is found within
 *         MAX_FILENAME_INDEX candidates
 */
async function chooseFileName(adapter, dir, baseName, link, contentData, fileExtension) {
    const extensionMatch = /^\.*([A-Za-z0-9]+)/.exec(String(fileExtension || ''));
    if (!extensionMatch) {
        return { fileName: '', needWrite: false };
    }
    const extension = extensionMatch[1];
    // if there is no anchor try get file name from url
    if (!baseName) {
        const parsedUrl = new URL(link);
        baseName = path.basename(parsedUrl.pathname);
    }
    // if there is no part for file name from url use name template
    if (!baseName) {
        baseName = FILENAME_TEMPLATE;
    }
    // if filename already ends with correct extension, remove it to work with base name
    if (baseName.endsWith(`.${extension}`)) {
        baseName = baseName.slice(0, -1 * (extension.length + 1));
    }
    // normalisation may strip every character (e.g. an anchor of "???")
    baseName = normalizeFilename(baseName) || FILENAME_TEMPLATE;
    let fileName = '';
    let needWrite = true;
    for (let index = 0; !fileName && index < MAX_FILENAME_INDEX; index++) {
        const suggestedName = index
            ? pathJoin(dir, `${baseName}-${index}.${extension}`)
            : pathJoin(dir, `${baseName}.${extension}`);
        if (await adapter.exists(suggestedName, false)) {
            linkHashes.ensureHashGenerated(link, contentData);
            const fileData = await adapter.readBinary(suggestedName);
            if (linkHashes.isSame(link, fileData)) {
                // Same content is already stored: reuse it.
                fileName = suggestedName;
                needWrite = false;
            }
        }
        else {
            fileName = suggestedName;
        }
    }
    if (!fileName) {
        throw new Error('Failed to generate file name for media file.');
    }
    linkHashes.ensureHashGenerated(link, contentData);
    return { fileName, needWrite };
}
// Default plugin settings: target folders, behaviour flags, and the note
// title/body templates for each supported content type.
// Templates contain %placeholder% tokens. YoutubeParser in this file fills
// %title%, %videoTitle%, %videoURL%, %videoId% and %videoPlayer%; the other
// placeholders are presumably filled by parsers not visible here (TODO confirm).
// NOTE(review): the article keys are spelled inconsistently
// ("parseable...NoteTitle" vs "parsable...Note"). They are left as they are
// because code outside this view may read them under these exact names.
const DEFAULT _SETTINGS = {
inboxDir : 'ReadItLater Inbox' ,
assetsDir : 'ReadItLater Inbox/assets' ,
openNewNote : false ,
youtubeNoteTitle : 'Youtube - %title%' ,
youtubeNote : ` [[ReadItLater]] [[Youtube]] \n \n # [%videoTitle%](%videoURL%) \n \n %videoPlayer% ` ,
twitterNoteTitle : 'Tweet from %tweetAuthorName% (%date%)' ,
twitterNote : ` [[ReadItLater]] [[Tweet]] \n \n # [%tweetAuthorName%](%tweetURL%) \n \n %tweetContent% ` ,
parseableArticleNoteTitle : '%title%' ,
parsableArticleNote : ` [[ReadItLater]] [[Article]] \n \n # [%articleTitle%](%articleURL%) \n \n %articleContent% ` ,
notParseableArticleNoteTitle : 'Article %date%' ,
notParsableArticleNote : ` [[ReadItLater]] [[Article]] \n \n [%articleURL%](%articleURL%) ` ,
textSnippetNoteTitle : 'Notice %date%' ,
textSnippetNote : ` [[ReadItLater]] [[Textsnippet]] \n \n %content% ` ,
downloadImages : true ,
} ;
/**
 * A note ready to be written: target file name plus its content.
 */
class Note {
    /**
     * @param {string} fileName
     * @param {string} content
     */
    constructor(fileName, content) {
        Object.assign(this, { fileName, content });
    }
}
/**
 * Base class for content parsers: keeps the app and settings references and
 * offers small shared helpers.
 */
class Parser {
    constructor(app, settings) {
        Object.assign(this, { app, settings });
    }
    /**
     * @param {string} url
     * @returns {boolean} true when `new URL(url)` does not throw
     */
    isValidUrl(url) {
        let parsedOk = true;
        try {
            void new URL(url);
        }
        catch (e) {
            parsedOk = false;
        }
        return parsedOk;
    }
    /**
     * @returns {string} the current date and time formatted as
     *          'YYYY-MM-DD HH-mm-ss' by obsidian.moment
     */
    getFormattedDateForFilename() {
        const now = new Date();
        return obsidian.moment(now).format('YYYY-MM-DD HH-mm-ss');
    }
}
/**
 * Turns a YouTube link into a note containing the video title, the link and
 * an embedded player.
 */
class YoutubeParser extends Parser {
    constructor(app, settings) {
        super(app, settings);
        // Dots are escaped so that only the literal host names match.
        this.PATTERN = /(youtube\.com|youtu\.be)\/(watch)?(\?v=)?(\S+)?/;
    }
    /**
     * @param {string} url
     * @returns {boolean} true for a valid URL matching PATTERN
     */
    test(url) {
        return this.isValidUrl(url) && this.PATTERN.test(url);
    }
    /**
     * Extracts the video id: the `v` query parameter when present, otherwise
     * the last path segment (youtu.be/<id>, /embed/<id>, /shorts/<id>).
     * Other query parameters such as `&t=10s` are never part of the id.
     *
     * @param {string} url absolute URL
     * @returns {string} the id, or '' when the URL has no path segment
     */
    getVideoId(url) {
        const parsed = new URL(url);
        const fromQuery = parsed.searchParams.get('v');
        if (fromQuery) {
            return fromQuery;
        }
        const segments = parsed.pathname.split('/').filter(Boolean);
        return segments.length > 0 ? segments[segments.length - 1] : '';
    }
    /**
     * Fetches the video page, reads its title and fills the note templates
     * from the settings.
     *
     * Replacement values are passed as functions so that "$" sequences in a
     * title or URL (e.g. "$$", "$&") are inserted literally instead of being
     * read as replacement patterns.
     *
     * @param {string} url video URL
     * @returns {Promise<Note>}
     */
    async prepareNote(url) {
        const response = await obsidian.request({ method: 'GET', url });
        const videoTitle = new DOMParser().parseFromString(response, 'text/html').title;
        const videoId = this.getVideoId(url);
        const videoPlayer = `<iframe width="560" height="315" src="https://www.youtube.com/embed/${videoId}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>`;
        const content = this.settings.youtubeNote
            .replace(/%videoTitle%/g, () => videoTitle)
            .replace(/%videoURL%/g, () => url)
            .replace(/%videoId%/g, () => videoId)
            .replace(/%videoPlayer%/g, () => videoPlayer);
        const fileNameTemplate = this.settings.youtubeNoteTitle.replace(/%title%/g, () => videoTitle);
        const fileName = `${fileNameTemplate}.md`;
        return new Note(fileName, content);
    }
}
/**
 * Copies the own enumerable properties of every source object onto
 * `destination`; later sources win.
 *
 * The ownership test goes through Object.prototype.hasOwnProperty, so
 * sources created with Object.create(null), or carrying their own
 * `hasOwnProperty` key, are handled as well.
 *
 * @param {Object} destination object to extend (modified and returned)
 * @param {...Object} sources
 * @returns {Object} destination
 */
function extend (destination) {
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var i = 1; i < arguments.length; i++) {
    var source = arguments[i];
    for (var key in source) {
      if (hasOwn.call(source, key)) destination[key] = source[key];
    }
  }
  return destination
}
/**
 * Repeats `character` `count` times by joining the count + 1 empty slots of
 * a new array.
 */
function repeat (character, count) {
  var slots = new Array(count + 1);
  return slots.join(character)
}
/**
 * Removes every "\n" at the start of a string.
 */
function trimLeadingNewlines (string) {
  var start = 0;
  while (start < string.length && string.charAt(start) === '\n') start++;
  return string.substring(start)
}
/**
 * Removes every "\n" at the end of a string.
 */
function trimTrailingNewlines (string) {
  // scan by hand to avoid the match-at-end regexp bottleneck, see #370
  var end;
  for (end = string.length; end > 0; end--) {
    if (string.charAt(end - 1) !== '\n') break
  }
  return string.slice(0, end)
}
// Upper-case node names checked by isBlock().
var blockElements = [
'ADDRESS' , 'ARTICLE' , 'ASIDE' , 'AUDIO' , 'BLOCKQUOTE' , 'BODY' , 'CANVAS' ,
'CENTER' , 'DD' , 'DIR' , 'DIV' , 'DL' , 'DT' , 'FIELDSET' , 'FIGCAPTION' , 'FIGURE' ,
'FOOTER' , 'FORM' , 'FRAMESET' , 'H1' , 'H2' , 'H3' , 'H4' , 'H5' , 'H6' , 'HEADER' ,
'HGROUP' , 'HR' , 'HTML' , 'ISINDEX' , 'LI' , 'MAIN' , 'MENU' , 'NAV' , 'NOFRAMES' ,
'NOSCRIPT' , 'OL' , 'OUTPUT' , 'P' , 'PRE' , 'SECTION' , 'TABLE' , 'TBODY' , 'TD' ,
'TFOOT' , 'TH' , 'THEAD' , 'TR' , 'UL'
] ;
// True when node.nodeName is one of blockElements.
function isBlock ( node ) {
return is ( node , blockElements )
}
// Upper-case node names checked by isVoid() and hasVoid().
var voidElements = [
'AREA' , 'BASE' , 'BR' , 'COL' , 'COMMAND' , 'EMBED' , 'HR' , 'IMG' , 'INPUT' ,
'KEYGEN' , 'LINK' , 'META' , 'PARAM' , 'SOURCE' , 'TRACK' , 'WBR'
] ;
// True when node.nodeName is one of voidElements.
function isVoid ( node ) {
return is ( node , voidElements )
}
// Truthy when node.getElementsByTagName finds at least one element for any
// of the voidElements names.
function hasVoid ( node ) {
return has ( node , voidElements )
}
var meaningfulWhenBlankElements = [
'A' , 'TABLE' , 'THEAD' , 'TBODY' , 'TFOOT' , 'TH' , 'TD' , 'IFRAME' , 'SCRIPT' ,
'AUDIO' , 'VIDEO'
] ;
function isMeaningfulWhenBlank ( node ) {
return is ( node , meaningfulWhenBlankElements )
}
function hasMeaningfulWhenBlank ( node ) {
return has ( node , meaningfulWhenBlankElements )
}
function is ( node , tagNames ) {
return tagNames . indexOf ( node . nodeName ) >= 0
}
function has ( node , tagNames ) {
return (
node . getElementsByTagName &&
tagNames . some ( function ( tagName ) {
return node . getElementsByTagName ( tagName ) . length
} )
)
}
// Registry of the built-in conversion rules, keyed by rule name.
var rules$1 = {};

// <p>: surround the content with blank lines.
rules$1.paragraph = {
  filter: 'p',
  replacement (content) {
    return `\n\n${content}\n\n`;
  }
};
// <br>: emit the configured line-break marker followed by a newline.
rules$1.lineBreak = {
  filter: 'br',
  replacement (_content, _node, options) {
    return `${options.br}\n`;
  }
};
// <h1>-<h6>: setext underline for levels 1-2 when requested, otherwise
// an ATX heading with one '#' per level.
rules$1.heading = {
  filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
  replacement (content, node, options) {
    // The level is the digit in the tag name (H1 -> 1).
    const level = Number(node.nodeName.charAt(1));
    const useSetext = options.headingStyle === 'setext' && level < 3;
    if (!useSetext) {
      return '\n\n' + repeat('#', level) + ' ' + content + '\n\n';
    }
    const underlineChar = level === 1 ? '=' : '-';
    const underline = repeat(underlineChar, content.length);
    return '\n\n' + content + '\n' + underline + '\n\n';
  }
};
// <blockquote>: prefix every line of the trimmed content with '> '.
rules$1.blockquote = {
  filter: 'blockquote',
  replacement (content) {
    const trimmed = content.replace(/^\n+|\n+$/g, '');
    const quoted = trimmed.replace(/^/gm, '> ');
    return '\n\n' + quoted + '\n\n';
  }
};
// <ul>/<ol>: a list that is the last element of an <li> gets a single
// leading newline; any other list is surrounded by blank lines.
rules$1.list = {
  filter: ['ul', 'ol'],
  replacement (content, node) {
    const owner = node.parentNode;
    const closesListItem = owner.nodeName === 'LI' && owner.lastElementChild === node;
    return closesListItem ? '\n' + content : '\n\n' + content + '\n\n';
  }
};
// <li>: emit the bullet or ordinal prefix followed by the item content.
// Continuation lines are indented by four spaces so that nested lists,
// paragraphs and code stay inside the item; a one-space indent makes
// Markdown parsers treat nested content as siblings of the item.
rules$1.listItem = {
  filter: 'li',
  replacement: function (content, node, options) {
    content = content
      .replace(/^\n+/, '') // remove leading newlines
      .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
      .replace(/\n/gm, '\n    '); // indent continuation lines
    var prefix = options.bulletListMarker + ' ';
    var parent = node.parentNode;
    if (parent.nodeName === 'OL') {
      // Ordinal = optional `start` attribute plus position among siblings.
      var start = parent.getAttribute('start');
      var index = Array.prototype.indexOf.call(parent.children, node);
      prefix = (start ? Number(start) + index : index + 1) + '. ';
    }
    // Separate from the following item unless the content already ends a line.
    return (
      prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
    );
  }
};
// <pre><code> when codeBlockStyle is 'indented': every line of the raw code
// text is indented by four spaces, the minimum Markdown requires for an
// indented code block (a shorter indent renders as a plain paragraph).
rules$1.indentedCodeBlock = {
  filter: function (node, options) {
    return (
      options.codeBlockStyle === 'indented' &&
      node.nodeName === 'PRE' &&
      node.firstChild &&
      node.firstChild.nodeName === 'CODE'
    );
  },
  replacement: function (content, node, options) {
    // Use textContent, not `content`, so the code is emitted unescaped.
    return (
      '\n\n    ' +
      node.firstChild.textContent.replace(/\n/g, '\n    ') +
      '\n\n'
    );
  }
};
// <pre><code> when codeBlockStyle is 'fenced': wrap the raw code text in a
// fence that is longer than any fence-like run inside the code itself.
rules$1.fencedCodeBlock = {
  filter (node, options) {
    return (
      options.codeBlockStyle === 'fenced' &&
      node.nodeName === 'PRE' &&
      node.firstChild &&
      node.firstChild.nodeName === 'CODE'
    );
  },
  replacement (content, node, options) {
    const codeNode = node.firstChild;
    const className = codeNode.getAttribute('class') || '';
    // Language comes from a `language-xxx` class on the <code> element.
    const language = (className.match(/language-(\S+)/) || [null, ''])[1];
    const code = codeNode.textContent;
    const fenceChar = options.fence.charAt(0);
    // Every line-leading run of 3+ fence characters forces a longer fence.
    const fenceRuns = code.match(new RegExp('^' + fenceChar + '{3,}', 'gm')) || [];
    const fenceSize = fenceRuns.reduce(
      (size, run) => (run.length >= size ? run.length + 1 : size),
      3
    );
    const fence = repeat(fenceChar, fenceSize);
    return (
      '\n\n' + fence + language + '\n' +
      code.replace(/\n$/, '') +
      '\n' + fence + '\n\n'
    );
  }
};
// <hr>: emit the configured rule marker on its own paragraph.
rules$1.horizontalRule = {
  filter: 'hr',
  replacement (_content, _node, options) {
    return `\n\n${options.hr}\n\n`;
  }
};
// <a href> when linkStyle is 'inlined': [content](href "title").
rules$1.inlineLink = {
  filter (node, options) {
    return (
      options.linkStyle === 'inlined' &&
      node.nodeName === 'A' &&
      node.getAttribute('href')
    );
  },
  replacement (content, node) {
    const href = node.getAttribute('href');
    const title = cleanAttribute(node.getAttribute('title'));
    // The title, when present, is appended in double quotes.
    const titleSuffix = title ? ' "' + title + '"' : title;
    return '[' + content + '](' + href + titleSuffix + ')';
  }
};
// <a href> when linkStyle is 'referenced': the link text is emitted inline
// and the definition is queued in `references`, then flushed by `append`.
rules$1.referenceLink = {
  filter (node, options) {
    return (
      options.linkStyle === 'referenced' &&
      node.nodeName === 'A' &&
      node.getAttribute('href')
    );
  },
  replacement (content, node, options) {
    const href = node.getAttribute('href');
    let title = cleanAttribute(node.getAttribute('title'));
    if (title) title = ' "' + title + '"';
    const target = href + title;
    const style = options.linkReferenceStyle;
    let replacement;
    let reference;
    if (style === 'collapsed') {
      replacement = '[' + content + '][]';
      reference = '[' + content + ']: ' + target;
    } else if (style === 'shortcut') {
      replacement = '[' + content + ']';
      reference = '[' + content + ']: ' + target;
    } else {
      // Full style: numbered by the position the reference will occupy.
      const id = this.references.length + 1;
      replacement = '[' + content + '][' + id + ']';
      reference = '[' + id + ']: ' + target;
    }
    this.references.push(reference);
    return replacement;
  },
  references: [],
  append (options) {
    if (!this.references.length) return '';
    const block = '\n\n' + this.references.join('\n') + '\n\n';
    this.references = []; // Reset references
    return block;
  }
};
// <em>/<i>: wrap non-blank content in the configured emphasis delimiter.
rules$1.emphasis = {
  filter: ['em', 'i'],
  replacement (content, _node, options) {
    const isBlankContent = !content.trim();
    if (isBlankContent) return '';
    const mark = options.emDelimiter;
    return mark + content + mark;
  }
};
// <strong>/<b>: wrap non-blank content in the configured strong delimiter.
rules$1.strong = {
  filter: ['strong', 'b'],
  replacement (content, _node, options) {
    const isBlankContent = !content.trim();
    if (isBlankContent) return '';
    const mark = options.strongDelimiter;
    return mark + content + mark;
  }
};
// Inline <code>: any CODE element except the sole child of a <pre>, which
// is handled by the code block rules.
rules$1.code = {
  filter (node) {
    const isOnlyChild = !(node.previousSibling || node.nextSibling);
    const isCodeBlock = node.parentNode.nodeName === 'PRE' && isOnlyChild;
    if (node.nodeName !== 'CODE') return false;
    return !isCodeBlock;
  },
  replacement (content) {
    if (!content) return '';
    const flattened = content.replace(/\r?\n|\r/g, ' ');
    // Pad with a space when the content starts or ends with a backtick, or
    // is itself space-padded around non-space text.
    const padding = /^`|^ .*?[^ ].* $|`$/.test(flattened) ? ' ' : '';
    // Grow the delimiter until it differs from every backtick run inside.
    const backtickRuns = flattened.match(/`+/gm) || [];
    let delimiter = '`';
    while (backtickRuns.includes(delimiter)) delimiter += '`';
    return delimiter + padding + flattened + padding + delimiter;
  }
};
// <img>: ![alt](src "title"); images without a src produce nothing.
rules$1.image = {
  filter: 'img',
  replacement (content, node) {
    const alt = cleanAttribute(node.getAttribute('alt'));
    const src = node.getAttribute('src') || '';
    const title = cleanAttribute(node.getAttribute('title'));
    if (!src) return '';
    const titlePart = title ? ' "' + title + '"' : '';
    return '![' + alt + '](' + src + titlePart + ')';
  }
};
/**
 * Normalises an attribute value: each run of newlines (with any whitespace
 * that follows it) becomes a single '\n'. Missing or empty values give ''.
 *
 * @param {String|null} attribute
 * @returns {String}
 */
function cleanAttribute (attribute) {
  if (!attribute) return '';
  return attribute.replace(/(\n+\s*)+/g, '\n');
}
/ * *
* Manages a collection of rules used to convert HTML to Markdown
* /
function Rules (options) {
  // Wrap the three fallback replacements taken from the options.
  var blankRule = { replacement: options.blankReplacement };
  var defaultRule = { replacement: options.defaultReplacement };

  this.options = options;
  this._keep = [];
  this._remove = [];
  this.blankRule = blankRule;
  this.keepReplacement = options.keepReplacement;
  this.defaultRule = defaultRule;

  // Flatten the keyed rule map into an array, in enumeration order.
  var ruleList = [];
  for (var name in options.rules) {
    ruleList.push(options.rules[name]);
  }
  this.array = ruleList;
}
Rules.prototype = {
  // Registers a rule ahead of all existing ones; `key` is not used here.
  add (key, rule) {
    this.array.unshift(rule);
  },

  // Nodes matching `filter` are rendered with the keep replacement.
  keep (filter) {
    const keepRule = { filter: filter, replacement: this.keepReplacement };
    this._keep.unshift(keepRule);
  },

  // Nodes matching `filter` are rendered as an empty string.
  remove (filter) {
    const removeRule = {
      filter: filter,
      replacement () {
        return '';
      }
    };
    this._remove.unshift(removeRule);
  },

  // Picks the rule for a node: blank rule first, then regular rules, keep
  // rules, remove rules, and finally the default rule.
  forNode (node) {
    if (node.isBlank) return this.blankRule;
    const pools = [this.array, this._keep, this._remove];
    for (let p = 0; p < pools.length; p++) {
      const rule = findRule(pools[p], node, this.options);
      if (rule) return rule;
    }
    return this.defaultRule;
  },

  // Calls fn(rule, index) for every regular rule.
  forEach (fn) {
    let index = 0;
    while (index < this.array.length) {
      fn(this.array[index], index);
      index++;
    }
  }
};
/**
 * Returns the first rule in `rules` whose filter accepts `node`, or
 * undefined when none does.
 */
function findRule (rules, node, options) {
  for (const candidate of rules) {
    if (filterValue(candidate, node, options)) return candidate;
  }
  return undefined;
}
/**
 * Tests a rule's filter against a node. A string filter is compared with
 * the lower-cased tag name, an array filter must contain it, and a function
 * filter is called with the rule as `this`.
 *
 * @returns {true|undefined} `true` on a match, otherwise undefined.
 * @throws {TypeError} When the filter is none of the supported kinds.
 */
function filterValue (rule, node, options) {
  const filter = rule.filter;
  let matched;
  if (typeof filter === 'string') {
    matched = filter === node.nodeName.toLowerCase();
  } else if (Array.isArray(filter)) {
    matched = filter.indexOf(node.nodeName.toLowerCase()) > -1;
  } else if (typeof filter === 'function') {
    matched = filter.call(rule, node, options);
  } else {
    throw new TypeError('`filter` needs to be a string, array, or function');
  }
  // Callers only ever see `true` or `undefined`, never the raw result.
  if (matched) return true;
}
/ * *
* The collapseWhitespace function is adapted from collapse - whitespace
* by Luc Thevenard .
*
* The MIT License ( MIT )
*
* Copyright ( c ) 2014 Luc Thevenard < lucthevenard @ gmail . com >
*
* Permission is hereby granted , free of charge , to any person obtaining a copy
* of this software and associated documentation files ( the "Software" ) , to deal
* in the Software without restriction , including without limitation the rights
* to use , copy , modify , merge , publish , distribute , sublicense , and / or sell
* copies of the Software , and to permit persons to whom the Software is
* furnished to do so , subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software .
*
* THE SOFTWARE IS PROVIDED "AS IS" , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR
* IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY ,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT . IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER
* LIABILITY , WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING FROM ,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE .
* /
/**
 * collapseWhitespace(options) removes extraneous whitespace from the given element.
 *
 * @param {Object} options
 */
// Walks the subtree of options.element in document order and normalises its
// text nodes in place: whitespace runs are collapsed to one space, spaces at
// block/BR boundaries are trimmed, and emptied text nodes are removed.
// Nothing is returned; the DOM under `element` is mutated directly.
function collapseWhitespace ( options ) {
var element = options . element ;
var isBlock = options . isBlock ;
var isVoid = options . isVoid ;
// When the caller supplies no isPre predicate, only <pre> counts.
var isPre = options . isPre || function ( node ) {
return node . nodeName === 'PRE'
} ;
// Nothing to do for an empty element or a preformatted root.
if ( ! element . firstChild || isPre ( element ) ) return
// prevText: last text node kept in the current inline run (null right after
// a block, BR, void or pre element).
var prevText = null ;
// keepLeadingWs: set after a void/pre element so that the leading space of
// the next text node is not stripped.
var keepLeadingWs = false ;
var prev = null ;
var node = next ( prev , element , isPre ) ;
// The traversal ends when next()/remove() climb back up to the root element.
while ( node !== element ) {
if ( node . nodeType === 3 || node . nodeType === 4 ) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node . data . replace ( /[ \r\n\t]+/g , ' ' ) ;
// Drop the leading space when there is no previous text in this run or
// that text already ends with a space, unless it is protected.
if ( ( ! prevText || / $/ . test ( prevText . data ) ) &&
! keepLeadingWs && text [ 0 ] === ' ' ) {
text = text . substr ( 1 ) ;
}
// `text` might be empty at this point.
if ( ! text ) {
// remove() already returns the node to visit next, so skip the advance below.
node = remove ( node ) ;
continue
}
node . data = text ;
prevText = node ;
} else if ( node . nodeType === 1 ) { // Node.ELEMENT_NODE
if ( isBlock ( node ) || node . nodeName === 'BR' ) {
// A block or <br> ends the inline run: trim the trailing space of the
// text before it and start a fresh run.
if ( prevText ) {
prevText . data = prevText . data . replace ( / $/ , '' ) ;
}
prevText = null ;
keepLeadingWs = false ;
} else if ( isVoid ( node ) || isPre ( node ) ) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null ;
keepLeadingWs = true ;
} else if ( prevText ) {
// Drop protection if set previously.
keepLeadingWs = false ;
}
} else {
// Any node that is neither text/CDATA nor an element is removed.
node = remove ( node ) ;
continue
}
// Advance; `prev` lets next() tell whether it is coming back up from a child.
var nextNode = next ( prev , node , isPre ) ;
prev = node ;
node = nextNode ;
}
// Trim the trailing space of the last text node and drop it if that
// leaves it empty.
if ( prevText ) {
prevText . data = prevText . data . replace ( / $/ , '' ) ;
if ( ! prevText . data ) {
remove ( prevText ) ;
}
}
}
/ * *
* remove ( node ) removes the given node from the DOM and returns the
* next node in the sequence .
*
* @ param { Node } node
* @ return { Node } node
* /
function remove (node) {
  const parent = node.parentNode;
  // Work out the successor before detaching: the next sibling if there is
  // one, otherwise the parent.
  const successor = node.nextSibling || parent;
  parent.removeChild(node);
  return successor;
}
/ * *
* next ( prev , current , isPre ) returns the next node in the sequence , given the
* current and previous nodes .
*
* @ param { Node } prev
* @ param { Node } current
* @ param { Function } isPre
* @ return { Node }
* /
function next (prev, current, isPre) {
  // Descend into `current` only when we are not returning from one of its
  // children and it is not preformatted.
  const cameUpFromChild = prev && prev.parentNode === current;
  if (!cameUpFromChild && !isPre(current)) {
    if (current.firstChild) return current.firstChild;
  }
  return current.nextSibling || current.parentNode;
}
/ *
* Set up window for Node . js
* /
// Global object used for feature lookups; an empty stand-in when there is
// no `window`.
var root;
if (typeof window === 'undefined') {
  root = {};
} else {
  root = window;
}
/ *
* Parsing HTML strings
* /
function canParseHTMLNatively () {
  const Parser = root.DOMParser;
  // Adapted from https://gist.github.com/1129031
  try {
    // WebKit returns null on unsupported types
    return new Parser().parseFromString('', 'text/html') ? true : false;
  } catch (e) {
    // Firefox/Opera/IE throw errors on unsupported types; a missing
    // DOMParser lands here as well.
    return false;
  }
}
// Builds a minimal DOMParser stand-in whose parseFromString writes the
// markup into a fresh document (an ActiveX 'htmlfile' when required).
function createHTMLParser () {
  const Parser = function () {};

  const parseWithActiveX = function (string) {
    const doc = new window.ActiveXObject('htmlfile');
    doc.designMode = 'on'; // disable on-page scripts
    doc.open();
    doc.write(string);
    doc.close();
    return doc;
  };

  const parseWithDocument = function (string) {
    const doc = document.implementation.createHTMLDocument('');
    doc.open();
    doc.write(string);
    doc.close();
    return doc;
  };

  Parser.prototype.parseFromString = shouldUseActiveX()
    ? parseWithActiveX
    : parseWithDocument;
  return Parser;
}
// True only when opening a document created via createHTMLDocument throws
// and window.ActiveXObject is available as the alternative.
function shouldUseActiveX () {
  try {
    document.implementation.createHTMLDocument('').open();
    return false;
  } catch (e) {
    return window.ActiveXObject ? true : false;
  }
}
// Prefer the platform DOMParser; otherwise fall back to the hand-built one.
var HTMLParser;
if (canParseHTMLNatively()) {
  HTMLParser = root.DOMParser;
} else {
  HTMLParser = createHTMLParser();
}
/**
 * Produces the root element to convert: HTML strings are parsed, DOM nodes
 * are deep-cloned, and in both cases whitespace is collapsed before the
 * element is returned.
 */
function RootNode (input, options) {
  let rootElement;
  if (typeof input !== 'string') {
    rootElement = input.cloneNode(true);
  } else {
    // DOM parsers arrange elements in the <head> and <body>.
    // Wrapping in a custom element ensures elements are reliably arranged in
    // a single element.
    const wrapped = '<x-turndown id="turndown-root">' + input + '</x-turndown>';
    const doc = htmlParser().parseFromString(wrapped, 'text/html');
    rootElement = doc.getElementById('turndown-root');
  }
  const whitespaceOptions = {
    element: rootElement,
    isBlock: isBlock,
    isVoid: isVoid,
    isPre: options.preformattedCode ? isPreOrCode : null
  };
  collapseWhitespace(whitespaceOptions);
  return rootElement;
}
// Parser instance shared by all conversions; created on first use.
var _htmlParser;
function htmlParser () {
  if (!_htmlParser) {
    _htmlParser = new HTMLParser();
  }
  return _htmlParser;
}
/** True when the node is a PRE or CODE element. */
function isPreOrCode (node) {
  const tag = node.nodeName;
  return tag === 'PRE' || tag === 'CODE';
}
/**
 * Annotates a DOM node in place with the flags the converter relies on
 * (isBlock, isCode, isBlank, flankingWhitespace) and returns that node.
 */
function Node (node, options) {
  node.isBlock = isBlock(node);
  // A node counts as code when it is a CODE element or its parent does.
  node.isCode = node.nodeName === 'CODE' ? true : node.parentNode.isCode;
  node.isBlank = isBlank(node);
  // Computed last: it reads the isBlock and isCode flags set above.
  node.flankingWhitespace = flankingWhitespace(node, options);
  return node;
}
/**
 * A node is blank when it has only whitespace text and neither it nor any
 * descendant is a void or meaningful-when-blank element.
 */
function isBlank (node) {
  if (isVoid(node) || isMeaningfulWhenBlank(node)) return false;
  if (!/^\s*$/i.test(node.textContent)) return false;
  return !hasVoid(node) && !hasMeaningfulWhenBlank(node);
}
/**
 * Computes the whitespace to re-emit before and after an inline node.
 * Block nodes, and code nodes when preformattedCode is on, get none.
 *
 * @returns {{leading: String, trailing: String}}
 */
function flankingWhitespace (node, options) {
  const skip = node.isBlock || (options.preformattedCode && node.isCode);
  if (skip) return { leading: '', trailing: '' };

  const edges = edgeWhitespace(node.textContent);
  let leading = edges.leading;
  let trailing = edges.trailing;
  // abandon leading ASCII WS if left-flanked by ASCII WS
  if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
    leading = edges.leadingNonAscii;
  }
  // abandon trailing ASCII WS if right-flanked by ASCII WS
  if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
    trailing = edges.trailingNonAscii;
  }
  return { leading: leading, trailing: trailing };
}
/**
 * Splits the whitespace at both ends of `string` into its ASCII part
 * (space, tab, CR, LF) and the remaining whitespace.
 */
function edgeWhitespace (string) {
  const [
    ,
    leading, // whole string for whitespace-only strings
    leadingAscii,
    leadingNonAscii,
    trailing, // empty for whitespace-only strings
    trailingNonAscii,
    trailingAscii
  ] = string.match(/^(([ \t\r\n]*)(\s*))[\s\S]*?((\s*?)([ \t\r\n]*))$/);
  return {
    leading,
    leadingAscii,
    leadingNonAscii,
    trailing,
    trailingNonAscii,
    trailingAscii
  };
}
/**
 * Tells whether the sibling on the given side ('left', anything else means
 * right) touches the node with a space. Returns undefined when there is no
 * sibling or the sibling is of a kind that is not examined.
 */
function isFlankedByWhitespace (side, node, options) {
  const onLeft = side === 'left';
  const sibling = onLeft ? node.previousSibling : node.nextSibling;
  // Left siblings must end with a space, right siblings must start with one.
  const boundary = onLeft ? / $/ : /^ /;

  if (!sibling) return undefined;
  if (sibling.nodeType === 3) return boundary.test(sibling.nodeValue);
  if (options.preformattedCode && sibling.nodeName === 'CODE') return false;
  if (sibling.nodeType === 1 && !isBlock(sibling)) {
    return boundary.test(sibling.textContent);
  }
  return undefined;
}
// Borrowed so array-likes such as NodeList can be reduced (see process()).
var reduce = Array . prototype . reduce ;
// [pattern, replacement] pairs applied in this order by
// TurndownService.prototype.escape. The backslash pair comes first so the
// backslashes inserted by the later pairs are not escaped again.
// Patterns starting with `^` carry no `m` flag, so they only match at the
// very start of the string being escaped.
var escapes = [
[ /\\/g , '\\\\' ] ,
[ /\*/g , '\\*' ] ,
[ /^-/g , '\\-' ] ,
[ /^\+ /g , '\\+ ' ] ,
[ /^(=+)/g , '\\$1' ] ,
[ /^(#{1,6}) /g , '\\$1 ' ] ,
[ /`/g , '\\`' ] ,
[ /^~~~/g , '\\~~~' ] ,
[ /\[/g , '\\[' ] ,
[ /\]/g , '\\]' ] ,
[ /^>/g , '\\>' ] ,
[ /_/g , '\\_' ] ,
[ /^(\d+)\. /g , '$1\\. ' ]
] ;
/**
 * HTML-to-Markdown converter. Can be called with or without `new`.
 *
 * @param {Object} [options] Overrides for the defaults listed below; they
 *   are shallow-merged on top of the defaults.
 */
function TurndownService (options) {
  if (!(this instanceof TurndownService)) return new TurndownService(options)
  var defaults = {
    rules: rules$1,
    headingStyle: 'setext',
    hr: '* * *',
    bulletListMarker: '*',
    codeBlockStyle: 'indented',
    fence: '```',
    emDelimiter: '_',
    strongDelimiter: '**',
    linkStyle: 'inlined',
    linkReferenceStyle: 'full',
    // Markdown needs at least two trailing spaces before the newline for a
    // hard line break; a single space is rendered as a soft wrap.
    br: '  ',
    preformattedCode: false,
    // Used for nodes flagged as blank.
    blankReplacement: function (content, node) {
      return node.isBlock ? '\n\n' : ''
    },
    // Used for nodes registered through keep(): emitted as raw HTML.
    keepReplacement: function (content, node) {
      return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
    },
    // Used when no rule matches: the converted content passes through.
    defaultReplacement: function (content, node) {
      return node.isBlock ? '\n\n' + content + '\n\n' : content
    }
  };
  this.options = extend({}, defaults, options);
  this.rules = new Rules(this.options);
}
TurndownService.prototype = {
  /**
   * The entry point for converting a string or DOM node to Markdown
   * @public
   * @param {String|HTMLElement} input The string or DOM node to convert
   * @returns A Markdown representation of the input
   * @type String
   */
  turndown (input) {
    if (!canConvert(input)) {
      throw new TypeError(
        input + ' is not a string, or an element/document/fragment node.'
      );
    }
    if (input === '') return '';
    const rootNode = new RootNode(input, this.options);
    const output = process.call(this, rootNode);
    return postProcess.call(this, output);
  },

  /**
   * Add one or more plugins
   * @public
   * @param {Function|Array} plugin The plugin or array of plugins to add
   * @returns The Turndown instance for chaining
   * @type Object
   */
  use (plugin) {
    if (Array.isArray(plugin)) {
      for (const entry of plugin) this.use(entry);
      return this;
    }
    if (typeof plugin !== 'function') {
      throw new TypeError('plugin must be a Function or an Array of Functions');
    }
    plugin(this);
    return this;
  },

  /**
   * Adds a rule
   * @public
   * @param {String} key The unique key of the rule
   * @param {Object} rule The rule
   * @returns The Turndown instance for chaining
   * @type Object
   */
  addRule (key, rule) {
    this.rules.add(key, rule);
    return this;
  },

  /**
   * Keep a node (as HTML) that matches the filter
   * @public
   * @param {String|Array|Function} filter The unique key of the rule
   * @returns The Turndown instance for chaining
   * @type Object
   */
  keep (filter) {
    this.rules.keep(filter);
    return this;
  },

  /**
   * Remove a node that matches the filter
   * @public
   * @param {String|Array|Function} filter The unique key of the rule
   * @returns The Turndown instance for chaining
   * @type Object
   */
  remove (filter) {
    this.rules.remove(filter);
    return this;
  },

  /**
   * Escapes Markdown syntax
   * @public
   * @param {String} string The string to escape
   * @returns A string with Markdown syntax escaped
   * @type String
   */
  escape (string) {
    let escaped = string;
    for (const pair of escapes) {
      escaped = escaped.replace(pair[0], pair[1]);
    }
    return escaped;
  }
};
/**
 * Reduces a DOM node down to its Markdown string equivalent
 * @private
 * @param {HTMLElement} parentNode The node to convert
 * @returns A Markdown representation of the node
 * @type String
 */
function process (parentNode) {
  // The arrow callback keeps `this` bound to the TurndownService instance.
  return reduce.call(parentNode.childNodes, (output, child) => {
    const node = new Node(child, this.options);
    let replacement = '';
    if (node.nodeType === 3) {
      // Text inside code is emitted verbatim; other text is escaped.
      replacement = node.isCode ? node.nodeValue : this.escape(node.nodeValue);
    } else if (node.nodeType === 1) {
      replacement = replacementForNode.call(this, node);
    }
    return join(output, replacement);
  }, '');
}
/**
 * Appends strings as each rule requires and trims the output
 * @private
 * @param {String} output The conversion output
 * @returns A trimmed version of the output
 * @type String
 */
function postProcess (output) {
  let result = output;
  this.rules.forEach((rule) => {
    if (typeof rule.append !== 'function') return;
    result = join(result, rule.append(this.options));
  });
  const withoutLeading = result.replace(/^[\t\r\n]+/, '');
  return withoutLeading.replace(/[\t\r\n\s]+$/, '');
}
/**
 * Converts an element node to its Markdown equivalent
 * @private
 * @param {HTMLElement} node The node to convert
 * @returns A Markdown representation of the node
 * @type String
 */
function replacementForNode (node) {
  const rule = this.rules.forNode(node);
  const { leading, trailing } = node.flankingWhitespace;
  let content = process.call(this, node);
  // Flanking whitespace is re-added outside the replacement, so strip it
  // from the content first.
  if (leading || trailing) content = content.trim();
  const replaced = rule.replacement(content, node, this.options);
  return leading + replaced + trailing;
}
/**
 * Joins replacement to the current output with appropriate number of new lines
 * @private
 * @param {String} output The current conversion output
 * @param {String} replacement The string to append to the output
 * @returns Joined output
 * @type String
 */
function join (output, replacement) {
  const head = trimTrailingNewlines(output);
  const tail = trimLeadingNewlines(replacement);
  const trailingCount = output.length - head.length;
  const leadingCount = replacement.length - tail.length;
  // Use the larger newline count from either side, capped at two.
  const separator = '\n\n'.substring(0, Math.max(trailingCount, leadingCount));
  return head + separator + tail;
}
/**
 * Determines whether an input can be converted
 * @private
 * @param {String|HTMLElement} input The string or DOM node to check
 * @returns A truthy value when input is a string or an element, document or document fragment node
 * @type Boolean (or another falsy value when input has no usable nodeType)
 */
function canConvert (input) {
  if (input == null) return false;
  if (typeof input === 'string') return true;
  // Element (1), document (9) and document fragment (11) nodes qualify.
  const type = input.nodeType;
  return type && (type === 1 || type === 9 || type === 11);
}
// Export object of the bundled GFM plugin, flagged as an ES module for
// interop. The flag is set via defineProperty with only `value`.
var turndownPluginGfm_cjs = Object.defineProperty({}, '__esModule', { value: true });
// Matches `highlight-text-xxx` / `highlight-source-xxx` class names and
// captures the language part.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Plugin: converts <div class="highlight-…"><pre>…</pre></div> wrappers
 * into fenced code blocks tagged with the language from the class name.
 */
function highlightedCodeBlock (turndownService) {
  const rule = {
    filter (node) {
      const pre = node.firstChild;
      return (
        node.nodeName === 'DIV' &&
        highlightRegExp.test(node.className) &&
        pre &&
        pre.nodeName === 'PRE'
      );
    },
    replacement (content, node, options) {
      const className = node.className || '';
      const language = (className.match(highlightRegExp) || [null, ''])[1];
      const opening = options.fence + language;
      return (
        '\n\n' + opening + '\n' +
        node.firstChild.textContent +
        '\n' + options.fence + '\n\n'
      );
    }
  };
  turndownService.addRule('highlightedCodeBlock', rule);
}
/** Plugin: renders <del>, <s> and <strike> content between tildes. */
function strikethrough (turndownService) {
  const rule = {
    filter: ['del', 's', 'strike'],
    replacement (content) {
      return `~${content}~`;
    }
  };
  turndownService.addRule('strikethrough', rule);
}
// Borrowed so a NodeList can be searched (see cell()).
var indexOf = Array.prototype.indexOf;

// Table conversion rules, registered by the `tables` plugin.
var rules = {};

// <th>/<td>: the cell itself plus one empty cell per extra spanned column.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement (content, node) {
    const ownCell = cell(content, node);
    return ownCell + spannedCells(node, '');
  }
};
// <tr>: one line per row; the heading row is followed by the delimiter
// row, whose cells encode each column's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement (content, node) {
    const row = '\n' + content;
    if (!isHeadingRow(node)) return row;

    const alignMap = { left: ':--', right: '--:', center: ':-:' };
    const cells = node.childNodes;
    let borderCells = '';
    for (let i = 0; i < cells.length; i++) {
      const current = cells[i];
      const align = (current.getAttribute('align') || '').toLowerCase();
      // Unknown or missing alignment falls back to the plain border.
      const border = (align && alignMap[align]) || '---';
      borderCells += cell(border, current) + spannedCells(current, border);
    }
    return borderCells ? row + '\n' + borderCells : row;
  }
};
rules.table = {
  // Only convert tables that are not nested in another table, they are kept using `keep` (see below).
  // TODO: nested tables should be converted to plain text in a strict (non HTML) gfm
  filter: function (node) {
    return node.nodeName === 'TABLE' && !isNestedTable(node)
  },
  replacement: function (content) {
    // Ensure there are no blank lines. A string pattern would only replace
    // the first occurrence, so use a global regexp that also collapses runs
    // of more than two newlines.
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n'
  }
};
// <thead>/<tbody>/<tfoot>: transparent wrappers, content passes through.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement (content) {
    const passthrough = content;
    return passthrough;
  }
};
// <caption>: kept only when it is the very first child node of a TABLE;
// any other caption is dropped.
rules.captionSection = {
  filter: 'caption',
  replacement (content, node) {
    const table = node.parentNode;
    const leadsTable = table.nodeName === 'TABLE' && table.childNodes[0] === node;
    return leadsTable ? content : '';
  }
};
/**
 * True when `tr` is the first row of its table, whether it sits directly
 * under the TABLE or inside a THEAD/TBODY/TFOOT section.
 */
function isHeadingRow (tr) {
  const sectionTags = ['THEAD', 'TFOOT', 'TBODY'];
  const parent = tr.parentNode;
  const inSection = sectionTags.indexOf(parent.nodeName) !== -1;
  const tableNode = inSection ? parent.parentNode : parent;
  return tableNode.nodeName === 'TABLE' && tableNode.rows[0] === tr;
}
/**
 * Formats one table cell as ` content |`; the first child node of the row
 * additionally gets the opening `| `.
 */
function cell (content, node) {
  const position = indexOf.call(node.parentNode.childNodes, node);
  const prefix = position === 0 ? '| ' : ' ';
  // Ensure single line per cell (both windows and unix EoL)
  // TODO: allow gfm non-strict mode to replace new lines by `<br/>`
  const singleLine = content.replace(/\r\n/g, '\n').replace(/\n/g, ' ');
  // | must be escaped as \|
  const escaped = singleLine.replace(/\|/g, '\\|');
  return prefix + escaped + ' |';
}
/**
 * Returns the extra cells a `colspan` attribute calls for (colspan - 1 of
 * them, each holding `spannedCellContent`), or '' when nothing is spanned.
 */
function spannedCells (node, spannedCellContent) {
  const span = node.getAttribute('colspan') || 1;
  if (span <= 1) return '';
  const filler = ' ' + spannedCellContent + ' |';
  return filler.repeat(span - 1);
}
/** True when any ancestor of `tableNode` is itself a TABLE. */
function isNestedTable (tableNode) {
  for (let ancestor = tableNode.parentNode; ancestor; ancestor = ancestor.parentNode) {
    if (ancestor.nodeName === 'TABLE') return true;
  }
  return false;
}
/**
 * Plugin: registers the table rules and keeps nested tables as raw HTML.
 */
function tables (turndownService) {
  turndownService.keep((node) => node.nodeName === 'TABLE' && isNestedTable(node));
  Object.keys(rules).forEach((key) => {
    turndownService.addRule(key, rules[key]);
  });
}
/**
 * Plugin: turns a checkbox that is a direct child of an <li> into a
 * `[x] ` / `[ ] ` task marker.
 */
function taskListItems (turndownService) {
  const rule = {
    filter (node) {
      return node.type === 'checkbox' && node.parentNode.nodeName === 'LI';
    },
    replacement (content, node) {
      const box = node.checked ? '[x]' : '[ ]';
      return box + ' ';
    }
  };
  turndownService.addRule('taskListItems', rule);
}
/** Plugin bundle: installs the four GFM plugins in one call. */
function gfm (turndownService) {
  const plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
// Publish the plugin entry points on the export object; `gfm_1` is the
// local alias used by parseHtmlContent.
var gfm_1 = gfm;
turndownPluginGfm_cjs.gfm = gfm_1;
turndownPluginGfm_cjs.highlightedCodeBlock = highlightedCodeBlock;
turndownPluginGfm_cjs.strikethrough = strikethrough;
turndownPluginGfm_cjs.tables = tables;
turndownPluginGfm_cjs.taskListItems = taskListItems;
/**
 * Converts an HTML string (or DOM node) to Markdown using ATX headings,
 * '---' rules, '-' bullets, fenced code blocks, '*' emphasis and the GFM
 * plugin bundle.
 *
 * @param {String|HTMLElement} content Input accepted by TurndownService#turndown.
 * @returns {Promise<String>} Resolves with the Markdown text.
 */
function parseHtmlContent (content) {
  return __awaiter(this, void 0, void 0, function* () {
    const converterOptions = {
      headingStyle: 'atx',
      hr: '---',
      bulletListMarker: '-',
      codeBlockStyle: 'fenced',
      emDelimiter: '*',
    };
    const converter = new TurndownService(converterOptions);
    converter.use(gfm_1);
    return converter.turndown(content);
  });
}
/**
 * Parser for tweet URLs: fetches the tweet through Twitter's oEmbed
 * endpoint and fills the user's tweet note templates.
 */
class TwitterParser extends Parser {
    constructor(app, settings) {
        super(app, settings);
        // The dot is escaped so only the literal host "twitter.com" matches.
        this.PATTERN = /(https:\/\/twitter\.com\/([a-zA-Z0-9_]+\/)([a-zA-Z0-9_]+\/[a-zA-Z0-9_]+))/;
    }
    /**
     * @param {string} url Candidate URL.
     * @returns {boolean} True when the URL is valid and looks like a tweet link.
     */
    test(url) {
        return this.isValidUrl(url) && this.PATTERN.test(url);
    }
    /**
     * Builds the note for a tweet.
     *
     * @param {string} url Tweet URL.
     * @returns {Promise<Note>} Note with the templated file name and content.
     */
    prepareNote(url) {
        return __awaiter(this, void 0, void 0, function* () {
            // Percent-encode the tweet URL: it is a query parameter value, and
            // any '?', '&' or '#' it contains would otherwise corrupt the
            // oEmbed request.
            const response = JSON.parse(yield obsidian.request({
                method: 'GET',
                contentType: 'application/json',
                url: `https://publish.twitter.com/oembed?url=${encodeURIComponent(url)}`,
            }));
            const tweetAuthorName = response.author_name;
            const content = yield parseHtmlContent(response.html);
            // Replacer functions insert the values verbatim. With plain string
            // replacements, sequences such as "$&" or "$$" in the tweet text or
            // author name would be interpreted as replacement patterns.
            const processedContent = this.settings.twitterNote
                .replace(/%tweetAuthorName%/g, () => tweetAuthorName)
                .replace(/%tweetURL%/g, () => response.url)
                .replace(/%tweetContent%/g, () => content);
            const fileNameTemplate = this.settings.twitterNoteTitle
                .replace(/%tweetAuthorName%/g, () => tweetAuthorName)
                .replace(/%date%/g, () => this.getFormattedDateForFilename());
            const fileName = `${fileNameTemplate}.md`;
            return new Note(fileName, processedContent);
        });
    }
}
var Readability$1 = { exports : { } } ;
/*eslint-env es6:false*/
( function ( module ) {
/ *
* Copyright ( c ) 2010 Arc90 Inc
*
* Licensed under the Apache License , Version 2.0 ( the "License" ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
*
* http : //www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an "AS IS" BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
* /
/ *
* This code is heavily based on Arc90 ' s readability . js ( 1.7 . 1 ) script
* available at : http : //code.google.com/p/arc90labs-readability
* /
/**
 * Public constructor.
 *
 * Remembers the document, resets the extracted article metadata, copies the
 * caller-supplied options onto the instance (falling back to the prototype
 * defaults) and installs `this.log`, which is a no-op unless `options.debug`
 * is truthy.
 *
 * @param {HTMLDocument} doc The document to parse.
 * @param {Object} options The options object.
 * @throws {Error} When no document object was supplied.
 */
function Readability(doc, options) {
  // Legacy call shape was (uri, doc, options): when the second argument looks
  // like a document, shift everything one position to the left.
  var calledWithLegacyUri = Boolean(options && options.documentElement);
  if (calledWithLegacyUri) {
    doc = options;
    options = arguments[2];
  } else if (!doc || !doc.documentElement) {
    throw new Error("First argument to Readability constructor should be a document object.");
  }
  if (!options) {
    options = {};
  }

  this._doc = doc;
  // Marker read from the document's first child; other methods branch on it.
  this._docJSDOMParser = this._doc.firstChild.__JSDOMParser__;

  // Article metadata, filled in while parsing.
  this._articleTitle = null;
  this._articleByline = null;
  this._articleDir = null;
  this._articleSiteName = null;
  this._attempts = [];

  // Configurable options
  this._debug = !!options.debug;
  this._maxElemsToParse = options.maxElemsToParse || this.DEFAULT_MAX_ELEMS_TO_PARSE;
  this._nbTopCandidates = options.nbTopCandidates || this.DEFAULT_N_TOP_CANDIDATES;
  this._charThreshold = options.charThreshold || this.DEFAULT_CHAR_THRESHOLD;
  this._classesToPreserve = this.CLASSES_TO_PRESERVE.concat(options.classesToPreserve || []);
  this._keepClasses = !!options.keepClasses;
  this._serializer = options.serializer || function (el) {
    return el.innerHTML;
  };
  this._disableJSONLD = !!options.disableJSONLD;

  // Start with all flags set
  this._flags = this.FLAG_STRIP_UNLIKELYS |
    this.FLAG_WEIGHT_CLASSES |
    this.FLAG_CLEAN_CONDITIONALLY;

  // Control whether log messages are sent to the console
  if (!this._debug) {
    this.log = function () {};
    return;
  }

  // Renders a DOM node as a short human-readable string for log output.
  var describeNode = function (node) {
    if (node.nodeType == node.TEXT_NODE) {
      return `${node.nodeName} ("${node.textContent}")`;
    }
    var renderedAttrs = Array.from(node.attributes || [], function (attr) {
      return `${attr.name}="${attr.value}"`;
    });
    var attrPairs = renderedAttrs.join(" ");
    return `<${node.localName} ${attrPairs}>`;
  };

  this.log = function () {
    // Prefer a global `dump` function when the host provides one.
    if (typeof dump !== "undefined") {
      var parts = Array.prototype.map.call(arguments, function (x) {
        return (x && x.nodeName) ? describeNode(x) : x;
      });
      dump("Reader: (Readability) " + parts.join(" ") + "\n");
      return;
    }
    if (typeof console === "undefined") {
      return;
    }
    var args = Array.from(arguments, arg => {
      var isElement = arg && arg.nodeType == this.ELEMENT_NODE;
      return isElement ? describeNode(arg) : arg;
    });
    args.unshift("Reader: (Readability)");
    console.log.apply(console, args);
  };
}
Readability . prototype = {
// Bit flags. The constructor ORs all three together into this._flags, so
// every flag starts out enabled.
FLAG _STRIP _UNLIKELYS : 0x1 ,
FLAG _WEIGHT _CLASSES : 0x2 ,
FLAG _CLEAN _CONDITIONALLY : 0x4 ,
// DOM nodeType values used when inspecting nodes.
// https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
ELEMENT _NODE : 1 ,
TEXT _NODE : 3 ,
// Max number of nodes supported by this parser. Default: 0 (no limit)
// Used by the constructor when options.maxElemsToParse is falsy.
DEFAULT _MAX _ELEMS _TO _PARSE : 0 ,
// The number of top candidates to consider when analysing how
// tight the competition is among candidates.
// Used by the constructor when options.nbTopCandidates is falsy.
DEFAULT _N _TOP _CANDIDATES : 5 ,
// Element tags to score by default.
// Stored as an array of upper-case tag names so it can be compared directly
// against node.tagName.
DEFAULT _TAGS _TO _SCORE : "section,h2,h3,h4,h5,h6,p,td,pre" . toUpperCase ( ) . split ( "," ) ,
// The default number of chars an article must have in order to return a result
// Used by the constructor when options.charThreshold is falsy (so an explicit
// 0 also falls back to this value).
DEFAULT _CHAR _THRESHOLD : 500 ,
// All of the regular expressions in use within readability.
// Defined up here so we don't instantiate them repeatedly in loops.
REGEXPS : {
// NOTE: These two regular expressions are duplicated in
// Readability-readerable.js. Please keep both copies in sync.
unlikelyCandidates : /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i ,
okMaybeItsACandidate : /and|article|body|column|content|main|shadow/i ,
positive : /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i ,
negative : /-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i ,
extraneous : /print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i ,
byline : /byline|author|dateline|writtenby|p-author/i ,
replaceFonts : /<(\/?)font[^>]*>/gi ,
normalize : /\s{2,}/g ,
videos : /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i ,
shareElements : /(\b|_)(share|sharedaddy)(\b|_)/i ,
nextLink : /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i ,
prevLink : /(prev|earl|old|new|<|«)/i ,
tokenize : /\W+/g ,
whitespace : /^\s*$/ ,
hasContent : /\S$/ ,
hashUrl : /^#.+/ ,
srcsetUrl : /(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/g ,
b64DataUrl : /^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i ,
// See: https://schema.org/Article
jsonLdArticleTypes : /^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/
} ,
// Values of the "role" attribute that mark a node as unlikely content;
// _grabArticle removes such nodes while FLAG_STRIP_UNLIKELYS is active.
UNLIKELY _ROLES : [ "menu" , "menubar" , "complementary" , "navigation" , "alert" , "alertdialog" , "dialog" ] ,
// Upper-case tag names, held in a Set.
// NOTE(review): presumably the block-level children that stop a DIV from
// being converted to a P -- confirm against _hasChildBlockElement.
DIV _TO _P _ELEMS : new Set ( [ "BLOCKQUOTE" , "DL" , "DIV" , "IMG" , "OL" , "P" , "PRE" , "TABLE" , "UL" ] ) ,
// NOTE(review): consumer is outside this excerpt; the name suggests tags that
// are exempt from being rewritten to DIV -- confirm.
ALTER _TO _DIV _EXCEPTIONS : [ "DIV" , "ARTICLE" , "SECTION" , "P" ] ,
// Attribute names that only affect presentation.
// NOTE(review): presumably stripped by _cleanStyles -- confirm.
PRESENTATIONAL _ATTRIBUTES : [ "align" , "background" , "bgcolor" , "border" , "cellpadding" , "cellspacing" , "frame" , "hspace" , "rules" , "style" , "valign" , "vspace" ] ,
// NOTE(review): presumably the elements whose width/height attributes are
// also removed during style cleanup -- confirm.
DEPRECATED _SIZE _ATTRIBUTE _ELEMS : [ "TABLE" , "TH" , "TD" , "HR" , "PRE" ] ,
// The commented out elements qualify as phrasing content but tend to be
// removed by readability when put into paragraphs, so we ignore them here.
PHRASING _ELEMS : [
// "CANVAS", "IFRAME", "SVG", "VIDEO",
"ABBR" , "AUDIO" , "B" , "BDO" , "BR" , "BUTTON" , "CITE" , "CODE" , "DATA" ,
"DATALIST" , "DFN" , "EM" , "EMBED" , "I" , "IMG" , "INPUT" , "KBD" , "LABEL" ,
"MARK" , "MATH" , "METER" , "NOSCRIPT" , "OBJECT" , "OUTPUT" , "PROGRESS" , "Q" ,
"RUBY" , "SAMP" , "SCRIPT" , "SELECT" , "SMALL" , "SPAN" , "STRONG" , "SUB" ,
"SUP" , "TEXTAREA" , "TIME" , "VAR" , "WBR"
] ,
// These are the classes that readability sets itself.
// The constructor appends options.classesToPreserve to this list to build
// this._classesToPreserve.
CLASSES _TO _PRESERVE : [ "page" ] ,
// Maps HTML entity names (without the surrounding "&" and ";") to the
// characters they stand for.
HTML _ESCAPE _MAP : {
"lt" : "<" ,
"gt" : ">" ,
"amp" : "&" ,
"quot" : '"' ,
"apos" : "'" ,
} ,
/ * *
* Run any post - process modifications to article content as necessary .
*
* @ param Element
* @ return void
* * /
_postProcessContent : function ( articleContent ) {
// Readability cannot open relative uris so we convert them to absolute uris.
this . _fixRelativeUris ( articleContent ) ;
this . _simplifyNestedElements ( articleContent ) ;
if ( ! this . _keepClasses ) {
// Remove classes.
this . _cleanClasses ( articleContent ) ;
}
} ,
/ * *
* Iterates over a NodeList , calls ` filterFn ` for each node and removes node
* if function returned ` true ` .
*
* If function is not passed , removes all the nodes in node list .
*
* @ param NodeList nodeList The nodes to operate on
* @ param Function filterFn the function to use as a filter
* @ return void
* /
_removeNodes : function ( nodeList , filterFn ) {
// Avoid ever operating on live node lists.
if ( this . _docJSDOMParser && nodeList . _isLiveNodeList ) {
throw new Error ( "Do not pass live node lists to _removeNodes" ) ;
}
for ( var i = nodeList . length - 1 ; i >= 0 ; i -- ) {
var node = nodeList [ i ] ;
var parentNode = node . parentNode ;
if ( parentNode ) {
if ( ! filterFn || filterFn . call ( this , node , i , nodeList ) ) {
parentNode . removeChild ( node ) ;
}
}
}
} ,
/ * *
* Iterates over a NodeList , and calls _setNodeTag for each node .
*
* @ param NodeList nodeList The nodes to operate on
* @ param String newTagName the new tag name to use
* @ return void
* /
_replaceNodeTags : function ( nodeList , newTagName ) {
// Avoid ever operating on live node lists.
if ( this . _docJSDOMParser && nodeList . _isLiveNodeList ) {
throw new Error ( "Do not pass live node lists to _replaceNodeTags" ) ;
}
for ( const node of nodeList ) {
this . _setNodeTag ( node , newTagName ) ;
}
} ,
/ * *
* Iterate over a NodeList , which doesn ' t natively fully implement the Array
* interface .
*
* For convenience , the current object context is applied to the provided
* iterate function .
*
* @ param NodeList nodeList The NodeList .
* @ param Function fn The iterate function .
* @ return void
* /
_forEachNode : function ( nodeList , fn ) {
Array . prototype . forEach . call ( nodeList , fn , this ) ;
} ,
/ * *
* Iterate over a NodeList , and return the first node that passes
* the supplied test function
*
* For convenience , the current object context is applied to the provided
* test function .
*
* @ param NodeList nodeList The NodeList .
* @ param Function fn The test function .
* @ return void
* /
_findNode : function ( nodeList , fn ) {
return Array . prototype . find . call ( nodeList , fn , this ) ;
} ,
/ * *
* Iterate over a NodeList , return true if any of the provided iterate
* function calls returns true , false otherwise .
*
* For convenience , the current object context is applied to the
* provided iterate function .
*
* @ param NodeList nodeList The NodeList .
* @ param Function fn The iterate function .
* @ return Boolean
* /
_someNode : function ( nodeList , fn ) {
return Array . prototype . some . call ( nodeList , fn , this ) ;
} ,
/ * *
* Iterate over a NodeList , return true if all of the provided iterate
* function calls return true , false otherwise .
*
* For convenience , the current object context is applied to the
* provided iterate function .
*
* @ param NodeList nodeList The NodeList .
* @ param Function fn The iterate function .
* @ return Boolean
* /
_everyNode : function ( nodeList , fn ) {
return Array . prototype . every . call ( nodeList , fn , this ) ;
} ,
/ * *
* Concat all nodelists passed as arguments .
*
* @ return ... NodeList
* @ return Array
* /
_concatNodeLists : function ( ) {
var slice = Array . prototype . slice ;
var args = slice . call ( arguments ) ;
var nodeLists = args . map ( function ( list ) {
return slice . call ( list ) ;
} ) ;
return Array . prototype . concat . apply ( [ ] , nodeLists ) ;
} ,
_getAllNodesWithTag : function ( node , tagNames ) {
if ( node . querySelectorAll ) {
return node . querySelectorAll ( tagNames . join ( "," ) ) ;
}
return [ ] . concat . apply ( [ ] , tagNames . map ( function ( tag ) {
var collection = node . getElementsByTagName ( tag ) ;
return Array . isArray ( collection ) ? collection : Array . from ( collection ) ;
} ) ) ;
} ,
/ * *
* Removes the class = "" attribute from every element in the given
* subtree , except those that match CLASSES _TO _PRESERVE and
* the classesToPreserve array from the options object .
*
* @ param Element
* @ return void
* /
_cleanClasses : function ( node ) {
var classesToPreserve = this . _classesToPreserve ;
var className = ( node . getAttribute ( "class" ) || "" )
. split ( /\s+/ )
. filter ( function ( cls ) {
return classesToPreserve . indexOf ( cls ) != - 1 ;
} )
. join ( " " ) ;
if ( className ) {
node . setAttribute ( "class" , className ) ;
} else {
node . removeAttribute ( "class" ) ;
}
for ( node = node . firstElementChild ; node ; node = node . nextElementSibling ) {
this . _cleanClasses ( node ) ;
}
} ,
/ * *
* Converts each < a > and < img > uri in the given element to an absolute URI ,
* ignoring # ref URIs .
*
* @ param Element
* @ return void
* /
_fixRelativeUris : function ( articleContent ) {
var baseURI = this . _doc . baseURI ;
var documentURI = this . _doc . documentURI ;
function toAbsoluteURI ( uri ) {
// Leave hash links alone if the base URI matches the document URI:
if ( baseURI == documentURI && uri . charAt ( 0 ) == "#" ) {
return uri ;
}
// Otherwise, resolve against base URI:
try {
return new URL ( uri , baseURI ) . href ;
} catch ( ex ) {
// Something went wrong, just return the original:
}
return uri ;
}
var links = this . _getAllNodesWithTag ( articleContent , [ "a" ] ) ;
this . _forEachNode ( links , function ( link ) {
var href = link . getAttribute ( "href" ) ;
if ( href ) {
// Remove links with javascript: URIs, since
// they won't work after scripts have been removed from the page.
if ( href . indexOf ( "javascript:" ) === 0 ) {
// if the link only contains simple text content, it can be converted to a text node
if ( link . childNodes . length === 1 && link . childNodes [ 0 ] . nodeType === this . TEXT _NODE ) {
var text = this . _doc . createTextNode ( link . textContent ) ;
link . parentNode . replaceChild ( text , link ) ;
} else {
// if the link has multiple children, they should all be preserved
var container = this . _doc . createElement ( "span" ) ;
while ( link . firstChild ) {
container . appendChild ( link . firstChild ) ;
}
link . parentNode . replaceChild ( container , link ) ;
}
} else {
link . setAttribute ( "href" , toAbsoluteURI ( href ) ) ;
}
}
} ) ;
var medias = this . _getAllNodesWithTag ( articleContent , [
"img" , "picture" , "figure" , "video" , "audio" , "source"
] ) ;
this . _forEachNode ( medias , function ( media ) {
var src = media . getAttribute ( "src" ) ;
var poster = media . getAttribute ( "poster" ) ;
var srcset = media . getAttribute ( "srcset" ) ;
if ( src ) {
media . setAttribute ( "src" , toAbsoluteURI ( src ) ) ;
}
if ( poster ) {
media . setAttribute ( "poster" , toAbsoluteURI ( poster ) ) ;
}
if ( srcset ) {
var newSrcset = srcset . replace ( this . REGEXPS . srcsetUrl , function ( _ , p1 , p2 , p3 ) {
return toAbsoluteURI ( p1 ) + ( p2 || "" ) + p3 ;
} ) ;
media . setAttribute ( "srcset" , newSrcset ) ;
}
} ) ;
} ,
_simplifyNestedElements : function ( articleContent ) {
var node = articleContent ;
while ( node ) {
if ( node . parentNode && [ "DIV" , "SECTION" ] . includes ( node . tagName ) && ! ( node . id && node . id . startsWith ( "readability" ) ) ) {
if ( this . _isElementWithoutContent ( node ) ) {
node = this . _removeAndGetNext ( node ) ;
continue ;
} else if ( this . _hasSingleTagInsideElement ( node , "DIV" ) || this . _hasSingleTagInsideElement ( node , "SECTION" ) ) {
var child = node . children [ 0 ] ;
for ( var i = 0 ; i < node . attributes . length ; i ++ ) {
child . setAttribute ( node . attributes [ i ] . name , node . attributes [ i ] . value ) ;
}
node . parentNode . replaceChild ( child , node ) ;
node = child ;
continue ;
}
}
node = this . _getNextNode ( node ) ;
}
} ,
/ * *
* Get the article title as an H1 .
*
* @ return string
* * /
_getArticleTitle : function ( ) {
var doc = this . _doc ;
var curTitle = "" ;
var origTitle = "" ;
try {
curTitle = origTitle = doc . title . trim ( ) ;
// If they had an element with id "title" in their HTML
if ( typeof curTitle !== "string" )
curTitle = origTitle = this . _getInnerText ( doc . getElementsByTagName ( "title" ) [ 0 ] ) ;
} catch ( e ) { /* ignore exceptions setting the title. */ }
var titleHadHierarchicalSeparators = false ;
function wordCount ( str ) {
return str . split ( /\s+/ ) . length ;
}
// If there's a separator in the title, first remove the final part
if ( ( / [\|\-\\\/>»] / ) . test ( curTitle ) ) {
titleHadHierarchicalSeparators = / [\\\/>»] / . test ( curTitle ) ;
curTitle = origTitle . replace ( /(.*)[\|\-\\\/>»] .*/gi , "$1" ) ;
// If the resulting title is too short (3 words or fewer), remove
// the first part instead:
if ( wordCount ( curTitle ) < 3 )
curTitle = origTitle . replace ( /[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi , "$1" ) ;
} else if ( curTitle . indexOf ( ": " ) !== - 1 ) {
// Check if we have an heading containing this exact string, so we
// could assume it's the full title.
var headings = this . _concatNodeLists (
doc . getElementsByTagName ( "h1" ) ,
doc . getElementsByTagName ( "h2" )
) ;
var trimmedTitle = curTitle . trim ( ) ;
var match = this . _someNode ( headings , function ( heading ) {
return heading . textContent . trim ( ) === trimmedTitle ;
} ) ;
// If we don't, let's extract the title out of the original title string.
if ( ! match ) {
curTitle = origTitle . substring ( origTitle . lastIndexOf ( ":" ) + 1 ) ;
// If the title is now too short, try the first colon instead:
if ( wordCount ( curTitle ) < 3 ) {
curTitle = origTitle . substring ( origTitle . indexOf ( ":" ) + 1 ) ;
// But if we have too many words before the colon there's something weird
// with the titles and the H tags so let's just use the original title instead
} else if ( wordCount ( origTitle . substr ( 0 , origTitle . indexOf ( ":" ) ) ) > 5 ) {
curTitle = origTitle ;
}
}
} else if ( curTitle . length > 150 || curTitle . length < 15 ) {
var hOnes = doc . getElementsByTagName ( "h1" ) ;
if ( hOnes . length === 1 )
curTitle = this . _getInnerText ( hOnes [ 0 ] ) ;
}
curTitle = curTitle . trim ( ) . replace ( this . REGEXPS . normalize , " " ) ;
// If we now have 4 words or fewer as our title, and either no
// 'hierarchical' separators (\, /, > or ») were found in the original
// title or we decreased the number of words by more than 1 word, use
// the original title.
var curTitleWordCount = wordCount ( curTitle ) ;
if ( curTitleWordCount <= 4 &&
( ! titleHadHierarchicalSeparators ||
curTitleWordCount != wordCount ( origTitle . replace ( /[\|\-\\\/>»]+/g , "" ) ) - 1 ) ) {
curTitle = origTitle ;
}
return curTitle ;
} ,
/ * *
* Prepare the HTML document for readability to scrape it .
* This includes things like stripping javascript , CSS , and handling terrible markup .
*
* @ return void
* * /
_prepDocument : function ( ) {
var doc = this . _doc ;
// Remove all style tags in head
this . _removeNodes ( this . _getAllNodesWithTag ( doc , [ "style" ] ) ) ;
if ( doc . body ) {
this . _replaceBrs ( doc . body ) ;
}
this . _replaceNodeTags ( this . _getAllNodesWithTag ( doc , [ "font" ] ) , "SPAN" ) ;
} ,
/ * *
* Finds the next node , starting from the given node , and ignoring
* whitespace in between . If the given node is an element , the same node is
* returned .
* /
_nextNode : function ( node ) {
var next = node ;
while ( next
&& ( next . nodeType != this . ELEMENT _NODE )
&& this . REGEXPS . whitespace . test ( next . textContent ) ) {
next = next . nextSibling ;
}
return next ;
} ,
/ * *
* Replaces 2 or more successive < br > elements with a single < p > .
* Whitespace between < br > elements are ignored . For example :
* < div > foo < br > bar < br > < br > < br > abc < / d i v >
* will become :
* < div > foo < br > bar < p > abc < / p > < / d i v >
* /
// Collapses each run of consecutive <br> elements under `elem` into a <p>
// that adopts the phrasing content following the run. A lone <br> is left
// untouched. Works on the list returned by _getAllNodesWithTag, so the
// callback also runs for <br>s that an earlier iteration already removed.
_replaceBrs : function ( elem ) {
this . _forEachNode ( this . _getAllNodesWithTag ( elem , [ "br" ] ) , function ( br ) {
var next = br . nextSibling ;
// Whether 2 or more <br> elements have been found and replaced with a
// <p> block.
var replaced = false ;
// If we find a <br> chain, remove the <br>s until we hit another node
// or non-whitespace. This leaves behind the first <br> in the chain
// (which will be replaced with a <p> later).
while ( ( next = this . _nextNode ( next ) ) && ( next . tagName == "BR" ) ) {
replaced = true ;
// Remember the successor before detaching, since the loop continues from it.
var brSibling = next . nextSibling ;
next . parentNode . removeChild ( next ) ;
next = brSibling ;
}
// If we removed a <br> chain, replace the remaining <br> with a <p>. Add
// all sibling nodes as children of the <p> until we hit another <br>
// chain.
if ( replaced ) {
var p = this . _doc . createElement ( "p" ) ;
br . parentNode . replaceChild ( p , br ) ;
next = p . nextSibling ;
while ( next ) {
// If we've hit another <br><br>, we're done adding children to this <p>.
if ( next . tagName == "BR" ) {
var nextElem = this . _nextNode ( next . nextSibling ) ;
if ( nextElem && nextElem . tagName == "BR" )
break ;
}
// Non-phrasing content also ends the paragraph.
if ( ! this . _isPhrasingContent ( next ) )
break ;
// Otherwise, make this node a child of the new <p>.
var sibling = next . nextSibling ;
p . appendChild ( next ) ;
next = sibling ;
}
// Drop trailing whitespace nodes from the new paragraph.
while ( p . lastChild && this . _isWhitespace ( p . lastChild ) ) {
p . removeChild ( p . lastChild ) ;
}
// The new <p> must not sit inside another <p>, so the parent is retagged as DIV.
if ( p . parentNode . tagName === "P" )
this . _setNodeTag ( p . parentNode , "DIV" ) ;
}
} ) ;
} ,
_setNodeTag : function ( node , tag ) {
this . log ( "_setNodeTag" , node , tag ) ;
if ( this . _docJSDOMParser ) {
node . localName = tag . toLowerCase ( ) ;
node . tagName = tag . toUpperCase ( ) ;
return node ;
}
var replacement = node . ownerDocument . createElement ( tag ) ;
while ( node . firstChild ) {
replacement . appendChild ( node . firstChild ) ;
}
node . parentNode . replaceChild ( replacement , node ) ;
if ( node . readability )
replacement . readability = node . readability ;
for ( var i = 0 ; i < node . attributes . length ; i ++ ) {
try {
replacement . setAttribute ( node . attributes [ i ] . name , node . attributes [ i ] . value ) ;
} catch ( ex ) {
/ * i t ' s p o s s i b l e f o r s e t A t t r i b u t e ( ) t o t h r o w i f t h e a t t r i b u t e n a m e
* isn ' t a valid XML Name . Such attributes can however be parsed from
* source in HTML docs , see https : //github.com/whatwg/html/issues/4275,
* so we can hit them here and then throw . We don ' t care about such
* attributes so we ignore them .
* /
}
}
return replacement ;
} ,
/ * *
* Prepare the article node for display . Clean out any inline styles ,
* iframes , forms , strip extraneous < p > tags , etc .
*
* @ param Element
* @ return void
* * /
_prepArticle : function ( articleContent ) {
this . _cleanStyles ( articleContent ) ;
// Check for data tables before we continue, to avoid removing items in
// those tables, which will often be isolated even though they're
// visually linked to other content-ful elements (text, images, etc.).
this . _markDataTables ( articleContent ) ;
this . _fixLazyImages ( articleContent ) ;
// Clean out junk from the article content
this . _cleanConditionally ( articleContent , "form" ) ;
this . _cleanConditionally ( articleContent , "fieldset" ) ;
this . _clean ( articleContent , "object" ) ;
this . _clean ( articleContent , "embed" ) ;
this . _clean ( articleContent , "footer" ) ;
this . _clean ( articleContent , "link" ) ;
this . _clean ( articleContent , "aside" ) ;
// Clean out elements with little content that have "share" in their id/class combinations from final top candidates,
// which means we don't remove the top candidates even they have "share".
var shareElementThreshold = this . DEFAULT _CHAR _THRESHOLD ;
this . _forEachNode ( articleContent . children , function ( topCandidate ) {
this . _cleanMatchedNodes ( topCandidate , function ( node , matchString ) {
return this . REGEXPS . shareElements . test ( matchString ) && node . textContent . length < shareElementThreshold ;
} ) ;
} ) ;
this . _clean ( articleContent , "iframe" ) ;
this . _clean ( articleContent , "input" ) ;
this . _clean ( articleContent , "textarea" ) ;
this . _clean ( articleContent , "select" ) ;
this . _clean ( articleContent , "button" ) ;
this . _cleanHeaders ( articleContent ) ;
// Do these last as the previous stuff may have removed junk
// that will affect these
this . _cleanConditionally ( articleContent , "table" ) ;
this . _cleanConditionally ( articleContent , "ul" ) ;
this . _cleanConditionally ( articleContent , "div" ) ;
// replace H1 with H2 as H1 should be only title that is displayed separately
this . _replaceNodeTags ( this . _getAllNodesWithTag ( articleContent , [ "h1" ] ) , "h2" ) ;
// Remove extra paragraphs
this . _removeNodes ( this . _getAllNodesWithTag ( articleContent , [ "p" ] ) , function ( paragraph ) {
var imgCount = paragraph . getElementsByTagName ( "img" ) . length ;
var embedCount = paragraph . getElementsByTagName ( "embed" ) . length ;
var objectCount = paragraph . getElementsByTagName ( "object" ) . length ;
// At this point, nasty iframes have been removed, only remain embedded video ones.
var iframeCount = paragraph . getElementsByTagName ( "iframe" ) . length ;
var totalCount = imgCount + embedCount + objectCount + iframeCount ;
return totalCount === 0 && ! this . _getInnerText ( paragraph , false ) ;
} ) ;
this . _forEachNode ( this . _getAllNodesWithTag ( articleContent , [ "br" ] ) , function ( br ) {
var next = this . _nextNode ( br . nextSibling ) ;
if ( next && next . tagName == "P" )
br . parentNode . removeChild ( br ) ;
} ) ;
// Remove single-cell tables
this . _forEachNode ( this . _getAllNodesWithTag ( articleContent , [ "table" ] ) , function ( table ) {
var tbody = this . _hasSingleTagInsideElement ( table , "TBODY" ) ? table . firstElementChild : table ;
if ( this . _hasSingleTagInsideElement ( tbody , "TR" ) ) {
var row = tbody . firstElementChild ;
if ( this . _hasSingleTagInsideElement ( row , "TD" ) ) {
var cell = row . firstElementChild ;
cell = this . _setNodeTag ( cell , this . _everyNode ( cell . childNodes , this . _isPhrasingContent ) ? "P" : "DIV" ) ;
table . parentNode . replaceChild ( cell , table ) ;
}
}
} ) ;
} ,
/ * *
* Initialize a node with the readability object . Also checks the
* className / id for special names to add to its score .
*
* @ param Element
* @ return void
* * /
_initializeNode : function ( node ) {
node . readability = { "contentScore" : 0 } ;
switch ( node . tagName ) {
case "DIV" :
node . readability . contentScore += 5 ;
break ;
case "PRE" :
case "TD" :
case "BLOCKQUOTE" :
node . readability . contentScore += 3 ;
break ;
case "ADDRESS" :
case "OL" :
case "UL" :
case "DL" :
case "DD" :
case "DT" :
case "LI" :
case "FORM" :
node . readability . contentScore -= 3 ;
break ;
case "H1" :
case "H2" :
case "H3" :
case "H4" :
case "H5" :
case "H6" :
case "TH" :
node . readability . contentScore -= 5 ;
break ;
}
node . readability . contentScore += this . _getClassWeight ( node ) ;
} ,
_removeAndGetNext : function ( node ) {
var nextNode = this . _getNextNode ( node , true ) ;
node . parentNode . removeChild ( node ) ;
return nextNode ;
} ,
/ * *
* Traverse the DOM from node to node , starting at the node passed in .
* Pass true for the second parameter to indicate this node itself
* ( and its kids ) are going away , and we want the next node over .
*
* Calling this in a loop will traverse the DOM depth - first .
* /
_getNextNode : function ( node , ignoreSelfAndKids ) {
// First check for kids if those aren't being ignored
if ( ! ignoreSelfAndKids && node . firstElementChild ) {
return node . firstElementChild ;
}
// Then for siblings...
if ( node . nextElementSibling ) {
return node . nextElementSibling ;
}
// And finally, move up the parent chain *and* find a sibling
// (because this is depth-first traversal, we will have already
// seen the parent nodes themselves).
do {
node = node . parentNode ;
} while ( node && ! node . nextElementSibling ) ;
return node && node . nextElementSibling ;
} ,
// compares second text to first one
// 1 = same text, 0 = completely different text
// works the way that it splits both texts into words and then finds words that are unique in second text
// the result is given by the lower length of unique parts
_textSimilarity : function ( textA , textB ) {
var tokensA = textA . toLowerCase ( ) . split ( this . REGEXPS . tokenize ) . filter ( Boolean ) ;
var tokensB = textB . toLowerCase ( ) . split ( this . REGEXPS . tokenize ) . filter ( Boolean ) ;
if ( ! tokensA . length || ! tokensB . length ) {
return 0 ;
}
var uniqTokensB = tokensB . filter ( token => ! tokensA . includes ( token ) ) ;
var distanceB = uniqTokensB . join ( " " ) . length / tokensB . join ( " " ) . length ;
return 1 - distanceB ;
} ,
_checkByline : function ( node , matchString ) {
if ( this . _articleByline ) {
return false ;
}
if ( node . getAttribute !== undefined ) {
var rel = node . getAttribute ( "rel" ) ;
var itemprop = node . getAttribute ( "itemprop" ) ;
}
if ( ( rel === "author" || ( itemprop && itemprop . indexOf ( "author" ) !== - 1 ) || this . REGEXPS . byline . test ( matchString ) ) && this . _isValidByline ( node . textContent ) ) {
this . _articleByline = node . textContent . trim ( ) ;
return true ;
}
return false ;
} ,
_getNodeAncestors : function ( node , maxDepth ) {
maxDepth = maxDepth || 0 ;
var i = 0 , ancestors = [ ] ;
while ( node . parentNode ) {
ancestors . push ( node . parentNode ) ;
if ( maxDepth && ++ i === maxDepth )
break ;
node = node . parentNode ;
}
return ancestors ;
} ,
/ * * *
* grabArticle - Using a variety of metrics ( content score , classname , element types ) , find the content that is
* most likely to be the stuff a user wants to read . Then return it wrapped up in a div .
*
* @ param page a document to run upon . Needs to be a full document , complete with body .
* @ return Element
* * /
_grabArticle : function ( page ) {
this . log ( "**** grabArticle ****" ) ;
var doc = this . _doc ;
var isPaging = page !== null ;
page = page ? page : this . _doc . body ;
// We can't grab an article if we don't have a page!
if ( ! page ) {
this . log ( "No body found in document. Abort." ) ;
return null ;
}
var pageCacheHtml = page . innerHTML ;
while ( true ) {
this . log ( "Starting grabArticle loop" ) ;
var stripUnlikelyCandidates = this . _flagIsActive ( this . FLAG _STRIP _UNLIKELYS ) ;
// First, node prepping. Trash nodes that look cruddy (like ones with the
// class name "comment", etc), and turn divs into P tags where they have been
// used inappropriately (as in, where they contain no other block level elements.)
var elementsToScore = [ ] ;
var node = this . _doc . documentElement ;
let shouldRemoveTitleHeader = true ;
while ( node ) {
if ( node . tagName === "HTML" ) {
this . _articleLang = node . getAttribute ( "lang" ) ;
}
var matchString = node . className + " " + node . id ;
if ( ! this . _isProbablyVisible ( node ) ) {
this . log ( "Removing hidden node - " + matchString ) ;
node = this . _removeAndGetNext ( node ) ;
continue ;
}
// Check to see if this node is a byline, and remove it if it is.
if ( this . _checkByline ( node , matchString ) ) {
node = this . _removeAndGetNext ( node ) ;
continue ;
}
if ( shouldRemoveTitleHeader && this . _headerDuplicatesTitle ( node ) ) {
this . log ( "Removing header: " , node . textContent . trim ( ) , this . _articleTitle . trim ( ) ) ;
shouldRemoveTitleHeader = false ;
node = this . _removeAndGetNext ( node ) ;
continue ;
}
// Remove unlikely candidates
if ( stripUnlikelyCandidates ) {
if ( this . REGEXPS . unlikelyCandidates . test ( matchString ) &&
! this . REGEXPS . okMaybeItsACandidate . test ( matchString ) &&
! this . _hasAncestorTag ( node , "table" ) &&
! this . _hasAncestorTag ( node , "code" ) &&
node . tagName !== "BODY" &&
node . tagName !== "A" ) {
this . log ( "Removing unlikely candidate - " + matchString ) ;
node = this . _removeAndGetNext ( node ) ;
continue ;
}
if ( this . UNLIKELY _ROLES . includes ( node . getAttribute ( "role" ) ) ) {
this . log ( "Removing content with role " + node . getAttribute ( "role" ) + " - " + matchString ) ;
node = this . _removeAndGetNext ( node ) ;
continue ;
}
}
// Remove DIV, SECTION, and HEADER nodes without any content(e.g. text, image, video, or iframe).
if ( ( node . tagName === "DIV" || node . tagName === "SECTION" || node . tagName === "HEADER" ||
node . tagName === "H1" || node . tagName === "H2" || node . tagName === "H3" ||
node . tagName === "H4" || node . tagName === "H5" || node . tagName === "H6" ) &&
this . _isElementWithoutContent ( node ) ) {
node = this . _removeAndGetNext ( node ) ;
continue ;
}
if ( this . DEFAULT _TAGS _TO _SCORE . indexOf ( node . tagName ) !== - 1 ) {
elementsToScore . push ( node ) ;
}
// Turn all divs that don't have children block level elements into p's
if ( node . tagName === "DIV" ) {
// Put phrasing content into paragraphs.
var p = null ;
var childNode = node . firstChild ;
while ( childNode ) {
var nextSibling = childNode . nextSibling ;
if ( this . _isPhrasingContent ( childNode ) ) {
if ( p !== null ) {
p . appendChild ( childNode ) ;
} else if ( ! this . _isWhitespace ( childNode ) ) {
p = doc . createElement ( "p" ) ;
node . replaceChild ( p , childNode ) ;
p . appendChild ( childNode ) ;
}
} else if ( p !== null ) {
while ( p . lastChild && this . _isWhitespace ( p . lastChild ) ) {
p . removeChild ( p . lastChild ) ;
}
p = null ;
}
childNode = nextSibling ;
}
// Sites like http://mobile.slate.com encloses each paragraph with a DIV
// element. DIVs with only a P element inside and no text content can be
// safely converted into plain P elements to avoid confusing the scoring
// algorithm with DIVs with are, in practice, paragraphs.
if ( this . _hasSingleTagInsideElement ( node , "P" ) && this . _getLinkDensity ( node ) < 0.25 ) {
var newNode = node . children [ 0 ] ;
node . parentNode . replaceChild ( newNode , node ) ;
node = newNode ;
elementsToScore . push ( node ) ;
} else if ( ! this . _hasChildBlockElement ( node ) ) {
node = this . _setNodeTag ( node , "P" ) ;
elementsToScore . push ( node ) ;
}
}
node = this . _getNextNode ( node ) ;
}
/ * *
* Loop through all paragraphs , and assign a score to them based on how content - y they look .
* Then add their score to their parent node .
*
* A score is determined by things like number of commas , class names , etc . Maybe eventually link density .
* * /
var candidates = [ ] ;
this . _forEachNode ( elementsToScore , function ( elementToScore ) {
if ( ! elementToScore . parentNode || typeof ( elementToScore . parentNode . tagName ) === "undefined" )
return ;
// If this paragraph is less than 25 characters, don't even count it.
var innerText = this . _getInnerText ( elementToScore ) ;
if ( innerText . length < 25 )
return ;
// Exclude nodes with no ancestor.
var ancestors = this . _getNodeAncestors ( elementToScore , 5 ) ;
if ( ancestors . length === 0 )
return ;
var contentScore = 0 ;
// Add a point for the paragraph itself as a base.
contentScore += 1 ;
// Add points for any commas within this paragraph.
contentScore += innerText . split ( "," ) . length ;
// For every 100 characters in this paragraph, add another point. Up to 3 points.
contentScore += Math . min ( Math . floor ( innerText . length / 100 ) , 3 ) ;
// Initialize and score ancestors.
this . _forEachNode ( ancestors , function ( ancestor , level ) {
if ( ! ancestor . tagName || ! ancestor . parentNode || typeof ( ancestor . parentNode . tagName ) === "undefined" )
return ;
if ( typeof ( ancestor . readability ) === "undefined" ) {
this . _initializeNode ( ancestor ) ;
candidates . push ( ancestor ) ;
}
// Node score divider:
// - parent: 1 (no division)
// - grandparent: 2
// - great grandparent+: ancestor level * 3
if ( level === 0 )
var scoreDivider = 1 ;
else if ( level === 1 )
scoreDivider = 2 ;
else
scoreDivider = level * 3 ;
ancestor . readability . contentScore += contentScore / scoreDivider ;
} ) ;
} ) ;
// After we've calculated scores, loop through all of the possible
// candidate nodes we found and find the one with the highest score.
var topCandidates = [ ] ;
for ( var c = 0 , cl = candidates . length ; c < cl ; c += 1 ) {
var candidate = candidates [ c ] ;
// Scale the final candidates score based on link density. Good content
// should have a relatively small link density (5% or less) and be mostly
// unaffected by this operation.
var candidateScore = candidate . readability . contentScore * ( 1 - this . _getLinkDensity ( candidate ) ) ;
candidate . readability . contentScore = candidateScore ;
this . log ( "Candidate:" , candidate , "with score " + candidateScore ) ;
for ( var t = 0 ; t < this . _nbTopCandidates ; t ++ ) {
var aTopCandidate = topCandidates [ t ] ;
if ( ! aTopCandidate || candidateScore > aTopCandidate . readability . contentScore ) {
topCandidates . splice ( t , 0 , candidate ) ;
if ( topCandidates . length > this . _nbTopCandidates )
topCandidates . pop ( ) ;
break ;
}
}
}
var topCandidate = topCandidates [ 0 ] || null ;
var neededToCreateTopCandidate = false ;
var parentOfTopCandidate ;
// If we still have no top candidate, just use the body as a last resort.
// We also have to copy the body node so it is something we can modify.
if ( topCandidate === null || topCandidate . tagName === "BODY" ) {
// Move all of the page's children into topCandidate
topCandidate = doc . createElement ( "DIV" ) ;
neededToCreateTopCandidate = true ;
// Move everything (not just elements, also text nodes etc.) into the container
// so we even include text directly in the body:
while ( page . firstChild ) {
this . log ( "Moving child out:" , page . firstChild ) ;
topCandidate . appendChild ( page . firstChild ) ;
}
page . appendChild ( topCandidate ) ;
this . _initializeNode ( topCandidate ) ;
} else if ( topCandidate ) {
// Find a better top candidate node if it contains (at least three) nodes which belong to `topCandidates` array
// and whose scores are quite closed with current `topCandidate` node.
var alternativeCandidateAncestors = [ ] ;
for ( var i = 1 ; i < topCandidates . length ; i ++ ) {
if ( topCandidates [ i ] . readability . contentScore / topCandidate . readability . contentScore >= 0.75 ) {
alternativeCandidateAncestors . push ( this . _getNodeAncestors ( topCandidates [ i ] ) ) ;
}
}
var MINIMUM _TOPCANDIDATES = 3 ;
if ( alternativeCandidateAncestors . length >= MINIMUM _TOPCANDIDATES ) {
parentOfTopCandidate = topCandidate . parentNode ;
while ( parentOfTopCandidate . tagName !== "BODY" ) {
var listsContainingThisAncestor = 0 ;
for ( var ancestorIndex = 0 ; ancestorIndex < alternativeCandidateAncestors . length && listsContainingThisAncestor < MINIMUM _TOPCANDIDATES ; ancestorIndex ++ ) {
listsContainingThisAncestor += Number ( alternativeCandidateAncestors [ ancestorIndex ] . includes ( parentOfTopCandidate ) ) ;
}
if ( listsContainingThisAncestor >= MINIMUM _TOPCANDIDATES ) {
topCandidate = parentOfTopCandidate ;
break ;
}
parentOfTopCandidate = parentOfTopCandidate . parentNode ;
}
}
if ( ! topCandidate . readability ) {
this . _initializeNode ( topCandidate ) ;
}
// Because of our bonus system, parents of candidates might have scores
// themselves. They get half of the node. There won't be nodes with higher
// scores than our topCandidate, but if we see the score going *up* in the first
// few steps up the tree, that's a decent sign that there might be more content
// lurking in other places that we want to unify in. The sibling stuff
// below does some of that - but only if we've looked high enough up the DOM
// tree.
parentOfTopCandidate = topCandidate . parentNode ;
var lastScore = topCandidate . readability . contentScore ;
// The scores shouldn't get too low.
var scoreThreshold = lastScore / 3 ;
while ( parentOfTopCandidate . tagName !== "BODY" ) {
if ( ! parentOfTopCandidate . readability ) {
parentOfTopCandidate = parentOfTopCandidate . parentNode ;
continue ;
}
var parentScore = parentOfTopCandidate . readability . contentScore ;
if ( parentScore < scoreThreshold )
break ;
if ( parentScore > lastScore ) {
// Alright! We found a better parent to use.
topCandidate = parentOfTopCandidate ;
break ;
}
lastScore = parentOfTopCandidate . readability . contentScore ;
parentOfTopCandidate = parentOfTopCandidate . parentNode ;
}
// If the top candidate is the only child, use parent instead. This will help sibling
// joining logic when adjacent content is actually located in parent's sibling node.
parentOfTopCandidate = topCandidate . parentNode ;
while ( parentOfTopCandidate . tagName != "BODY" && parentOfTopCandidate . children . length == 1 ) {
topCandidate = parentOfTopCandidate ;
parentOfTopCandidate = topCandidate . parentNode ;
}
if ( ! topCandidate . readability ) {
this . _initializeNode ( topCandidate ) ;
}
}
// Now that we have the top candidate, look through its siblings for content
// that might also be related. Things like preambles, content split by ads
// that we removed, etc.
var articleContent = doc . createElement ( "DIV" ) ;
if ( isPaging )
articleContent . id = "readability-content" ;
var siblingScoreThreshold = Math . max ( 10 , topCandidate . readability . contentScore * 0.2 ) ;
// Keep potential top candidate's parent node to try to get text direction of it later.
parentOfTopCandidate = topCandidate . parentNode ;
var siblings = parentOfTopCandidate . children ;
for ( var s = 0 , sl = siblings . length ; s < sl ; s ++ ) {
var sibling = siblings [ s ] ;
var append = false ;
this . log ( "Looking at sibling node:" , sibling , sibling . readability ? ( "with score " + sibling . readability . contentScore ) : "" ) ;
this . log ( "Sibling has score" , sibling . readability ? sibling . readability . contentScore : "Unknown" ) ;
if ( sibling === topCandidate ) {
append = true ;
} else {
var contentBonus = 0 ;
// Give a bonus if sibling nodes and top candidates have the example same classname
if ( sibling . className === topCandidate . className && topCandidate . className !== "" )
contentBonus += topCandidate . readability . contentScore * 0.2 ;
if ( sibling . readability &&
( ( sibling . readability . contentScore + contentBonus ) >= siblingScoreThreshold ) ) {
append = true ;
} else if ( sibling . nodeName === "P" ) {
var linkDensity = this . _getLinkDensity ( sibling ) ;
var nodeContent = this . _getInnerText ( sibling ) ;
var nodeLength = nodeContent . length ;
if ( nodeLength > 80 && linkDensity < 0.25 ) {
append = true ;
} else if ( nodeLength < 80 && nodeLength > 0 && linkDensity === 0 &&
nodeContent . search ( /\.( |$)/ ) !== - 1 ) {
append = true ;
}
}
}
if ( append ) {
this . log ( "Appending node:" , sibling ) ;
if ( this . ALTER _TO _DIV _EXCEPTIONS . indexOf ( sibling . nodeName ) === - 1 ) {
// We have a node that isn't a common block level element, like a form or td tag.
// Turn it into a div so it doesn't get filtered out later by accident.
this . log ( "Altering sibling:" , sibling , "to div." ) ;
sibling = this . _setNodeTag ( sibling , "DIV" ) ;
}
articleContent . appendChild ( sibling ) ;
// Fetch children again to make it compatible
// with DOM parsers without live collection support.
siblings = parentOfTopCandidate . children ;
// siblings is a reference to the children array, and
// sibling is removed from the array when we call appendChild().
// As a result, we must revisit this index since the nodes
// have been shifted.
s -= 1 ;
sl -= 1 ;
}
}
if ( this . _debug )
this . log ( "Article content pre-prep: " + articleContent . innerHTML ) ;
// So we have all of the content that we need. Now we clean it up for presentation.
this . _prepArticle ( articleContent ) ;
if ( this . _debug )
this . log ( "Article content post-prep: " + articleContent . innerHTML ) ;
if ( neededToCreateTopCandidate ) {
// We already created a fake div thing, and there wouldn't have been any siblings left
// for the previous loop, so there's no point trying to create a new div, and then
// move all the children over. Just assign IDs and class names here. No need to append
// because that already happened anyway.
topCandidate . id = "readability-page-1" ;
topCandidate . className = "page" ;
} else {
var div = doc . createElement ( "DIV" ) ;
div . id = "readability-page-1" ;
div . className = "page" ;
while ( articleContent . firstChild ) {
div . appendChild ( articleContent . firstChild ) ;
}
articleContent . appendChild ( div ) ;
}
if ( this . _debug )
this . log ( "Article content after paging: " + articleContent . innerHTML ) ;
var parseSuccessful = true ;
// Now that we've gone through the full algorithm, check to see if
// we got any meaningful content. If we didn't, we may need to re-run
// grabArticle with different flags set. This gives us a higher likelihood of
// finding the content, and the sieve approach gives us a higher likelihood of
// finding the -right- content.
var textLength = this . _getInnerText ( articleContent , true ) . length ;
if ( textLength < this . _charThreshold ) {
parseSuccessful = false ;
page . innerHTML = pageCacheHtml ;
if ( this . _flagIsActive ( this . FLAG _STRIP _UNLIKELYS ) ) {
this . _removeFlag ( this . FLAG _STRIP _UNLIKELYS ) ;
this . _attempts . push ( { articleContent : articleContent , textLength : textLength } ) ;
} else if ( this . _flagIsActive ( this . FLAG _WEIGHT _CLASSES ) ) {
this . _removeFlag ( this . FLAG _WEIGHT _CLASSES ) ;
this . _attempts . push ( { articleContent : articleContent , textLength : textLength } ) ;
} else if ( this . _flagIsActive ( this . FLAG _CLEAN _CONDITIONALLY ) ) {
this . _removeFlag ( this . FLAG _CLEAN _CONDITIONALLY ) ;
this . _attempts . push ( { articleContent : articleContent , textLength : textLength } ) ;
} else {
this . _attempts . push ( { articleContent : articleContent , textLength : textLength } ) ;
// No luck after removing flags, just return the longest text we found during the different loops
this . _attempts . sort ( function ( a , b ) {
return b . textLength - a . textLength ;
} ) ;
// But first check if we actually have something
if ( ! this . _attempts [ 0 ] . textLength ) {
return null ;
}
articleContent = this . _attempts [ 0 ] . articleContent ;
parseSuccessful = true ;
}
}
if ( parseSuccessful ) {
// Find out text direction from ancestors of final top candidate.
var ancestors = [ parentOfTopCandidate , topCandidate ] . concat ( this . _getNodeAncestors ( parentOfTopCandidate ) ) ;
this . _someNode ( ancestors , function ( ancestor ) {
if ( ! ancestor . tagName )
return false ;
var articleDir = ancestor . getAttribute ( "dir" ) ;
if ( articleDir ) {
this . _articleDir = articleDir ;
return true ;
}
return false ;
} ) ;
return articleContent ;
}
}
} ,
/**
 * Check whether the input string could be a byline.
 * This verifies that the input is a string, and that its trimmed length
 * is greater than 0 and less than 100 chars.
 *
 * @param byline {string} - a string to check whether it's a byline.
 * @return Boolean - whether the input string is a byline.
 */
_isValidByline : function ( byline ) {
if ( typeof byline == "string" || byline instanceof String ) {
byline = byline . trim ( ) ;
return ( byline . length > 0 ) && ( byline . length < 100 ) ;
}
return false ;
} ,
/**
 * Converts some of the common HTML entities in a string to their corresponding characters.
 *
 * @param str {string} - a string to unescape.
 * @return string without HTML entities.
 */
_unescapeHtmlEntities : function ( str ) {
if ( ! str ) {
return str ;
}
var htmlEscapeMap = this . HTML _ESCAPE _MAP ;
return str . replace ( /&(quot|amp|apos|lt|gt);/g , function ( _ , tag ) {
return htmlEscapeMap [ tag ] ;
} ) . replace ( /&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi , function ( _ , hex , numStr ) {
var num = parseInt ( hex || numStr , hex ? 16 : 10 ) ;
return String . fromCharCode ( num ) ;
} ) ;
} ,
/**
 * Try to extract metadata from JSON-LD object.
 * For now, only Schema.org objects of type Article or its subtypes are supported.
 * @return Object with any metadata that could be extracted (possibly none)
 */
_getJSONLD : function ( doc ) {
var scripts = this . _getAllNodesWithTag ( doc , [ "script" ] ) ;
var metadata ;
this . _forEachNode ( scripts , function ( jsonLdElement ) {
if ( ! metadata && jsonLdElement . getAttribute ( "type" ) === "application/ld+json" ) {
try {
// Strip CDATA markers if present
var content = jsonLdElement . textContent . replace ( /^\s*<!\[CDATA\[|\]\]>\s*$/g , "" ) ;
var parsed = JSON . parse ( content ) ;
if (
! parsed [ "@context" ] ||
! parsed [ "@context" ] . match ( /^https?\:\/\/schema\.org$/ )
) {
return ;
}
if ( ! parsed [ "@type" ] && Array . isArray ( parsed [ "@graph" ] ) ) {
parsed = parsed [ "@graph" ] . find ( function ( it ) {
return ( it [ "@type" ] || "" ) . match (
this . REGEXPS . jsonLdArticleTypes
) ;
} ) ;
}
if (
! parsed ||
! parsed [ "@type" ] ||
! parsed [ "@type" ] . match ( this . REGEXPS . jsonLdArticleTypes )
) {
return ;
}
metadata = { } ;
if ( typeof parsed . name === "string" && typeof parsed . headline === "string" && parsed . name !== parsed . headline ) {
// we have both name and headline element in the JSON-LD. They should both be the same but some websites like aktualne.cz
// put their own name into "name" and the article title to "headline" which confuses Readability. So we try to check if either
// "name" or "headline" closely matches the html title, and if so, use that one. If not, then we use "name" by default.
var title = this . _getArticleTitle ( ) ;
var nameMatches = this . _textSimilarity ( parsed . name , title ) > 0.75 ;
var headlineMatches = this . _textSimilarity ( parsed . headline , title ) > 0.75 ;
if ( headlineMatches && ! nameMatches ) {
metadata . title = parsed . headline ;
} else {
metadata . title = parsed . name ;
}
} else if ( typeof parsed . name === "string" ) {
metadata . title = parsed . name . trim ( ) ;
} else if ( typeof parsed . headline === "string" ) {
metadata . title = parsed . headline . trim ( ) ;
}
if ( parsed . author ) {
if ( typeof parsed . author . name === "string" ) {
metadata . byline = parsed . author . name . trim ( ) ;
} else if ( Array . isArray ( parsed . author ) && parsed . author [ 0 ] && typeof parsed . author [ 0 ] . name === "string" ) {
metadata . byline = parsed . author
. filter ( function ( author ) {
return author && typeof author . name === "string" ;
} )
. map ( function ( author ) {
return author . name . trim ( ) ;
} )
. join ( ", " ) ;
}
}
if ( typeof parsed . description === "string" ) {
metadata . excerpt = parsed . description . trim ( ) ;
}
if (
parsed . publisher &&
typeof parsed . publisher . name === "string"
) {
metadata . siteName = parsed . publisher . name . trim ( ) ;
}
return ;
} catch ( err ) {
this . log ( err . message ) ;
}
}
} ) ;
return metadata ? metadata : { } ;
} ,
/**
 * Attempts to get title, byline, excerpt and site name metadata for the article.
 *
 * @param {Object} jsonld — object containing any metadata that
 * could be extracted from JSON-LD object.
 *
 * @return Object with "title", "byline", "excerpt" and "siteName" properties
 */
_getArticleMetadata : function ( jsonld ) {
var metadata = { } ;
var values = { } ;
var metaElements = this . _doc . getElementsByTagName ( "meta" ) ;
// property is a space-separated list of values
var propertyPattern = /\s*(dc|dcterm|og|twitter)\s*:\s*(author|creator|description|title|site_name)\s*/gi ;
// name is a single value
var namePattern = /^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i ;
// Find description tags.
this . _forEachNode ( metaElements , function ( element ) {
var elementName = element . getAttribute ( "name" ) ;
var elementProperty = element . getAttribute ( "property" ) ;
var content = element . getAttribute ( "content" ) ;
if ( ! content ) {
return ;
}
var matches = null ;
var name = null ;
if ( elementProperty ) {
matches = elementProperty . match ( propertyPattern ) ;
if ( matches ) {
// Convert to lowercase, and remove any whitespace
// so we can match below.
name = matches [ 0 ] . toLowerCase ( ) . replace ( /\s/g , "" ) ;
// multiple authors
values [ name ] = content . trim ( ) ;
}
}
if ( ! matches && elementName && namePattern . test ( elementName ) ) {
name = elementName ;
if ( content ) {
// Convert to lowercase, remove any whitespace, and convert dots
// to colons so we can match below.
name = name . toLowerCase ( ) . replace ( /\s/g , "" ) . replace ( /\./g , ":" ) ;
values [ name ] = content . trim ( ) ;
}
}
} ) ;
// get title
metadata . title = jsonld . title ||
values [ "dc:title" ] ||
values [ "dcterm:title" ] ||
values [ "og:title" ] ||
values [ "weibo:article:title" ] ||
values [ "weibo:webpage:title" ] ||
values [ "title" ] ||
values [ "twitter:title" ] ;
if ( ! metadata . title ) {
metadata . title = this . _getArticleTitle ( ) ;
}
// get author
metadata . byline = jsonld . byline ||
values [ "dc:creator" ] ||
values [ "dcterm:creator" ] ||
values [ "author" ] ;
// get description
metadata . excerpt = jsonld . excerpt ||
values [ "dc:description" ] ||
values [ "dcterm:description" ] ||
values [ "og:description" ] ||
values [ "weibo:article:description" ] ||
values [ "weibo:webpage:description" ] ||
values [ "description" ] ||
values [ "twitter:description" ] ;
// get site name
metadata . siteName = jsonld . siteName ||
values [ "og:site_name" ] ;
// in many sites the meta value is escaped with HTML entities,
// so here we need to unescape it
metadata . title = this . _unescapeHtmlEntities ( metadata . title ) ;
metadata . byline = this . _unescapeHtmlEntities ( metadata . byline ) ;
metadata . excerpt = this . _unescapeHtmlEntities ( metadata . excerpt ) ;
metadata . siteName = this . _unescapeHtmlEntities ( metadata . siteName ) ;
return metadata ;
} ,
/**
 * Check if node is an image, or if node contains exactly one image,
 * whether as a direct child or as a deeper descendant.
 *
 * @param Element
 **/
_isSingleImage : function ( node ) {
if ( node . tagName === "IMG" ) {
return true ;
}
if ( node . children . length !== 1 || node . textContent . trim ( ) !== "" ) {
return false ;
}
return this . _isSingleImage ( node . children [ 0 ] ) ;
} ,
/**
 * Find all <noscript> that are located after <img> nodes, and which contain only one
 * <img> element. Replace the first image with the image from inside the <noscript> tag,
 * and remove the <noscript> tag. This improves the quality of the images we use on
 * some sites (e.g. Medium).
 *
 * @param Element
 **/
_unwrapNoscriptImages : function ( doc ) {
// Find img without source or attributes that might contains image, and remove it.
// This is done to prevent a placeholder img is replaced by img from noscript in next step.
var imgs = Array . from ( doc . getElementsByTagName ( "img" ) ) ;
this . _forEachNode ( imgs , function ( img ) {
for ( var i = 0 ; i < img . attributes . length ; i ++ ) {
var attr = img . attributes [ i ] ;
switch ( attr . name ) {
case "src" :
case "srcset" :
case "data-src" :
case "data-srcset" :
return ;
}
if ( /\.(jpg|jpeg|png|webp)/i . test ( attr . value ) ) {
return ;
}
}
img . parentNode . removeChild ( img ) ;
} ) ;
// Next find noscript and try to extract its image
var noscripts = Array . from ( doc . getElementsByTagName ( "noscript" ) ) ;
this . _forEachNode ( noscripts , function ( noscript ) {
// Parse content of noscript and make sure it only contains image
var tmp = doc . createElement ( "div" ) ;
tmp . innerHTML = noscript . innerHTML ;
if ( ! this . _isSingleImage ( tmp ) ) {
return ;
}
// If noscript has previous sibling and it only contains image,
// replace it with noscript content. However we also keep old
// attributes that might contains image.
var prevElement = noscript . previousElementSibling ;
if ( prevElement && this . _isSingleImage ( prevElement ) ) {
var prevImg = prevElement ;
if ( prevImg . tagName !== "IMG" ) {
prevImg = prevElement . getElementsByTagName ( "img" ) [ 0 ] ;
}
var newImg = tmp . getElementsByTagName ( "img" ) [ 0 ] ;
for ( var i = 0 ; i < prevImg . attributes . length ; i ++ ) {
var attr = prevImg . attributes [ i ] ;
if ( attr . value === "" ) {
continue ;
}
if ( attr . name === "src" || attr . name === "srcset" || /\.(jpg|jpeg|png|webp)/i . test ( attr . value ) ) {
if ( newImg . getAttribute ( attr . name ) === attr . value ) {
continue ;
}
var attrName = attr . name ;
if ( newImg . hasAttribute ( attrName ) ) {
attrName = "data-old-" + attrName ;
}
newImg . setAttribute ( attrName , attr . value ) ;
}
}
noscript . parentNode . replaceChild ( tmp . firstElementChild , prevElement ) ;
}
} ) ;
} ,
/**
 * Removes script and noscript tags from the document.
 *
 * @param Element
 **/
_removeScripts : function ( doc ) {
this . _removeNodes ( this . _getAllNodesWithTag ( doc , [ "script" ] ) , function ( scriptNode ) {
scriptNode . nodeValue = "" ;
scriptNode . removeAttribute ( "src" ) ;
return true ;
} ) ;
this . _removeNodes ( this . _getAllNodesWithTag ( doc , [ "noscript" ] ) ) ;
} ,
/**
 * Check if this node has only whitespace and a single element with given tag.
 * Returns false if the element contains non-empty text nodes,
 * or if it contains no element with given tag or more than 1 element.
 *
 * @param Element
 * @param string tag of child element
 **/
_hasSingleTagInsideElement : function ( element , tag ) {
// There should be exactly 1 element child with given tag
if ( element . children . length != 1 || element . children [ 0 ] . tagName !== tag ) {
return false ;
}
// And there should be no text nodes with real content
return ! this . _someNode ( element . childNodes , function ( node ) {
return node . nodeType === this . TEXT _NODE &&
this . REGEXPS . hasContent . test ( node . textContent ) ;
} ) ;
} ,
_isElementWithoutContent : function ( node ) {
return node . nodeType === this . ELEMENT _NODE &&
node . textContent . trim ( ) . length == 0 &&
( node . children . length == 0 ||
node . children . length == node . getElementsByTagName ( "br" ) . length + node . getElementsByTagName ( "hr" ) . length ) ;
} ,
/**
 * Determine whether element has any block-level elements among its descendants.
 *
 * @param Element
 */
_hasChildBlockElement : function ( element ) {
return this . _someNode ( element . childNodes , function ( node ) {
return this . DIV _TO _P _ELEMS . has ( node . tagName ) ||
this . _hasChildBlockElement ( node ) ;
} ) ;
} ,
/**
 * Determine if a node qualifies as phrasing content.
 * https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content
 **/
_isPhrasingContent : function ( node ) {
return node . nodeType === this . TEXT _NODE || this . PHRASING _ELEMS . indexOf ( node . tagName ) !== - 1 ||
( ( node . tagName === "A" || node . tagName === "DEL" || node . tagName === "INS" ) &&
this . _everyNode ( node . childNodes , this . _isPhrasingContent ) ) ;
} ,
_isWhitespace : function ( node ) {
return ( node . nodeType === this . TEXT _NODE && node . textContent . trim ( ) . length === 0 ) ||
( node . nodeType === this . ELEMENT _NODE && node . tagName === "BR" ) ;
} ,
/**
 * Get the inner text of a node - cross browser compatibly.
 * This also strips out any excess whitespace to be found.
 *
 * @param Element
 * @param Boolean normalizeSpaces (default: true)
 * @return string
 **/
_getInnerText : function ( e , normalizeSpaces ) {
normalizeSpaces = ( typeof normalizeSpaces === "undefined" ) ? true : normalizeSpaces ;
var textContent = e . textContent . trim ( ) ;
if ( normalizeSpaces ) {
return textContent . replace ( this . REGEXPS . normalize , " " ) ;
}
return textContent ;
} ,
/**
 * Get the number of times a string s appears in the node e.
 *
 * @param Element
 * @param string - what to split on. Default is ","
 * @return number (integer)
 **/
_getCharCount : function ( e , s ) {
s = s || "," ;
return this . _getInnerText ( e ) . split ( s ) . length - 1 ;
} ,
/**
 * Remove the style attribute (and other presentational attributes) on e and every element under it.
 * TODO: Test if getElementsByTagName(*) is faster.
 *
 * @param Element
 * @return void
 **/
_cleanStyles : function ( e ) {
if ( ! e || e . tagName . toLowerCase ( ) === "svg" )
return ;
// Remove `style` and deprecated presentational attributes
for ( var i = 0 ; i < this . PRESENTATIONAL _ATTRIBUTES . length ; i ++ ) {
e . removeAttribute ( this . PRESENTATIONAL _ATTRIBUTES [ i ] ) ;
}
if ( this . DEPRECATED _SIZE _ATTRIBUTE _ELEMS . indexOf ( e . tagName ) !== - 1 ) {
e . removeAttribute ( "width" ) ;
e . removeAttribute ( "height" ) ;
}
var cur = e . firstElementChild ;
while ( cur !== null ) {
this . _cleanStyles ( cur ) ;
cur = cur . nextElementSibling ;
}
} ,
/**
 * Get the density of links as a fraction of the content.
 * This is the amount of text that is inside a link divided by the total text in the node.
 *
 * @param Element
 * @return number (float)
 **/
_getLinkDensity : function ( element ) {
var textLength = this . _getInnerText ( element ) . length ;
if ( textLength === 0 )
return 0 ;
var linkLength = 0 ;
// XXX implement _reduceNodeList?
this . _forEachNode ( element . getElementsByTagName ( "a" ) , function ( linkNode ) {
var href = linkNode . getAttribute ( "href" ) ;
var coefficient = href && this . REGEXPS . hashUrl . test ( href ) ? 0.3 : 1 ;
linkLength += this . _getInnerText ( linkNode ) . length * coefficient ;
} ) ;
return linkLength / textLength ;
} ,
/**
 * Get an element's class/id weight. Uses regular expressions to tell if this
 * element looks good or bad.
 *
 * @param Element
 * @return number (Integer)
 **/
_getClassWeight : function ( e ) {
if ( ! this . _flagIsActive ( this . FLAG _WEIGHT _CLASSES ) )
return 0 ;
var weight = 0 ;
// Look for a special classname
if ( typeof ( e . className ) === "string" && e . className !== "" ) {
if ( this . REGEXPS . negative . test ( e . className ) )
weight -= 25 ;
if ( this . REGEXPS . positive . test ( e . className ) )
weight += 25 ;
}
// Look for a special ID
if ( typeof ( e . id ) === "string" && e . id !== "" ) {
if ( this . REGEXPS . negative . test ( e . id ) )
weight -= 25 ;
if ( this . REGEXPS . positive . test ( e . id ) )
weight += 25 ;
}
return weight ;
} ,
/**
 * Clean a node of all elements of type "tag".
 * (Unless it's a youtube/vimeo video. People love movies.)
 *
 * @param Element
 * @param string tag to clean
 * @return void
 **/
_clean : function ( e , tag ) {
var isEmbed = [ "object" , "embed" , "iframe" ] . indexOf ( tag ) !== - 1 ;
this . _removeNodes ( this . _getAllNodesWithTag ( e , [ tag ] ) , function ( element ) {
// Allow youtube and vimeo videos through as people usually want to see those.
if ( isEmbed ) {
// First, check the elements attributes to see if any of them contain youtube or vimeo
for ( var i = 0 ; i < element . attributes . length ; i ++ ) {
if ( this . REGEXPS . videos . test ( element . attributes [ i ] . value ) ) {
return false ;
}
}
// For embed with <object> tag, check inner HTML as well.
if ( element . tagName === "object" && this . REGEXPS . videos . test ( element . innerHTML ) ) {
return false ;
}
}
return true ;
} ) ;
} ,
/**
 * Check if a given node has one of its ancestor tag name matching the
 * provided one.
 * @param HTMLElement node
 * @param String tagName
 * @param Number maxDepth (defaults to 3; a negative value disables the depth limit)
 * @param Function filterFn a filter to invoke to determine whether this node 'counts'
 * @return Boolean
 */
_hasAncestorTag : function ( node , tagName , maxDepth , filterFn ) {
maxDepth = maxDepth || 3 ;
tagName = tagName . toUpperCase ( ) ;
var depth = 0 ;
while ( node . parentNode ) {
if ( maxDepth > 0 && depth > maxDepth )
return false ;
if ( node . parentNode . tagName === tagName && ( ! filterFn || filterFn ( node . parentNode ) ) )
return true ;
node = node . parentNode ;
depth ++ ;
}
return false ;
} ,
/**
 * Return an object indicating how many rows and columns this table has.
 */
_getRowAndColumnCount : function ( table ) {
var rows = 0 ;
var columns = 0 ;
var trs = table . getElementsByTagName ( "tr" ) ;
for ( var i = 0 ; i < trs . length ; i ++ ) {
var rowspan = trs [ i ] . getAttribute ( "rowspan" ) || 0 ;
if ( rowspan ) {
rowspan = parseInt ( rowspan , 10 ) ;
}
rows += ( rowspan || 1 ) ;
// Now look for column-related info
var columnsInThisRow = 0 ;
var cells = trs [ i ] . getElementsByTagName ( "td" ) ;
for ( var j = 0 ; j < cells . length ; j ++ ) {
var colspan = cells [ j ] . getAttribute ( "colspan" ) || 0 ;
if ( colspan ) {
colspan = parseInt ( colspan , 10 ) ;
}
columnsInThisRow += ( colspan || 1 ) ;
}
columns = Math . max ( columns , columnsInThisRow ) ;
}
return { rows : rows , columns : columns } ;
} ,
/**
 * Look for 'data' (as opposed to 'layout') tables, for which we use
 * similar checks as
 * https://searchfox.org/mozilla-central/rev/f82d5c549f046cb64ce5602bfd894b7ae807c8f8/accessible/generic/TableAccessible.cpp#19
 */
_markDataTables : function ( root ) {
var tables = root . getElementsByTagName ( "table" ) ;
for ( var i = 0 ; i < tables . length ; i ++ ) {
var table = tables [ i ] ;
var role = table . getAttribute ( "role" ) ;
if ( role == "presentation" ) {
table . _readabilityDataTable = false ;
continue ;
}
var datatable = table . getAttribute ( "datatable" ) ;
if ( datatable == "0" ) {
table . _readabilityDataTable = false ;
continue ;
}
var summary = table . getAttribute ( "summary" ) ;
if ( summary ) {
table . _readabilityDataTable = true ;
continue ;
}
var caption = table . getElementsByTagName ( "caption" ) [ 0 ] ;
if ( caption && caption . childNodes . length > 0 ) {
table . _readabilityDataTable = true ;
continue ;
}
// If the table has a descendant with any of these tags, consider a data table:
var dataTableDescendants = [ "col" , "colgroup" , "tfoot" , "thead" , "th" ] ;
var descendantExists = function ( tag ) {
return ! ! table . getElementsByTagName ( tag ) [ 0 ] ;
} ;
if ( dataTableDescendants . some ( descendantExists ) ) {
this . log ( "Data table because found data-y descendant" ) ;
table . _readabilityDataTable = true ;
continue ;
}
// Nested tables indicate a layout table:
if ( table . getElementsByTagName ( "table" ) [ 0 ] ) {
table . _readabilityDataTable = false ;
continue ;
}
var sizeInfo = this . _getRowAndColumnCount ( table ) ;
if ( sizeInfo . rows >= 10 || sizeInfo . columns > 4 ) {
table . _readabilityDataTable = true ;
continue ;
}
// Now just go by size entirely:
table . _readabilityDataTable = sizeInfo . rows * sizeInfo . columns > 10 ;
}
} ,
/* convert images and figures that have properties like data-src into images that can be loaded without JS */
_fixLazyImages : function ( root ) {
this . _forEachNode ( this . _getAllNodesWithTag ( root , [ "img" , "picture" , "figure" ] ) , function ( elem ) {
// In some sites (e.g. Kotaku), they put 1px square image as base64 data uri in the src attribute.
// So, here we check if the data uri is too short, just might as well remove it.
if ( elem . src && this . REGEXPS . b64DataUrl . test ( elem . src ) ) {
// Make sure it's not SVG, because SVG can have a meaningful image in under 133 bytes.
var parts = this . REGEXPS . b64DataUrl . exec ( elem . src ) ;
if ( parts [ 1 ] === "image/svg+xml" ) {
return ;
}
// Make sure this element has other attributes which contains image.
// If it doesn't, then this src is important and shouldn't be removed.
var srcCouldBeRemoved = false ;
for ( var i = 0 ; i < elem . attributes . length ; i ++ ) {
var attr = elem . attributes [ i ] ;
if ( attr . name === "src" ) {
continue ;
}
if ( /\.(jpg|jpeg|png|webp)/i . test ( attr . value ) ) {
srcCouldBeRemoved = true ;
break ;
}
}
// Here we assume if image is less than 100 bytes (or 133B after encoded to base64)
// it will be too small, therefore it might be placeholder image.
if ( srcCouldBeRemoved ) {
var b64starts = elem . src . search ( /base64\s*/i ) + 7 ;
var b64length = elem . src . length - b64starts ;
if ( b64length < 133 ) {
elem . removeAttribute ( "src" ) ;
}
}
}
// also check for "null" to work around https://github.com/jsdom/jsdom/issues/2580
if ( ( elem . src || ( elem . srcset && elem . srcset != "null" ) ) && elem . className . toLowerCase ( ) . indexOf ( "lazy" ) === - 1 ) {
return ;
}
for ( var j = 0 ; j < elem . attributes . length ; j ++ ) {
attr = elem . attributes [ j ] ;
if ( attr . name === "src" || attr . name === "srcset" || attr . name === "alt" ) {
continue ;
}
var copyTo = null ;
if ( /\.(jpg|jpeg|png|webp)\s+\d/ . test ( attr . value ) ) {
copyTo = "srcset" ;
} else if ( /^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/ . test ( attr . value ) ) {
copyTo = "src" ;
}
if ( copyTo ) {
//if this is an img or picture, set the attribute directly
if ( elem . tagName === "IMG" || elem . tagName === "PICTURE" ) {
elem . setAttribute ( copyTo , attr . value ) ;
} else if ( elem . tagName === "FIGURE" && ! this . _getAllNodesWithTag ( elem , [ "img" , "picture" ] ) . length ) {
//if the item is a <figure> that does not contain an image or picture, create one and place it inside the figure
//see the nytimes-3 testcase for an example
var img = this . _doc . createElement ( "img" ) ;
img . setAttribute ( copyTo , attr . value ) ;
elem . appendChild ( img ) ;
}
}
}
} ) ;
} ,
_getTextDensity : function ( e , tags ) {
var textLength = this . _getInnerText ( e , true ) . length ;
if ( textLength === 0 ) {
return 0 ;
}
var childrenLength = 0 ;
var children = this . _getAllNodesWithTag ( e , tags ) ;
this . _forEachNode ( children , ( child ) => childrenLength += this . _getInnerText ( child , true ) . length ) ;
return childrenLength / textLength ;
} ,
/ * *
* Clean an element of all tags of type "tag" if they look fishy .
* "Fishy" is an algorithm based on content length , classnames , link density , number of images & embeds , etc .
*
* @ return void
* * /
_cleanConditionally : function ( e , tag ) {
if ( ! this . _flagIsActive ( this . FLAG _CLEAN _CONDITIONALLY ) )
return ;
// Gather counts for other typical elements embedded within.
// Traverse backwards so we can remove nodes at the same time
// without effecting the traversal.
//
// TODO: Consider taking into account original contentScore here.
this . _removeNodes ( this . _getAllNodesWithTag ( e , [ tag ] ) , function ( node ) {
// First check if this node IS data table, in which case don't remove it.
var isDataTable = function ( t ) {
return t . _readabilityDataTable ;
} ;
var isList = tag === "ul" || tag === "ol" ;
if ( ! isList ) {
var listLength = 0 ;
var listNodes = this . _getAllNodesWithTag ( node , [ "ul" , "ol" ] ) ;
this . _forEachNode ( listNodes , ( list ) => listLength += this . _getInnerText ( list ) . length ) ;
isList = listLength / this . _getInnerText ( node ) . length > 0.9 ;
}
if ( tag === "table" && isDataTable ( node ) ) {
return false ;
}
// Next check if we're inside a data table, in which case don't remove it as well.
if ( this . _hasAncestorTag ( node , "table" , - 1 , isDataTable ) ) {
return false ;
}
if ( this . _hasAncestorTag ( node , "code" ) ) {
return false ;
}
var weight = this . _getClassWeight ( node ) ;
this . log ( "Cleaning Conditionally" , node ) ;
var contentScore = 0 ;
if ( weight + contentScore < 0 ) {
return true ;
}
if ( this . _getCharCount ( node , "," ) < 10 ) {
// If there are not very many commas, and the number of
// non-paragraph elements is more than paragraphs or other
// ominous signs, remove the element.
var p = node . getElementsByTagName ( "p" ) . length ;
var img = node . getElementsByTagName ( "img" ) . length ;
var li = node . getElementsByTagName ( "li" ) . length - 100 ;
var input = node . getElementsByTagName ( "input" ) . length ;
var headingDensity = this . _getTextDensity ( node , [ "h1" , "h2" , "h3" , "h4" , "h5" , "h6" ] ) ;
var embedCount = 0 ;
var embeds = this . _getAllNodesWithTag ( node , [ "object" , "embed" , "iframe" ] ) ;
for ( var i = 0 ; i < embeds . length ; i ++ ) {
// If this embed has attribute that matches video regex, don't delete it.
for ( var j = 0 ; j < embeds [ i ] . attributes . length ; j ++ ) {
if ( this . REGEXPS . videos . test ( embeds [ i ] . attributes [ j ] . value ) ) {
return false ;
}
}
// For embed with <object> tag, check inner HTML as well.
if ( embeds [ i ] . tagName === "object" && this . REGEXPS . videos . test ( embeds [ i ] . innerHTML ) ) {
return false ;
}
embedCount ++ ;
}
var linkDensity = this . _getLinkDensity ( node ) ;
var contentLength = this . _getInnerText ( node ) . length ;
var haveToRemove =
( img > 1 && p / img < 0.5 && ! this . _hasAncestorTag ( node , "figure" ) ) ||
( ! isList && li > p ) ||
( input > Math . floor ( p / 3 ) ) ||
( ! isList && headingDensity < 0.9 && contentLength < 25 && ( img === 0 || img > 2 ) && ! this . _hasAncestorTag ( node , "figure" ) ) ||
( ! isList && weight < 25 && linkDensity > 0.2 ) ||
( weight >= 25 && linkDensity > 0.5 ) ||
( ( embedCount === 1 && contentLength < 75 ) || embedCount > 1 ) ;
return haveToRemove ;
}
return false ;
} ) ;
} ,
/ * *
* Clean out elements that match the specified conditions
*
* @ param Element
* @ param Function determines whether a node should be removed
* @ return void
* * /
_cleanMatchedNodes : function ( e , filter ) {
var endOfSearchMarkerNode = this . _getNextNode ( e , true ) ;
var next = this . _getNextNode ( e ) ;
while ( next && next != endOfSearchMarkerNode ) {
if ( filter . call ( this , next , next . className + " " + next . id ) ) {
next = this . _removeAndGetNext ( next ) ;
} else {
next = this . _getNextNode ( next ) ;
}
}
} ,
/ * *
* Clean out spurious headers from an Element .
*
* @ param Element
* @ return void
* * /
_cleanHeaders : function ( e ) {
let headingNodes = this . _getAllNodesWithTag ( e , [ "h1" , "h2" ] ) ;
this . _removeNodes ( headingNodes , function ( node ) {
let shouldRemove = this . _getClassWeight ( node ) < 0 ;
if ( shouldRemove ) {
this . log ( "Removing header with low class weight:" , node ) ;
}
return shouldRemove ;
} ) ;
} ,
/ * *
* Check if this node is an H1 or H2 element whose content is mostly
* the same as the article title .
*
* @ param Element the node to check .
* @ return boolean indicating whether this is a title - like header .
* /
_headerDuplicatesTitle : function ( node ) {
if ( node . tagName != "H1" && node . tagName != "H2" ) {
return false ;
}
var heading = this . _getInnerText ( node , false ) ;
this . log ( "Evaluating similarity of header:" , heading , this . _articleTitle ) ;
return this . _textSimilarity ( this . _articleTitle , heading ) > 0.75 ;
} ,
_flagIsActive : function ( flag ) {
return ( this . _flags & flag ) > 0 ;
} ,
_removeFlag : function ( flag ) {
this . _flags = this . _flags & ~ flag ;
} ,
_isProbablyVisible : function ( node ) {
// Have to null-check node.style and node.className.indexOf to deal with SVG and MathML nodes.
return ( ! node . style || node . style . display != "none" )
&& ! node . hasAttribute ( "hidden" )
//check for "fallback-image" so that wikimedia math images are displayed
&& ( ! node . hasAttribute ( "aria-hidden" ) || node . getAttribute ( "aria-hidden" ) != "true" || ( node . className && node . className . indexOf && node . className . indexOf ( "fallback-image" ) !== - 1 ) ) ;
} ,
/ * *
* Runs readability .
*
* Workflow :
* 1. Prep the document by removing script tags , css , etc .
* 2. Build readability ' s DOM tree .
* 3. Grab the article content from the current dom tree .
* 4. Replace the current DOM tree with the new one .
* 5. Read peacefully .
*
* @ return void
* * /
parse : function ( ) {
// Avoid parsing too large documents, as per configuration option
if ( this . _maxElemsToParse > 0 ) {
var numTags = this . _doc . getElementsByTagName ( "*" ) . length ;
if ( numTags > this . _maxElemsToParse ) {
throw new Error ( "Aborting parsing document; " + numTags + " elements found" ) ;
}
}
// Unwrap image from noscript
this . _unwrapNoscriptImages ( this . _doc ) ;
// Extract JSON-LD metadata before removing scripts
var jsonLd = this . _disableJSONLD ? { } : this . _getJSONLD ( this . _doc ) ;
// Remove script tags from the document.
this . _removeScripts ( this . _doc ) ;
this . _prepDocument ( ) ;
var metadata = this . _getArticleMetadata ( jsonLd ) ;
this . _articleTitle = metadata . title ;
var articleContent = this . _grabArticle ( ) ;
if ( ! articleContent )
return null ;
this . log ( "Grabbed: " + articleContent . innerHTML ) ;
this . _postProcessContent ( articleContent ) ;
// If we haven't found an excerpt in the article's metadata, use the article's
// first paragraph as the excerpt. This is used for displaying a preview of
// the article's content.
if ( ! metadata . excerpt ) {
var paragraphs = articleContent . getElementsByTagName ( "p" ) ;
if ( paragraphs . length > 0 ) {
metadata . excerpt = paragraphs [ 0 ] . textContent . trim ( ) ;
}
}
var textContent = articleContent . textContent ;
return {
title : this . _articleTitle ,
byline : metadata . byline || this . _articleByline ,
dir : this . _articleDir ,
lang : this . _articleLang ,
content : this . _serializer ( articleContent ) ,
textContent : textContent ,
length : textContent . length ,
excerpt : metadata . excerpt ,
siteName : metadata . siteName || this . _articleSiteName
} ;
}
} ;
{
module . exports = Readability ;
}
} ( Readability$1 ) ) ;
var ReadabilityReaderable = { exports : { } } ;
/* eslint-env es6:false */
( function ( module ) {
/ *
* Copyright ( c ) 2010 Arc90 Inc
*
* Licensed under the Apache License , Version 2.0 ( the "License" ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
*
* http : //www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an "AS IS" BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
* /
/ *
* This code is heavily based on Arc90 ' s readability . js ( 1.7 . 1 ) script
* available at : http : //code.google.com/p/arc90labs-readability
* /
// Patterns tested against a node's `className + " " + id` string by
// isProbablyReaderable: a node is skipped when it matches unlikelyCandidates
// and does not match okMaybeItsACandidate. Both are case-insensitive.
var REGEXPS = {
// NOTE: These two regular expressions are duplicated in
// Readability.js. Please keep both copies in sync.
unlikelyCandidates : /-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i ,
okMaybeItsACandidate : /and|article|body|column|content|main|shadow/i ,
} ;
function isNodeVisible(node) {
  // node.style and node.className.indexOf are null-checked because SVG and
  // MathML nodes may not provide them.
  if (node.style && node.style.display == "none") {
    return false;
  }
  if (node.hasAttribute("hidden")) {
    return false;
  }
  var ariaHidden = node.hasAttribute("aria-hidden") && node.getAttribute("aria-hidden") == "true";
  if (!ariaHidden) {
    return true;
  }
  // aria-hidden nodes stay visible only with the "fallback-image" class, so
  // that wikimedia math images are displayed.
  return node.className && node.className.indexOf && node.className.indexOf("fallback-image") !== -1;
}
/**
 * Decides whether or not the document is reader-able without parsing the whole thing.
 * @param {Object} options Configuration object.
 * @param {number} [options.minContentLength=140] The minimum node content length used to decide if the document is readerable.
 * @param {number} [options.minScore=20] The minimum cumulated 'score' used to determine if the document is readerable.
 * @param {Function} [options.visibilityChecker=isNodeVisible] The function used to determine if a node is visible.
 * @return {boolean} Whether or not we suspect Readability.parse() will succeed at returning an article object.
 */
function isProbablyReaderable(doc, options = {}) {
  // Backward compatibility: `options` may be the visibility-checker function
  // itself instead of a configuration object.
  if (typeof options == "function") {
    options = { visibilityChecker: options };
  }
  options = Object.assign(
    { minScore: 20, minContentLength: 140, visibilityChecker: isNodeVisible },
    options
  );

  var candidates = doc.querySelectorAll("p, pre, article");

  // Also consider <div>s that contain <br> children, since some articles are
  // laid out as
  //   <div>
  //     Sentences<br>
  //     <br>
  //     Sentences<br>
  //   </div>
  // The Set keeps each candidate only once.
  var lineBreaks = doc.querySelectorAll("div > br");
  if (lineBreaks.length) {
    var unique = new Set(candidates);
    Array.prototype.forEach.call(lineBreaks, function (br) {
      unique.add(br.parentNode);
    });
    candidates = Array.from(unique);
  }

  // `score` accumulates across candidates; some() stops at the first node
  // that pushes it past minScore.
  var score = 0;
  return Array.prototype.some.call(candidates, function (node) {
    if (!options.visibilityChecker(node)) {
      return false;
    }

    var matchString = node.className + " " + node.id;
    var looksUnlikely = REGEXPS.unlikelyCandidates.test(matchString) &&
      !REGEXPS.okMaybeItsACandidate.test(matchString);
    if (looksUnlikely) {
      return false;
    }

    // Paragraphs inside list items are not counted.
    if (node.matches("li p")) {
      return false;
    }

    var textLength = node.textContent.trim().length;
    if (textLength < options.minContentLength) {
      return false;
    }

    score += Math.sqrt(textLength - options.minContentLength);
    return score > options.minScore;
  });
}
{
module . exports = isProbablyReaderable ;
}
} ( ReadabilityReaderable ) ) ;
// Pull the values assigned to `module.exports` out of the two module shims and
// bundle them into one namespace object.
var Readability = Readability$1.exports;
var isProbablyReaderable = ReadabilityReaderable.exports;
var readability = { Readability, isProbablyReaderable };
var purify = { exports : { } } ;
/*! @license DOMPurify 2.3.6 | (c) Cure53 and other contributors | Released under the Apache license 2.0 and Mozilla Public License 2.0 | github.com/cure53/DOMPurify/blob/2.3.6/LICENSE */
( function ( module , exports ) {
( function ( global , factory ) {
module . exports = factory ( ) ;
} ( commonjsGlobal , function ( ) {
// Return a fresh array holding the items of `arr`: real arrays are copied
// index by index (holes become undefined), anything else goes through Array.from.
function _toConsumableArray(arr) {
  if (!Array.isArray(arr)) {
    return Array.from(arr);
  }
  var copy = new Array(arr.length);
  var index = 0;
  while (index < arr.length) {
    copy[index] = arr[index];
    index += 1;
  }
  return copy;
}
// Local aliases for the Object built-ins used by the helpers in this module.
var hasOwnProperty = Object.hasOwnProperty;
var setPrototypeOf = Object.setPrototypeOf;
var isFrozen = Object.isFrozen;
var getPrototypeOf = Object.getPrototypeOf;
var getOwnPropertyDescriptor = Object.getOwnPropertyDescriptor;
var freeze = Object.freeze;
var seal = Object.seal;
var create = Object.create; // eslint-disable-line import/no-mutable-exports

// `_ref` is `false` when Reflect does not exist, which leaves apply and
// construct undefined so the fallbacks below take over.
var _ref = typeof Reflect !== 'undefined' && Reflect;
var apply = _ref.apply;
var construct = _ref.construct;

// Fallbacks used only when the corresponding built-in is missing.
apply = apply || function apply(fun, thisValue, args) {
  return fun.apply(thisValue, args);
};
// Without Object.freeze / Object.seal the value is handed back unchanged.
freeze = freeze || function freeze(x) {
  return x;
};
seal = seal || function seal(x) {
  return x;
};
construct = construct || function construct(Func, args) {
  return new (Function.prototype.bind.apply(Func, [null].concat(_toConsumableArray(args))))();
};
// Prototype methods wrapped by unapply so the receiver is passed as the first
// argument, e.g. stringToLowerCase(str) instead of str.toLowerCase().
var arrayForEach = unapply(Array.prototype.forEach),
    arrayPop = unapply(Array.prototype.pop),
    arrayPush = unapply(Array.prototype.push);
var stringToLowerCase = unapply(String.prototype.toLowerCase),
    stringMatch = unapply(String.prototype.match),
    stringReplace = unapply(String.prototype.replace),
    stringIndexOf = unapply(String.prototype.indexOf),
    stringTrim = unapply(String.prototype.trim);
var regExpTest = unapply(RegExp.prototype.test);
// Constructor wrapped by unconstruct so it can be called without `new`.
var typeErrorCreate = unconstruct(TypeError);
// Turn a method into a plain function: the first argument of the returned
// function becomes `this`, the remaining arguments are forwarded to `func`.
function unapply(func) {
  return function (thisArg, ...args) {
    return apply(func, thisArg, args);
  };
}
// Turn a constructor into a plain factory function: every argument of the
// returned function is passed on to `construct(func, args)`.
function unconstruct(func) {
  return function (...args) {
    return construct(func, args);
  };
}
/* Add properties to a lookup table */
function addToSet(set, array) {
  if (setPrototypeOf) {
    // A null prototype keeps 'in' and truthy checks such as
    // Boolean(set.constructor) independent of Object.prototype, and stops
    // prototype setters from intercepting `set` as a this value.
    setPrototypeOf(set, null);
  }

  // Walk the array from the last entry to the first, registering each entry
  // as a key of the lookup table.
  for (var index = array.length - 1; index >= 0; index--) {
    var key = array[index];
    if (typeof key === 'string') {
      var lowered = stringToLowerCase(key);
      // Write the lower-cased form back unless the array is frozen
      // (config presets such as tags.js / attrs.js are immutable).
      if (lowered !== key && !isFrozen(array)) {
        array[index] = lowered;
      }
      key = lowered;
    }
    set[key] = true;
  }

  return set;
}
/* Shallow clone an object */
function clone(object) {
  // The copy is created through create(null); only own properties of `object`
  // are carried over, and values are copied by reference (shallow).
  var copy = create(null);
  for (var key in object) {
    if (apply(hasOwnProperty, object, [key])) {
      copy[key] = object[key];
    }
  }
  return copy;
}
/* IE10 doesn't support __lookupGetter__ so let's
 * simulate it. It also automatically checks
 * if the prop is function or getter and behaves
 * accordingly. */
function lookupGetter(object, prop) {
  // Climb the prototype chain until a getter or a method named `prop` shows
  // up; either one is returned wrapped by unapply.
  for (var holder = object; holder !== null; holder = getPrototypeOf(holder)) {
    var descriptor = getOwnPropertyDescriptor(holder, prop);
    if (!descriptor) {
      continue;
    }
    if (descriptor.get) {
      return unapply(descriptor.get);
    }
    if (typeof descriptor.value === 'function') {
      return unapply(descriptor.value);
    }
  }

  // Nothing usable on the chain: hand back a stub that warns and yields null.
  return function fallbackValue(element) {
    console.warn('fallback value for', element);
    return null;
  };
}
// HTML element names passed through freeze(); this list is one of the inputs
// concatenated into DEFAULT_ALLOWED_TAGS further down.
var html = freeze ( [ 'a' , 'abbr' , 'acronym' , 'address' , 'area' , 'article' , 'aside' , 'audio' , 'b' , 'bdi' , 'bdo' , 'big' , 'blink' , 'blockquote' , 'body' , 'br' , 'button' , 'canvas' , 'caption' , 'center' , 'cite' , 'code' , 'col' , 'colgroup' , 'content' , 'data' , 'datalist' , 'dd' , 'decorator' , 'del' , 'details' , 'dfn' , 'dialog' , 'dir' , 'div' , 'dl' , 'dt' , 'element' , 'em' , 'fieldset' , 'figcaption' , 'figure' , 'font' , 'footer' , 'form' , 'h1' , 'h2' , 'h3' , 'h4' , 'h5' , 'h6' , 'head' , 'header' , 'hgroup' , 'hr' , 'html' , 'i' , 'img' , 'input' , 'ins' , 'kbd' , 'label' , 'legend' , 'li' , 'main' , 'map' , 'mark' , 'marquee' , 'menu' , 'menuitem' , 'meter' , 'nav' , 'nobr' , 'ol' , 'optgroup' , 'option' , 'output' , 'p' , 'picture' , 'pre' , 'progress' , 'q' , 'rp' , 'rt' , 'ruby' , 's' , 'samp' , 'section' , 'select' , 'shadow' , 'small' , 'source' , 'spacer' , 'span' , 'strike' , 'strong' , 'style' , 'sub' , 'summary' , 'sup' , 'table' , 'tbody' , 'td' , 'template' , 'textarea' , 'tfoot' , 'th' , 'thead' , 'time' , 'tr' , 'track' , 'tt' , 'u' , 'ul' , 'var' , 'video' , 'wbr' ] ) ;
// SVG
// SVG element names; `svg` and `svgFilters` are both concatenated into
// DEFAULT_ALLOWED_TAGS further down.
var svg = freeze ( [ 'svg' , 'a' , 'altglyph' , 'altglyphdef' , 'altglyphitem' , 'animatecolor' , 'animatemotion' , 'animatetransform' , 'circle' , 'clippath' , 'defs' , 'desc' , 'ellipse' , 'filter' , 'font' , 'g' , 'glyph' , 'glyphref' , 'hkern' , 'image' , 'line' , 'lineargradient' , 'marker' , 'mask' , 'metadata' , 'mpath' , 'path' , 'pattern' , 'polygon' , 'polyline' , 'radialgradient' , 'rect' , 'stop' , 'style' , 'switch' , 'symbol' , 'text' , 'textpath' , 'title' , 'tref' , 'tspan' , 'view' , 'vkern' ] ) ;
// SVG filter primitive element names (written in mixed case here).
var svgFilters = freeze ( [ 'feBlend' , 'feColorMatrix' , 'feComponentTransfer' , 'feComposite' , 'feConvolveMatrix' , 'feDiffuseLighting' , 'feDisplacementMap' , 'feDistantLight' , 'feFlood' , 'feFuncA' , 'feFuncB' , 'feFuncG' , 'feFuncR' , 'feGaussianBlur' , 'feImage' , 'feMerge' , 'feMergeNode' , 'feMorphology' , 'feOffset' , 'fePointLight' , 'feSpecularLighting' , 'feSpotLight' , 'feTile' , 'feTurbulence' ] ) ;
// List of SVG elements that are disallowed by default.
// We still need to know them so that we can do namespace
// checks properly in case one wants to add them to
// allow-list.
// (This list is not among the inputs of DEFAULT_ALLOWED_TAGS.)
var svgDisallowed = freeze ( [ 'animate' , 'color-profile' , 'cursor' , 'discard' , 'fedropshadow' , 'font-face' , 'font-face-format' , 'font-face-name' , 'font-face-src' , 'font-face-uri' , 'foreignobject' , 'hatch' , 'hatchpath' , 'mesh' , 'meshgradient' , 'meshpatch' , 'meshrow' , 'missing-glyph' , 'script' , 'set' , 'solidcolor' , 'unknown' , 'use' ] ) ;
// MathML element names; concatenated into DEFAULT_ALLOWED_TAGS further down.
var mathMl = freeze ( [ 'math' , 'menclose' , 'merror' , 'mfenced' , 'mfrac' , 'mglyph' , 'mi' , 'mlabeledtr' , 'mmultiscripts' , 'mn' , 'mo' , 'mover' , 'mpadded' , 'mphantom' , 'mroot' , 'mrow' , 'ms' , 'mspace' , 'msqrt' , 'mstyle' , 'msub' , 'msup' , 'msubsup' , 'mtable' , 'mtd' , 'mtext' , 'mtr' , 'munder' , 'munderover' ] ) ;
// Similarly to SVG, we want to know all MathML elements,
// even those that we disallow by default.
// (This list is not among the inputs of DEFAULT_ALLOWED_TAGS.)
var mathMlDisallowed = freeze ( [ 'maction' , 'maligngroup' , 'malignmark' , 'mlongdiv' , 'mscarries' , 'mscarry' , 'msgroup' , 'mstack' , 'msline' , 'msrow' , 'semantics' , 'annotation' , 'annotation-xml' , 'mprescripts' , 'none' ] ) ;
// The '#text' entry; also concatenated into DEFAULT_ALLOWED_TAGS.
var text = freeze ( [ '#text' ] ) ;
// HTML attribute names; one of the inputs concatenated into
// DEFAULT_ALLOWED_ATTR further down.
var html$1 = freeze ( [ 'accept' , 'action' , 'align' , 'alt' , 'autocapitalize' , 'autocomplete' , 'autopictureinpicture' , 'autoplay' , 'background' , 'bgcolor' , 'border' , 'capture' , 'cellpadding' , 'cellspacing' , 'checked' , 'cite' , 'class' , 'clear' , 'color' , 'cols' , 'colspan' , 'controls' , 'controlslist' , 'coords' , 'crossorigin' , 'datetime' , 'decoding' , 'default' , 'dir' , 'disabled' , 'disablepictureinpicture' , 'disableremoteplayback' , 'download' , 'draggable' , 'enctype' , 'enterkeyhint' , 'face' , 'for' , 'headers' , 'height' , 'hidden' , 'high' , 'href' , 'hreflang' , 'id' , 'inputmode' , 'integrity' , 'ismap' , 'kind' , 'label' , 'lang' , 'list' , 'loading' , 'loop' , 'low' , 'max' , 'maxlength' , 'media' , 'method' , 'min' , 'minlength' , 'multiple' , 'muted' , 'name' , 'nonce' , 'noshade' , 'novalidate' , 'nowrap' , 'open' , 'optimum' , 'pattern' , 'placeholder' , 'playsinline' , 'poster' , 'preload' , 'pubdate' , 'radiogroup' , 'readonly' , 'rel' , 'required' , 'rev' , 'reversed' , 'role' , 'rows' , 'rowspan' , 'spellcheck' , 'scope' , 'selected' , 'shape' , 'size' , 'sizes' , 'span' , 'srclang' , 'start' , 'src' , 'srcset' , 'step' , 'style' , 'summary' , 'tabindex' , 'title' , 'translate' , 'type' , 'usemap' , 'valign' , 'value' , 'width' , 'xmlns' , 'slot' ] ) ;
// SVG attribute names; one of the inputs concatenated into
// DEFAULT_ALLOWED_ATTR further down.
var svg$1 = freeze ( [ 'accent-height' , 'accumulate' , 'additive' , 'alignment-baseline' , 'ascent' , 'attributename' , 'attributetype' , 'azimuth' , 'basefrequency' , 'baseline-shift' , 'begin' , 'bias' , 'by' , 'class' , 'clip' , 'clippathunits' , 'clip-path' , 'clip-rule' , 'color' , 'color-interpolation' , 'color-interpolation-filters' , 'color-profile' , 'color-rendering' , 'cx' , 'cy' , 'd' , 'dx' , 'dy' , 'diffuseconstant' , 'direction' , 'display' , 'divisor' , 'dur' , 'edgemode' , 'elevation' , 'end' , 'fill' , 'fill-opacity' , 'fill-rule' , 'filter' , 'filterunits' , 'flood-color' , 'flood-opacity' , 'font-family' , 'font-size' , 'font-size-adjust' , 'font-stretch' , 'font-style' , 'font-variant' , 'font-weight' , 'fx' , 'fy' , 'g1' , 'g2' , 'glyph-name' , 'glyphref' , 'gradientunits' , 'gradienttransform' , 'height' , 'href' , 'id' , 'image-rendering' , 'in' , 'in2' , 'k' , 'k1' , 'k2' , 'k3' , 'k4' , 'kerning' , 'keypoints' , 'keysplines' , 'keytimes' , 'lang' , 'lengthadjust' , 'letter-spacing' , 'kernelmatrix' , 'kernelunitlength' , 'lighting-color' , 'local' , 'marker-end' , 'marker-mid' , 'marker-start' , 'markerheight' , 'markerunits' , 'markerwidth' , 'maskcontentunits' , 'maskunits' , 'max' , 'mask' , 'media' , 'method' , 'mode' , 'min' , 'name' , 'numoctaves' , 'offset' , 'operator' , 'opacity' , 'order' , 'orient' , 'orientation' , 'origin' , 'overflow' , 'paint-order' , 'path' , 'pathlength' , 'patterncontentunits' , 'patterntransform' , 'patternunits' , 'points' , 'preservealpha' , 'preserveaspectratio' , 'primitiveunits' , 'r' , 'rx' , 'ry' , 'radius' , 'refx' , 'refy' , 'repeatcount' , 'repeatdur' , 'restart' , 'result' , 'rotate' , 'scale' , 'seed' , 'shape-rendering' , 'specularconstant' , 'specularexponent' , 'spreadmethod' , 'startoffset' , 'stddeviation' , 'stitchtiles' , 'stop-color' , 'stop-opacity' , 'stroke-dasharray' , 'stroke-dashoffset' , 'stroke-linecap' , 'stroke-linejoin' , 'stroke-miterlimit' , 'stroke-opacity' , 'stroke' , 
'stroke-width' , 'style' , 'surfacescale' , 'systemlanguage' , 'tabindex' , 'targetx' , 'targety' , 'transform' , 'transform-origin' , 'text-anchor' , 'text-decoration' , 'text-rendering' , 'textlength' , 'type' , 'u1' , 'u2' , 'unicode' , 'values' , 'viewbox' , 'visibility' , 'version' , 'vert-adv-y' , 'vert-origin-x' , 'vert-origin-y' , 'width' , 'word-spacing' , 'wrap' , 'writing-mode' , 'xchannelselector' , 'ychannelselector' , 'x' , 'x1' , 'x2' , 'xmlns' , 'y' , 'y1' , 'y2' , 'z' , 'zoomandpan' ] ) ;
// MathML attribute names; one of the inputs concatenated into
// DEFAULT_ALLOWED_ATTR further down.
var mathMl$1 = freeze ( [ 'accent' , 'accentunder' , 'align' , 'bevelled' , 'close' , 'columnsalign' , 'columnlines' , 'columnspan' , 'denomalign' , 'depth' , 'dir' , 'display' , 'displaystyle' , 'encoding' , 'fence' , 'frame' , 'height' , 'href' , 'id' , 'largeop' , 'length' , 'linethickness' , 'lspace' , 'lquote' , 'mathbackground' , 'mathcolor' , 'mathsize' , 'mathvariant' , 'maxsize' , 'minsize' , 'movablelimits' , 'notation' , 'numalign' , 'open' , 'rowalign' , 'rowlines' , 'rowspacing' , 'rowspan' , 'rspace' , 'rquote' , 'scriptlevel' , 'scriptminsize' , 'scriptsizemultiplier' , 'selection' , 'separator' , 'separators' , 'stretchy' , 'subscriptshift' , 'supscriptshift' , 'symmetric' , 'voffset' , 'width' , 'xmlns' ] ) ;
// Namespaced xlink / xml / xmlns attribute names; also concatenated into
// DEFAULT_ALLOWED_ATTR.
var xml = freeze ( [ 'xlink:href' , 'xml:id' , 'xlink:title' , 'xml:space' , 'xmlns:xlink' ] ) ;
// Regular expressions shared by the sanitizer, each passed through seal().
// Matches from "{{" to the end of the input, or from the start up to "}}".
// eslint-disable-next-line unicorn/better-regex
var MUSTACHE _EXPR = seal ( /\{\{[\s\S]*|[\s\S]*\}\}/gm ) ; // Specify template detection regex for SAFE_FOR_TEMPLATES mode
// Same shape for ERB-style delimiters: "<%" onwards, or everything up to "%>".
var ERB _EXPR = seal ( /<%[\s\S]*|[\s\S]*%>/gm ) ;
// Attribute names starting with "data-" followed by at least one allowed character.
var DATA _ATTR = seal ( /^data-[\-\w.\u00B7-\uFFFF]/ ) ; // eslint-disable-line no-useless-escape
// Attribute names made of "aria-" plus only word characters and hyphens.
var ARIA _ATTR = seal ( /^aria-[\-\w]+$/ ) ; // eslint-disable-line no-useless-escape
// Accepts values that start with one of the listed schemes (http, https, ftp,
// ftps, mailto, tel, callto, cid, xmpp), start with a non-letter, or start
// with a run of letters/"+"/"."/"-" that is not followed by a colon.
var IS _ALLOWED _URI = seal ( /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i // eslint-disable-line no-useless-escape
) ;
// Values starting with "<word>script:" or "data:".
var IS _SCRIPT _OR _DATA = seal ( /^(?:\w+script|data):/i ) ;
// Control characters and the Unicode whitespace code points listed in the class.
var ATTR _WHITESPACE = seal ( /[\u0000-\u0020\u00A0\u1680\u180E\u2000-\u2029\u205F\u3000]/g // eslint-disable-line no-control-regex
) ;
// Exactly "html", case-insensitive.
var DOCTYPE _NAME = seal ( /^html$/i ) ;
// typeof helper: with native symbols it is plain `typeof`; otherwise objects
// whose constructor is Symbol (excluding Symbol.prototype) report "symbol".
var _typeof;
if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") {
  _typeof = function (obj) {
    return typeof obj;
  };
} else {
  _typeof = function (obj) {
    var isShimmedSymbol = obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype;
    return isShimmedSymbol ? "symbol" : typeof obj;
  };
}
// Return a fresh array holding the items of `arr`: real arrays are copied
// index by index (holes become undefined), anything else goes through Array.from.
function _toConsumableArray$1(arr) {
  if (!Array.isArray(arr)) {
    return Array.from(arr);
  }
  var copy = new Array(arr.length);
  var index = 0;
  while (index < arr.length) {
    copy[index] = arr[index];
    index += 1;
  }
  return copy;
}
// Return the global `window` object, or null when no `window` is defined.
var getGlobal = function getGlobal() {
  if (typeof window === 'undefined') {
    return null;
  }
  return window;
};
/ * *
* Creates a no - op policy for internal use only .
* Don ' t export this function outside this module !
* @ param { ? TrustedTypePolicyFactory } trustedTypes The policy factory .
* @ param { Document } document The document object ( to determine policy name suffix )
* @ return { ? TrustedTypePolicy } The policy created ( or null , if Trusted Types
* are not supported ) .
* /
var _createTrustedTypesPolicy = function _createTrustedTypesPolicy(trustedTypes, document) {
  // The factory parameter is documented as nullable, and `typeof null` is
  // 'object', so null is rejected explicitly before `.createPolicy` is read.
  // Anything that is not an object exposing a createPolicy function means
  // Trusted Types are not usable here.
  if (trustedTypes === null || typeof trustedTypes !== 'object' || typeof trustedTypes.createPolicy !== 'function') {
    return null;
  }

  // Allow the callers to control the unique policy name
  // by adding a data-tt-policy-suffix to the script element with the DOMPurify.
  // Policy creation with duplicate names throws in Trusted Types.
  var suffix = null;
  var ATTR_NAME = 'data-tt-policy-suffix';
  if (document.currentScript && document.currentScript.hasAttribute(ATTR_NAME)) {
    suffix = document.currentScript.getAttribute(ATTR_NAME);
  }

  // "dompurify" or "dompurify#<suffix>".
  var policyName = 'dompurify' + (suffix ? '#' + suffix : '');

  try {
    // The policy is a no-op: createHTML returns its input unchanged.
    return trustedTypes.createPolicy(policyName, {
      createHTML: function createHTML(html$$1) {
        return html$$1;
      }
    });
  } catch (_) {
    // Policy creation failed (most likely another DOMPurify script has
    // already run). Skip creating the policy, as this will only cause errors
    // if TT are enforced.
    console.warn('TrustedTypes policy ' + policyName + ' could not be created.');
    return null;
  }
};
function createDOMPurify ( ) {
var window = arguments . length > 0 && arguments [ 0 ] !== undefined ? arguments [ 0 ] : getGlobal ( ) ;
var DOMPurify = function DOMPurify ( root ) {
return createDOMPurify ( root ) ;
} ;
/ * *
* Version label , exposed for easier checks
* if DOMPurify is up to date or not
* /
DOMPurify . version = '2.3.6' ;
/ * *
* Array of elements that DOMPurify removed during sanitation .
* Empty if nothing was removed .
* /
DOMPurify . removed = [ ] ;
if ( ! window || ! window . document || window . document . nodeType !== 9 ) {
// Not running in a browser, provide a factory function
// so that you can pass your own Window
DOMPurify . isSupported = false ;
return DOMPurify ;
}
var originalDocument = window . document ;
var document = window . document ;
var DocumentFragment = window . DocumentFragment ,
HTMLTemplateElement = window . HTMLTemplateElement ,
Node = window . Node ,
Element = window . Element ,
NodeFilter = window . NodeFilter ,
_window$NamedNodeMap = window . NamedNodeMap ,
NamedNodeMap = _window$NamedNodeMap === undefined ? window . NamedNodeMap || window . MozNamedAttrMap : _window$NamedNodeMap ,
HTMLFormElement = window . HTMLFormElement ,
DOMParser = window . DOMParser ,
trustedTypes = window . trustedTypes ;
var ElementPrototype = Element . prototype ;
var cloneNode = lookupGetter ( ElementPrototype , 'cloneNode' ) ;
var getNextSibling = lookupGetter ( ElementPrototype , 'nextSibling' ) ;
var getChildNodes = lookupGetter ( ElementPrototype , 'childNodes' ) ;
var getParentNode = lookupGetter ( ElementPrototype , 'parentNode' ) ;
// As per issue #47, the web-components registry is inherited by a
// new document created via createHTMLDocument. As per the spec
// (http://w3c.github.io/webcomponents/spec/custom/#creating-and-passing-registries)
// a new empty registry is used when creating a template contents owner
// document, so we use that as our parent document to ensure nothing
// is inherited.
if ( typeof HTMLTemplateElement === 'function' ) {
var template = document . createElement ( 'template' ) ;
if ( template . content && template . content . ownerDocument ) {
document = template . content . ownerDocument ;
}
}
var trustedTypesPolicy = _createTrustedTypesPolicy ( trustedTypes , originalDocument ) ;
var emptyHTML = trustedTypesPolicy ? trustedTypesPolicy . createHTML ( '' ) : '' ;
var _document = document ,
implementation = _document . implementation ,
createNodeIterator = _document . createNodeIterator ,
createDocumentFragment = _document . createDocumentFragment ,
getElementsByTagName = _document . getElementsByTagName ;
var importNode = originalDocument . importNode ;
var documentMode = { } ;
try {
documentMode = clone ( document ) . documentMode ? document . documentMode : { } ;
} catch ( _ ) { }
var hooks = { } ;
/**
 * Expose whether this browser supports running the full DOMPurify.
 */
DOMPurify . isSupported = typeof getParentNode === 'function' && implementation && typeof implementation . createHTMLDocument !== 'undefined' && documentMode !== 9 ;
var MUSTACHE _EXPR$$1 = MUSTACHE _EXPR ,
ERB _EXPR$$1 = ERB _EXPR ,
DATA _ATTR$$1 = DATA _ATTR ,
ARIA _ATTR$$1 = ARIA _ATTR ,
IS _SCRIPT _OR _DATA$$1 = IS _SCRIPT _OR _DATA ,
ATTR _WHITESPACE$$1 = ATTR _WHITESPACE ;
var IS _ALLOWED _URI$$1 = IS _ALLOWED _URI ;
/**
 * We consider the elements and attributes below to be safe. Ideally
 * don't add any new ones but feel free to remove unwanted ones.
 */
/* allowed element names */
var ALLOWED _TAGS = null ;
var DEFAULT _ALLOWED _TAGS = addToSet ( { } , [ ] . concat ( _toConsumableArray$1 ( html ) , _toConsumableArray$1 ( svg ) , _toConsumableArray$1 ( svgFilters ) , _toConsumableArray$1 ( mathMl ) , _toConsumableArray$1 ( text ) ) ) ;
/* Allowed attribute names */
var ALLOWED _ATTR = null ;
var DEFAULT _ALLOWED _ATTR = addToSet ( { } , [ ] . concat ( _toConsumableArray$1 ( html$1 ) , _toConsumableArray$1 ( svg$1 ) , _toConsumableArray$1 ( mathMl$1 ) , _toConsumableArray$1 ( xml ) ) ) ;
/*
 * Configure how DOMPurify should handle custom elements and their attributes as well as customized built-in elements.
 * @property {RegExp|Function|null} tagNameCheck one of [null, regexPattern, predicate]. Default: `null` (disallow any custom elements)
 * @property {RegExp|Function|null} attributeNameCheck one of [null, regexPattern, predicate]. Default: `null` (disallow any attributes not on the allow list)
 * @property {boolean} allowCustomizedBuiltInElements allow custom elements derived from built-ins if they pass CUSTOM_ELEMENT_HANDLING.tagNameCheck. Default: `false`.
 */
var CUSTOM _ELEMENT _HANDLING = Object . seal ( Object . create ( null , {
tagNameCheck : {
writable : true ,
configurable : false ,
enumerable : true ,
value : null
} ,
attributeNameCheck : {
writable : true ,
configurable : false ,
enumerable : true ,
value : null
} ,
allowCustomizedBuiltInElements : {
writable : true ,
configurable : false ,
enumerable : true ,
value : false
}
} ) ) ;
/* Explicitly forbidden tags (overrides ALLOWED_TAGS/ADD_TAGS) */
var FORBID _TAGS = null ;
/* Explicitly forbidden attributes (overrides ALLOWED_ATTR/ADD_ATTR) */
var FORBID _ATTR = null ;
/* Decide if ARIA attributes are okay */
var ALLOW _ARIA _ATTR = true ;
/* Decide if custom data attributes are okay */
var ALLOW _DATA _ATTR = true ;
/* Decide if unknown protocols are okay */
var ALLOW _UNKNOWN _PROTOCOLS = false ;
/* Output should be safe for common template engines.
 * This means, DOMPurify removes data attributes, mustaches and ERB
 */
var SAFE _FOR _TEMPLATES = false ;
/* Decide if document with <html>... should be returned */
var WHOLE _DOCUMENT = false ;
/* Track whether config is already set on this instance of DOMPurify. */
var SET _CONFIG = false ;
/* Decide if all elements (e.g. style, script) must be children of
 * document.body. By default, browsers might move them to document.head */
var FORCE _BODY = false ;
/* Decide if a DOM `HTMLBodyElement` should be returned, instead of a html
 * string (or a TrustedHTML object if Trusted Types are supported).
 * If `WHOLE_DOCUMENT` is enabled a `HTMLHtmlElement` will be returned instead
 */
var RETURN _DOM = false ;
/* Decide if a DOM `DocumentFragment` should be returned, instead of a html
 * string (or a TrustedHTML object if Trusted Types are supported) */
var RETURN _DOM _FRAGMENT = false ;
/* Try to return a Trusted Type object instead of a string, return a string in
 * case Trusted Types are not supported */
var RETURN _TRUSTED _TYPE = false ;
/* Output should be free from DOM clobbering attacks? */
var SANITIZE _DOM = true ;
/* Keep element content when removing element? */
var KEEP _CONTENT = true ;
/* If a `Node` is passed to sanitize(), then performs sanitization in-place instead
 * of importing it into a new Document and returning a sanitized copy */
var IN _PLACE = false ;
/* Allow usage of profiles like html, svg and mathMl */
var USE _PROFILES = { } ;
/* Tags to ignore content of when KEEP_CONTENT is true */
var FORBID _CONTENTS = null ;
var DEFAULT _FORBID _CONTENTS = addToSet ( { } , [ 'annotation-xml' , 'audio' , 'colgroup' , 'desc' , 'foreignobject' , 'head' , 'iframe' , 'math' , 'mi' , 'mn' , 'mo' , 'ms' , 'mtext' , 'noembed' , 'noframes' , 'noscript' , 'plaintext' , 'script' , 'style' , 'svg' , 'template' , 'thead' , 'title' , 'video' , 'xmp' ] ) ;
/* Tags that are safe for data: URIs */
var DATA _URI _TAGS = null ;
var DEFAULT _DATA _URI _TAGS = addToSet ( { } , [ 'audio' , 'video' , 'img' , 'source' , 'image' , 'track' ] ) ;
/* Attributes safe for values like "javascript:" */
var URI _SAFE _ATTRIBUTES = null ;
var DEFAULT _URI _SAFE _ATTRIBUTES = addToSet ( { } , [ 'alt' , 'class' , 'for' , 'id' , 'label' , 'name' , 'pattern' , 'placeholder' , 'role' , 'summary' , 'title' , 'value' , 'style' , 'xmlns' ] ) ;
var MATHML _NAMESPACE = 'http://www.w3.org/1998/Math/MathML' ;
var SVG _NAMESPACE = 'http://www.w3.org/2000/svg' ;
var HTML _NAMESPACE = 'http://www.w3.org/1999/xhtml' ;
/* Document namespace */
var NAMESPACE = HTML _NAMESPACE ;
var IS _EMPTY _INPUT = false ;
/* Parsing of strict XHTML documents */
var PARSER _MEDIA _TYPE = void 0 ;
var SUPPORTED _PARSER _MEDIA _TYPES = [ 'application/xhtml+xml' , 'text/html' ] ;
var DEFAULT _PARSER _MEDIA _TYPE = 'text/html' ;
var transformCaseFunc = void 0 ;
/* Keep a reference to config to pass to hooks */
var CONFIG = null ;
/* Ideally, do not touch anything below this line */
/* ______________________________________________ */
var formElement = document . createElement ( 'form' ) ;
var isRegexOrFunction = function isRegexOrFunction(testValue) {
  // A value qualifies when it is a regular expression or something callable.
  if (testValue instanceof RegExp) {
    return true;
  }
  return testValue instanceof Function;
};
/**
 * _parseConfig
 *
 * Translates a config literal into the closure-level settings the sanitizer
 * reads. Every setting is recomputed from its default on each call, so an
 * option supplied to one call does not leak into a later call that omits it.
 *
 * @param {Object} cfg optional config literal
 */
// eslint-disable-next-line complexity
var _parseConfig = function _parseConfig(cfg) {
  /* Nothing to do when handed the exact object that is already active */
  if (CONFIG && CONFIG === cfg) {
    return;
  }
  /* Shield configuration object from tampering */
  if (!cfg || (typeof cfg === 'undefined' ? 'undefined' : _typeof(cfg)) !== 'object') {
    cfg = {};
  }
  /* Shield configuration object from prototype pollution */
  cfg = clone(cfg);
  /* Set configuration parameters */
  ALLOWED_TAGS = 'ALLOWED_TAGS' in cfg ? addToSet({}, cfg.ALLOWED_TAGS) : DEFAULT_ALLOWED_TAGS;
  ALLOWED_ATTR = 'ALLOWED_ATTR' in cfg ? addToSet({}, cfg.ALLOWED_ATTR) : DEFAULT_ALLOWED_ATTR;
  URI_SAFE_ATTRIBUTES = 'ADD_URI_SAFE_ATTR' in cfg ? addToSet(clone(DEFAULT_URI_SAFE_ATTRIBUTES), cfg.ADD_URI_SAFE_ATTR) : DEFAULT_URI_SAFE_ATTRIBUTES;
  DATA_URI_TAGS = 'ADD_DATA_URI_TAGS' in cfg ? addToSet(clone(DEFAULT_DATA_URI_TAGS), cfg.ADD_DATA_URI_TAGS) : DEFAULT_DATA_URI_TAGS;
  FORBID_CONTENTS = 'FORBID_CONTENTS' in cfg ? addToSet({}, cfg.FORBID_CONTENTS) : DEFAULT_FORBID_CONTENTS;
  FORBID_TAGS = 'FORBID_TAGS' in cfg ? addToSet({}, cfg.FORBID_TAGS) : {};
  FORBID_ATTR = 'FORBID_ATTR' in cfg ? addToSet({}, cfg.FORBID_ATTR) : {};
  USE_PROFILES = 'USE_PROFILES' in cfg ? cfg.USE_PROFILES : false;
  ALLOW_ARIA_ATTR = cfg.ALLOW_ARIA_ATTR !== false; // Default true
  ALLOW_DATA_ATTR = cfg.ALLOW_DATA_ATTR !== false; // Default true
  ALLOW_UNKNOWN_PROTOCOLS = cfg.ALLOW_UNKNOWN_PROTOCOLS || false; // Default false
  SAFE_FOR_TEMPLATES = cfg.SAFE_FOR_TEMPLATES || false; // Default false
  WHOLE_DOCUMENT = cfg.WHOLE_DOCUMENT || false; // Default false
  RETURN_DOM = cfg.RETURN_DOM || false; // Default false
  RETURN_DOM_FRAGMENT = cfg.RETURN_DOM_FRAGMENT || false; // Default false
  RETURN_TRUSTED_TYPE = cfg.RETURN_TRUSTED_TYPE || false; // Default false
  FORCE_BODY = cfg.FORCE_BODY || false; // Default false
  SANITIZE_DOM = cfg.SANITIZE_DOM !== false; // Default true
  KEEP_CONTENT = cfg.KEEP_CONTENT !== false; // Default true
  IN_PLACE = cfg.IN_PLACE || false; // Default false
  /* Fall back to the library default, not to whatever an earlier call
   * installed; otherwise a custom URI pattern would stay active forever. */
  IS_ALLOWED_URI$$1 = cfg.ALLOWED_URI_REGEXP || IS_ALLOWED_URI;
  NAMESPACE = cfg.NAMESPACE || HTML_NAMESPACE;
  /* Custom element handling starts from its locked-down defaults on every
   * parse, for the same reason: earlier, more permissive settings must not
   * survive into a call that does not ask for them. */
  CUSTOM_ELEMENT_HANDLING.tagNameCheck = null;
  CUSTOM_ELEMENT_HANDLING.attributeNameCheck = null;
  CUSTOM_ELEMENT_HANDLING.allowCustomizedBuiltInElements = false;
  var customElementCfg = cfg.CUSTOM_ELEMENT_HANDLING;
  if (customElementCfg && isRegexOrFunction(customElementCfg.tagNameCheck)) {
    CUSTOM_ELEMENT_HANDLING.tagNameCheck = customElementCfg.tagNameCheck;
  }
  if (customElementCfg && isRegexOrFunction(customElementCfg.attributeNameCheck)) {
    CUSTOM_ELEMENT_HANDLING.attributeNameCheck = customElementCfg.attributeNameCheck;
  }
  if (customElementCfg && typeof customElementCfg.allowCustomizedBuiltInElements === 'boolean') {
    CUSTOM_ELEMENT_HANDLING.allowCustomizedBuiltInElements = customElementCfg.allowCustomizedBuiltInElements;
  }
  /* Only the supported parser media types are accepted; anything else
   * selects the default. */
  // eslint-disable-next-line unicorn/prefer-includes
  PARSER_MEDIA_TYPE = SUPPORTED_PARSER_MEDIA_TYPES.indexOf(cfg.PARSER_MEDIA_TYPE) === -1 ? DEFAULT_PARSER_MEDIA_TYPE : cfg.PARSER_MEDIA_TYPE;
  // HTML tags and attributes are not case-sensitive, converting to lowercase. Keeping XHTML as is.
  transformCaseFunc = PARSER_MEDIA_TYPE === 'application/xhtml+xml' ? function (x) {
    return x;
  } : stringToLowerCase;
  if (SAFE_FOR_TEMPLATES) {
    ALLOW_DATA_ATTR = false;
  }
  if (RETURN_DOM_FRAGMENT) {
    RETURN_DOM = true;
  }
  /* Parse profile info: profiles replace the allow-lists computed above */
  if (USE_PROFILES) {
    ALLOWED_TAGS = addToSet({}, [].concat(_toConsumableArray$1(text)));
    ALLOWED_ATTR = [];
    if (USE_PROFILES.html === true) {
      addToSet(ALLOWED_TAGS, html);
      addToSet(ALLOWED_ATTR, html$1);
    }
    if (USE_PROFILES.svg === true) {
      addToSet(ALLOWED_TAGS, svg);
      addToSet(ALLOWED_ATTR, svg$1);
      addToSet(ALLOWED_ATTR, xml);
    }
    if (USE_PROFILES.svgFilters === true) {
      addToSet(ALLOWED_TAGS, svgFilters);
      addToSet(ALLOWED_ATTR, svg$1);
      addToSet(ALLOWED_ATTR, xml);
    }
    if (USE_PROFILES.mathMl === true) {
      addToSet(ALLOWED_TAGS, mathMl);
      addToSet(ALLOWED_ATTR, mathMl$1);
      addToSet(ALLOWED_ATTR, xml);
    }
  }
  /* Merge configuration parameters; the shared default sets are cloned
   * before being extended so the additions stay local to this config */
  if (cfg.ADD_TAGS) {
    if (ALLOWED_TAGS === DEFAULT_ALLOWED_TAGS) {
      ALLOWED_TAGS = clone(ALLOWED_TAGS);
    }
    addToSet(ALLOWED_TAGS, cfg.ADD_TAGS);
  }
  if (cfg.ADD_ATTR) {
    if (ALLOWED_ATTR === DEFAULT_ALLOWED_ATTR) {
      ALLOWED_ATTR = clone(ALLOWED_ATTR);
    }
    addToSet(ALLOWED_ATTR, cfg.ADD_ATTR);
  }
  if (cfg.ADD_URI_SAFE_ATTR) {
    addToSet(URI_SAFE_ATTRIBUTES, cfg.ADD_URI_SAFE_ATTR);
  }
  if (cfg.FORBID_CONTENTS) {
    if (FORBID_CONTENTS === DEFAULT_FORBID_CONTENTS) {
      FORBID_CONTENTS = clone(FORBID_CONTENTS);
    }
    addToSet(FORBID_CONTENTS, cfg.FORBID_CONTENTS);
  }
  /* Add #text in case KEEP_CONTENT is set to true */
  if (KEEP_CONTENT) {
    ALLOWED_TAGS['#text'] = true;
  }
  /* Add html, head and body to ALLOWED_TAGS in case WHOLE_DOCUMENT is true */
  if (WHOLE_DOCUMENT) {
    addToSet(ALLOWED_TAGS, ['html', 'head', 'body']);
  }
  /* Add tbody to ALLOWED_TAGS in case tables are permitted, see #286, #365 */
  if (ALLOWED_TAGS.table) {
    addToSet(ALLOWED_TAGS, ['tbody']);
    delete FORBID_TAGS.tbody;
  }
  // Prevent further manipulation of configuration.
  // Not available in IE8, Safari 5, etc.
  if (freeze) {
    freeze(cfg);
  }
  CONFIG = cfg;
};
var MATHML _TEXT _INTEGRATION _POINTS = addToSet ( { } , [ 'mi' , 'mo' , 'mn' , 'ms' , 'mtext' ] ) ;
var HTML _INTEGRATION _POINTS = addToSet ( { } , [ 'foreignobject' , 'desc' , 'title' , 'annotation-xml' ] ) ;
/* Keep track of all possible SVG and MathML tags
 * so that we can perform the namespace checks
 * correctly. */
var ALL _SVG _TAGS = addToSet ( { } , svg ) ;
addToSet ( ALL _SVG _TAGS , svgFilters ) ;
addToSet ( ALL _SVG _TAGS , svgDisallowed ) ;
var ALL _MATHML _TAGS = addToSet ( { } , mathMl ) ;
addToSet ( ALL _MATHML _TAGS , mathMlDisallowed ) ;
/**
 * _checkValidNamespace
 *
 * Compares an element's namespace with that of its parent and decides
 * whether that combination is one a spec-compliant parser could produce.
 *
 * @param {Element} element a DOM element whose namespace is being checked
 * @returns {boolean} A falsy value if the element has a namespace that a
 * spec-compliant parser would never return, a truthy value otherwise.
 */
var _checkValidNamespace = function _checkValidNamespace(element) {
  var container = getParentNode(element);
  // In JSDOM, if we're inside shadow DOM, then parentNode
  // can be null. We just simulate parent in this case.
  if (!container || !container.tagName) {
    container = {
      namespaceURI: HTML_NAMESPACE,
      tagName: 'template'
    };
  }
  var ownTag = stringToLowerCase(element.tagName);
  var containerTag = stringToLowerCase(container.tagName);
  var containerNamespace = container.namespaceURI;

  switch (element.namespaceURI) {
    case SVG_NAMESPACE:
      // HTML -> SVG is only possible through <svg> itself.
      if (containerNamespace === HTML_NAMESPACE) {
        return ownTag === 'svg';
      }
      // MathML -> SVG is only possible through <svg> placed inside
      // <annotation-xml> or a MathML text integration point.
      if (containerNamespace === MATHML_NAMESPACE) {
        return ownTag === 'svg' && (containerTag === 'annotation-xml' || MATHML_TEXT_INTEGRATION_POINTS[containerTag]);
      }
      // Otherwise only tags known to the SVG tag lists are accepted.
      return Boolean(ALL_SVG_TAGS[ownTag]);

    case MATHML_NAMESPACE:
      // HTML -> MathML is only possible through <math> itself.
      if (containerNamespace === HTML_NAMESPACE) {
        return ownTag === 'math';
      }
      // SVG -> MathML is only possible through <math> placed inside
      // an HTML integration point.
      if (containerNamespace === SVG_NAMESPACE) {
        return ownTag === 'math' && HTML_INTEGRATION_POINTS[containerTag];
      }
      // Otherwise only tags known to the MathML tag lists are accepted.
      return Boolean(ALL_MATHML_TAGS[ownTag]);

    case HTML_NAMESPACE:
      // SVG -> HTML requires an HTML integration point as parent,
      // MathML -> HTML requires a MathML text integration point.
      if (containerNamespace === SVG_NAMESPACE && !HTML_INTEGRATION_POINTS[containerTag]) {
        return false;
      }
      if (containerNamespace === MATHML_NAMESPACE && !MATHML_TEXT_INTEGRATION_POINTS[containerTag]) {
        return false;
      }
      // Tag names that exist in both SVG and HTML; they are listed
      // explicitly so they are not rejected from the HTML namespace.
      var sharedWithSvg = addToSet({}, ['title', 'style', 'font', 'a', 'script']);
      // Tags specific to MathML or SVG must never appear in HTML namespace.
      return !ALL_MATHML_TAGS[ownTag] && (sharedWithSvg[ownTag] || !ALL_SVG_TAGS[ownTag]);

    default:
      // The element has a namespace that is neither HTML, SVG nor MathML.
      return false;
  }
};
/**
 * _forceRemove
 *
 * @param {Node} node a DOM node
 */
var _forceRemove = function _forceRemove(node) {
  // Record the node as removed before trying to detach it.
  arrayPush(DOMPurify.removed, { element: node });

  // First choice: detach through the parent.
  try {
    // eslint-disable-next-line unicorn/prefer-dom-node-remove
    node.parentNode.removeChild(node);
    return;
  } catch (detachError) {
    // fall through to the alternatives below
  }

  // Second choice: blank the node's markup; last resort: node.remove().
  try {
    node.outerHTML = emptyHTML;
  } catch (blankError) {
    node.remove();
  }
};
/**
 * _removeAttribute
 *
 * Records the attribute in DOMPurify.removed and strips it from the node.
 *
 * @param {String} name an Attribute name
 * @param {Node} node a DOM node
 */
var _removeAttribute = function _removeAttribute(name, node) {
  // Log the attribute node; if it cannot be looked up, log null instead.
  try {
    arrayPush(DOMPurify.removed, {
      attribute: node.getAttributeNode(name),
      from: node
    });
  } catch (lookupError) {
    arrayPush(DOMPurify.removed, {
      attribute: null,
      from: node
    });
  }
  node.removeAttribute(name);

  // Only a disallowed "is" attribute needs the extra handling below.
  if (name !== 'is' || ALLOWED_ATTR[name]) {
    return;
  }
  // We void attribute values for unremovable "is" attributes: when a DOM
  // result was requested the whole node is removed, otherwise the value is
  // overwritten with an empty string. Failures are ignored either way.
  try {
    if (RETURN_DOM || RETURN_DOM_FRAGMENT) {
      _forceRemove(node);
    } else {
      node.setAttribute(name, '');
    }
  } catch (ignored) {}
};
/**
 * _initDocument
 *
 * @param {String} dirty a string of dirty markup
 * @return {Document} a DOM, filled with the dirty markup
 */
var _initDocument = function _initDocument(dirty) {
  var parsed;
  var leadingWhitespace;
  var markup = dirty;

  if (FORCE_BODY) {
    markup = '<remove></remove>' + markup;
  } else {
    /* If FORCE_BODY isn't used, leading whitespace needs to be preserved manually */
    var whitespaceMatch = stringMatch(markup, /^[\r\n\t ]+/);
    leadingWhitespace = whitespaceMatch && whitespaceMatch[0];
  }

  if (PARSER_MEDIA_TYPE === 'application/xhtml+xml') {
    // Root of XHTML doc must contain xmlns declaration (see https://www.w3.org/TR/xhtml1/normative.html#strict)
    markup = '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body>' + markup + '</body></html>';
  }

  var payload = markup;
  if (trustedTypesPolicy) {
    payload = trustedTypesPolicy.createHTML(markup);
  }

  var inHtmlNamespace = NAMESPACE === HTML_NAMESPACE;

  /*
   * Use the DOMParser API by default, fall back later if need be.
   * DOMParser does not work for SVG input with multiple root elements,
   * so it is only tried for the HTML namespace.
   */
  if (inHtmlNamespace) {
    try {
      parsed = new DOMParser().parseFromString(payload, PARSER_MEDIA_TYPE);
    } catch (parseError) {}
  }

  /* Fall back to implementation.createDocument when no usable document was produced */
  if (!parsed || !parsed.documentElement) {
    parsed = implementation.createDocument(NAMESPACE, 'template', null);
    try {
      parsed.documentElement.innerHTML = IS_EMPTY_INPUT ? '' : payload;
    } catch (assignError) {
      // Syntax error if payload is invalid xml
    }
  }

  var container = parsed.body || parsed.documentElement;
  if (markup && leadingWhitespace) {
    var firstChild = container.childNodes[0] || null;
    container.insertBefore(document.createTextNode(leadingWhitespace), firstChild);
  }

  /* Work on whole document or just its body */
  if (inHtmlNamespace) {
    var rootTag = WHOLE_DOCUMENT ? 'html' : 'body';
    return getElementsByTagName.call(parsed, rootTag)[0];
  }
  if (WHOLE_DOCUMENT) {
    return parsed.documentElement;
  }
  return container;
};
/**
 * _createIterator
 *
 * @param {Document} root document/fragment to create iterator for
 * @return {Iterator} iterator instance
 */
var _createIterator = function _createIterator(root) {
  // Visit elements, comments and text nodes.
  // eslint-disable-next-line no-bitwise
  var whatToShow = NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT | NodeFilter.SHOW_TEXT;
  // The captured createNodeIterator is invoked on the root's owner document,
  // or on the root itself when it has none.
  var owner = root.ownerDocument || root;
  return createNodeIterator.call(owner, root, whatToShow, null, false);
};
/**
 * _isClobbered
 *
 * @param {Node} elm element to check for clobbering attacks
 * @return {Boolean} true if clobbered, false if safe
 */
var _isClobbered = function _isClobbered(elm) {
  // Only form elements are inspected.
  if (!(elm instanceof HTMLFormElement)) {
    return false;
  }
  // The form counts as clobbered as soon as one of the members the
  // sanitizer relies on no longer has its expected type.
  return (
    typeof elm.nodeName !== 'string' ||
    typeof elm.textContent !== 'string' ||
    typeof elm.removeChild !== 'function' ||
    !(elm.attributes instanceof NamedNodeMap) ||
    typeof elm.removeAttribute !== 'function' ||
    typeof elm.setAttribute !== 'function' ||
    typeof elm.namespaceURI !== 'string' ||
    typeof elm.insertBefore !== 'function'
  );
};
/**
 * _isNode
 *
 * @param {Node} object object to check whether it's a DOM node
 * @return {Boolean} true if object is a DOM node
 */
var _isNode = function _isNode(object) {
  // When Node is exposed as an object, rely on instanceof.
  var nodeKind = typeof Node === 'undefined' ? 'undefined' : _typeof(Node);
  if (nodeKind === 'object') {
    return object instanceof Node;
  }
  // Otherwise duck-type: a falsy input is handed back unchanged.
  if (!object) {
    return object;
  }
  var valueKind = typeof object === 'undefined' ? 'undefined' : _typeof(object);
  if (valueKind !== 'object') {
    return false;
  }
  if (typeof object.nodeType !== 'number') {
    return false;
  }
  return typeof object.nodeName === 'string';
};
/**
 * _executeHook
 * Execute user configurable hooks
 *
 * @param {String} entryPoint Name of the hook's entry point
 * @param {Node} currentNode node to work on with the hook
 * @param {Object} data additional hook parameters
 */
var _executeHook = function _executeHook(entryPoint, currentNode, data) {
  var registered = hooks[entryPoint];
  // Run every hook stored under this entry point, with DOMPurify as `this`
  // and the active CONFIG as the final argument.
  if (registered) {
    arrayForEach(registered, function (hook) {
      hook.call(DOMPurify, currentNode, data, CONFIG);
    });
  }
};
/**
 * _sanitizeElements
 *
 * @protect nodeName
 * @protect textContent
 * @protect removeChild
 *
 * @param {Node} currentNode to check for permission to exist
 * @return {Boolean} true if node was killed, false if left alive
 */
var _sanitizeElements = function _sanitizeElements ( currentNode ) {
// Runs the element-level checks on a single node, in a fixed order. Each
// failing check removes the node through _forceRemove and returns true;
// a node that survives every check is left in place and false is returned.
var content = void 0 ;
/* Execute a hook if present */
_executeHook ( 'beforeSanitizeElements' , currentNode , null ) ;
/* Check if element is clobbered or can clobber */
if ( _isClobbered ( currentNode ) ) {
_forceRemove ( currentNode ) ;
return true ;
}
/* Check if tagname contains Unicode */
if ( stringMatch ( currentNode . nodeName , /[\u0080-\uFFFF]/ ) ) {
_forceRemove ( currentNode ) ;
return true ;
}
/* Now let's check the element's type and name */
var tagName = transformCaseFunc ( currentNode . nodeName ) ;
/* Execute a hook if present */
// The hook receives the case-transformed tag name and the active allow-list.
_executeHook ( 'uponSanitizeElement' , currentNode , {
tagName : tagName ,
allowedTags : ALLOWED _TAGS
} ) ;
/* Detect mXSS attempts abusing namespace confusion */
// Triggers when the node has no element child (neither directly nor via its
// .content), yet both innerHTML and textContent contain "<" followed by "/"
// or a word character.
if ( ! _isNode ( currentNode . firstElementChild ) && ( ! _isNode ( currentNode . content ) || ! _isNode ( currentNode . content . firstElementChild ) ) && regExpTest ( /<[/\w]/g , currentNode . innerHTML ) && regExpTest ( /<[/\w]/g , currentNode . textContent ) ) {
_forceRemove ( currentNode ) ;
return true ;
}
/* Mitigate a problem with templates inside select */
if ( tagName === 'select' && regExpTest ( /<template/i , currentNode . innerHTML ) ) {
_forceRemove ( currentNode ) ;
return true ;
}
/* Remove element if anything forbids its presence */
if ( ! ALLOWED _TAGS [ tagName ] || FORBID _TAGS [ tagName ] ) {
/* Check if we have a custom element to handle */
// A tag that is not explicitly forbidden, looks like a custom element and
// passes the configured tagNameCheck is kept. Note that these early
// `return false` exits skip every check below, including the namespace
// check and the 'afterSanitizeElements' hook.
if ( ! FORBID _TAGS [ tagName ] && _basicCustomElementTest ( tagName ) ) {
if ( CUSTOM _ELEMENT _HANDLING . tagNameCheck instanceof RegExp && regExpTest ( CUSTOM _ELEMENT _HANDLING . tagNameCheck , tagName ) ) return false ;
if ( CUSTOM _ELEMENT _HANDLING . tagNameCheck instanceof Function && CUSTOM _ELEMENT _HANDLING . tagNameCheck ( tagName ) ) return false ;
}
/* Keep content except for bad-listed elements */
if ( KEEP _CONTENT && ! FORBID _CONTENTS [ tagName ] ) {
var parentNode = getParentNode ( currentNode ) || currentNode . parentNode ;
var childNodes = getChildNodes ( currentNode ) || currentNode . childNodes ;
if ( childNodes && parentNode ) {
var childCount = childNodes . length ;
// Deep-clone the children and insert them right after currentNode. Walking
// backwards while always inserting before currentNode's next sibling keeps
// the clones in their original order.
for ( var i = childCount - 1 ; i >= 0 ; -- i ) {
parentNode . insertBefore ( cloneNode ( childNodes [ i ] , true ) , getNextSibling ( currentNode ) ) ;
}
}
}
_forceRemove ( currentNode ) ;
return true ;
}
/* Check whether element has a valid namespace */
if ( currentNode instanceof Element && ! _checkValidNamespace ( currentNode ) ) {
_forceRemove ( currentNode ) ;
return true ;
}
// Remove <noscript>/<noembed> whose markup contains a closing
// </noscript or </noembed sequence.
if ( ( tagName === 'noscript' || tagName === 'noembed' ) && regExpTest ( /<\/no(script|embed)/i , currentNode . innerHTML ) ) {
_forceRemove ( currentNode ) ;
return true ;
}
/* Sanitize element content to be template-safe */
// Applies to text nodes only (nodeType 3): mustache and ERB expressions are
// replaced by a single space.
if ( SAFE _FOR _TEMPLATES && currentNode . nodeType === 3 ) {
/* Get the element's text content */
content = currentNode . textContent ;
content = stringReplace ( content , MUSTACHE _EXPR$$1 , ' ' ) ;
content = stringReplace ( content , ERB _EXPR$$1 , ' ' ) ;
// Only touch the node, and log a clone of it as removed, when the
// replacement actually changed the text.
if ( currentNode . textContent !== content ) {
arrayPush ( DOMPurify . removed , { element : currentNode . cloneNode ( ) } ) ;
currentNode . textContent = content ;
}
}
/* Execute a hook if present */
_executeHook ( 'afterSanitizeElements' , currentNode , null ) ;
return false ;
} ;
/**
 * _isValidAttribute
 *
 * @param {string} lcTag Lowercase tag name of containing element.
 * @param {string} lcName Lowercase attribute name.
 * @param {string} value Attribute value.
 * @return {Boolean} Returns true if `value` is valid, otherwise false.
 */
// eslint-disable-next-line complexity
var _isValidAttribute = function _isValidAttribute(lcTag, lcName, value) {
  /* Make sure attribute cannot clobber */
  if (SANITIZE_DOM && (lcName === 'id' || lcName === 'name') && (value in document || value in formElement)) {
    return false;
  }

  /* Allow valid data-* attributes: At least one character after "-"
      (https://html.spec.whatwg.org/multipage/dom.html#embedding-custom-non-visible-data-with-the-data-*-attributes)
      XML-compatible (https://html.spec.whatwg.org/multipage/infrastructure.html#xml-compatible and http://www.w3.org/TR/xml/#d0e804)
      We don't need to check the value; it's always URI safe. */
  if (ALLOW_DATA_ATTR && !FORBID_ATTR[lcName] && regExpTest(DATA_ATTR$$1, lcName)) {
    return true;
  }

  /* aria-* attributes are accepted on the same terms when enabled */
  if (ALLOW_ARIA_ATTR && regExpTest(ARIA_ATTR$$1, lcName)) {
    return true;
  }

  /* Attribute is not on the allow-list, or is explicitly forbidden:
     only the custom element rules can still rescue it */
  if (!ALLOWED_ATTR[lcName] || FORBID_ATTR[lcName]) {
    // First condition does a very basic check if a) it's basically a valid custom element tagname AND
    // b) if the tagName passes whatever the user has configured for CUSTOM_ELEMENT_HANDLING.tagNameCheck
    // and c) if the attribute name passes whatever the user has configured for CUSTOM_ELEMENT_HANDLING.attributeNameCheck
    if (
      _basicCustomElementTest(lcTag) &&
      (CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof RegExp && regExpTest(CUSTOM_ELEMENT_HANDLING.tagNameCheck, lcTag) ||
        CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof Function && CUSTOM_ELEMENT_HANDLING.tagNameCheck(lcTag)) &&
      (CUSTOM_ELEMENT_HANDLING.attributeNameCheck instanceof RegExp && regExpTest(CUSTOM_ELEMENT_HANDLING.attributeNameCheck, lcName) ||
        CUSTOM_ELEMENT_HANDLING.attributeNameCheck instanceof Function && CUSTOM_ELEMENT_HANDLING.attributeNameCheck(lcName))
    ) {
      return true;
    }
    // Alternative, second condition checks if it's an `is`-attribute, AND
    // the value passes whatever the user has configured for CUSTOM_ELEMENT_HANDLING.tagNameCheck
    if (
      lcName === 'is' &&
      CUSTOM_ELEMENT_HANDLING.allowCustomizedBuiltInElements &&
      (CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof RegExp && regExpTest(CUSTOM_ELEMENT_HANDLING.tagNameCheck, value) ||
        CUSTOM_ELEMENT_HANDLING.tagNameCheck instanceof Function && CUSTOM_ELEMENT_HANDLING.tagNameCheck(value))
    ) {
      return true;
    }
    return false;
  }

  /* Check value is safe. First, is attr inert? If so, is safe */
  if (URI_SAFE_ATTRIBUTES[lcName]) {
    return true;
  }
  /* Value, with attribute whitespace stripped, matches the allowed-URI pattern */
  if (regExpTest(IS_ALLOWED_URI$$1, stringReplace(value, ATTR_WHITESPACE$$1, ''))) {
    return true;
  }
  /* data: URI in a link-like attribute of a tag listed in DATA_URI_TAGS (never on script) */
  if ((lcName === 'src' || lcName === 'xlink:href' || lcName === 'href') && lcTag !== 'script' && stringIndexOf(value, 'data:') === 0 && DATA_URI_TAGS[lcTag]) {
    return true;
  }
  /* Unknown protocols are allowed and the value is not a script or data URI */
  if (ALLOW_UNKNOWN_PROTOCOLS && !regExpTest(IS_SCRIPT_OR_DATA$$1, stringReplace(value, ATTR_WHITESPACE$$1, ''))) {
    return true;
  }
  /* An empty value is accepted; anything else is rejected */
  return !value;
};
/**
 * _basicCustomElementTest
 * checks if at least one dash is included in tagName, and it's not the first char
 * for more sophisticated checking see https://github.com/sindresorhus/validate-element-name
 * @param {string} tagName name of the tag of the node to sanitize
 */
var _basicCustomElementTest = function _basicCustomElementTest(tagName) {
  // A dash must be present and must not be the first character.
  var dashPosition = tagName.indexOf('-');
  return dashPosition > 0;
};
/**
 * _sanitizeAttributes
 *
 * @protect attributes
 * @protect nodeName
 * @protect removeAttribute
 * @protect setAttribute
 *
 * @param {Node} currentNode to sanitize
 */
var _sanitizeAttributes = function _sanitizeAttributes ( currentNode ) {
// Walks the node's attributes from last to first. Unless a hook force-keeps
// it, every attribute is removed first and only written back when it passes
// all checks below.
var attr = void 0 ;
var value = void 0 ;
var lcName = void 0 ;
var l = void 0 ;
/* Execute a hook if present */
_executeHook ( 'beforeSanitizeAttributes' , currentNode , null ) ;
var attributes = currentNode . attributes ;
/* Check if we have attributes; if not we might have a text node */
if ( ! attributes ) {
return ;
}
// One event object is reused for all attributes of this node; its fields are
// reset at the top of each iteration before the hook runs.
var hookEvent = {
attrName : '' ,
attrValue : '' ,
keepAttr : true ,
allowedAttributes : ALLOWED _ATTR
} ;
l = attributes . length ;
/* Go backwards over all attributes; safely remove bad ones */
while ( l -- ) {
attr = attributes [ l ] ;
var _attr = attr ,
name = _attr . name ,
namespaceURI = _attr . namespaceURI ;
value = stringTrim ( attr . value ) ;
lcName = transformCaseFunc ( name ) ;
/* Execute a hook if present */
hookEvent . attrName = lcName ;
hookEvent . attrValue = value ;
hookEvent . keepAttr = true ;
hookEvent . forceKeepAttr = undefined ; // Allows developers to see this is a property they can set
_executeHook ( 'uponSanitizeAttribute' , currentNode , hookEvent ) ;
// The hook may have replaced the value; continue with what it left behind.
value = hookEvent . attrValue ;
/* Did a hook force-keep the attribute? Then leave it untouched. */
if ( hookEvent . forceKeepAttr ) {
continue ;
}
/* Remove attribute */
// _removeAttribute also records the attribute in DOMPurify.removed.
_removeAttribute ( name , currentNode ) ;
/* Did the hooks approve of the attribute? */
if ( ! hookEvent . keepAttr ) {
continue ;
}
/* Work around a security issue in jQuery 3.0 */
// Values containing "/>" are dropped.
if ( regExpTest ( /\/>/i , value ) ) {
_removeAttribute ( name , currentNode ) ;
continue ;
}
/* Sanitize attribute content to be template-safe */
if ( SAFE _FOR _TEMPLATES ) {
value = stringReplace ( value , MUSTACHE _EXPR$$1 , ' ' ) ;
value = stringReplace ( value , ERB _EXPR$$1 , ' ' ) ;
}
/* Is `value` valid for this attribute? */
var lcTag = transformCaseFunc ( currentNode . nodeName ) ;
if ( ! _isValidAttribute ( lcTag , lcName , value ) ) {
continue ;
}
/* Handle invalid data-* attribute set by try-catching it */
try {
if ( namespaceURI ) {
currentNode . setAttributeNS ( namespaceURI , name , value ) ;
} else {
/* Fallback to setAttribute() for browser-unrecognized namespaces e.g. "x-schema". */
currentNode . setAttribute ( name , value ) ;
}
// The attribute was written back successfully, so drop the most recent
// entry from DOMPurify.removed again.
arrayPop ( DOMPurify . removed ) ;
} catch ( _ ) { }
}
/* Execute a hook if present */
_executeHook ( 'afterSanitizeAttributes' , currentNode , null ) ;
} ;
/**
 * _sanitizeShadowDOM
 *
 * @param {DocumentFragment} fragment to iterate over recursively
 */
var _sanitizeShadowDOM = function _sanitizeShadowDOM(fragment) {
  var walker = _createIterator(fragment);

  /* Execute a hook if present */
  _executeHook('beforeSanitizeShadowDOM', fragment, null);

  for (var shadowNode = walker.nextNode(); shadowNode; shadowNode = walker.nextNode()) {
    /* Execute a hook if present */
    _executeHook('uponSanitizeShadowNode', shadowNode, null);

    /* Sanitize tags and elements; a removed node needs no further work */
    if (_sanitizeElements(shadowNode)) {
      continue;
    }

    /* Deep shadow DOM detected: descend before handling the attributes */
    var nested = shadowNode.content;
    if (nested instanceof DocumentFragment) {
      _sanitizeShadowDOM(nested);
    }

    /* Check attributes, sanitize if necessary */
    _sanitizeAttributes(shadowNode);
  }

  /* Execute a hook if present */
  _executeHook('afterSanitizeShadowDOM', fragment, null);
};
/**
 * Sanitize
 * Public method providing core sanitation functionality
 *
 * @param {String|Node} dirty string or DOM node
 * @param {Object} cfg configuration object
 */
// eslint-disable-next-line complexity
DOMPurify.sanitize = function (dirty, cfg) {
  var workBody;
  var imported;
  var node;
  var previousNode;
  var result;

  /* Empty input is replaced by a placeholder comment instead of returning
     early: an early return would hand back the wrong type when the caller
     asked for a DOM object rather than a string. */
  IS_EMPTY_INPUT = !dirty;
  if (IS_EMPTY_INPUT) {
    dirty = '<!-->';
  }

  /* Anything that is neither a string nor a node must stringify cleanly. */
  if (typeof dirty !== 'string' && !_isNode(dirty)) {
    if (typeof dirty.toString !== 'function') {
      throw typeErrorCreate('toString is not a function');
    }
    dirty = dirty.toString();
    if (typeof dirty !== 'string') {
      throw typeErrorCreate('dirty is not a string, aborting');
    }
  }

  /* Unsupported environment: use toStaticHTML when present, else pass through. */
  if (!DOMPurify.isSupported) {
    var hasToStaticHTML = _typeof(window.toStaticHTML) === 'object' || typeof window.toStaticHTML === 'function';
    if (hasToStaticHTML && typeof dirty === 'string') {
      return window.toStaticHTML(dirty);
    }
    if (hasToStaticHTML && _isNode(dirty)) {
      return window.toStaticHTML(dirty.outerHTML);
    }
    return dirty;
  }

  /* A configuration pinned through setConfig() wins over the per-call one. */
  if (!SET_CONFIG) {
    _parseConfig(cfg);
  }

  /* Start this run with an empty removal log. */
  DOMPurify.removed = [];

  /* In-place sanitization only makes sense for nodes, never for strings. */
  if (typeof dirty === 'string') {
    IN_PLACE = false;
  }

  if (IN_PLACE) {
    /* Reject a root node that would itself have to be removed. */
    if (dirty.nodeName) {
      var rootTag = transformCaseFunc(dirty.nodeName);
      var rootAllowed = ALLOWED_TAGS[rootTag] && !FORBID_TAGS[rootTag];
      if (!rootAllowed) {
        throw typeErrorCreate('root node is forbidden and cannot be sanitized in-place');
      }
    }
  } else if (dirty instanceof Node) {
    /* Import the node into an empty document so the parser cannot strip
       elements from it. */
    workBody = _initDocument('<!---->');
    imported = workBody.ownerDocument.importNode(dirty, true);
    var isBodyElement = imported.nodeType === 1 && imported.nodeName === 'BODY';
    if (isBodyElement || imported.nodeName === 'HTML') {
      /* Already a usable root, take it as is. */
      workBody = imported;
    } else {
      // eslint-disable-next-line unicorn/prefer-dom-node-append
      workBody.appendChild(imported);
    }
  } else {
    /* Plain text without any markup needs no DOM round trip. */
    var isPlainText = !RETURN_DOM && !SAFE_FOR_TEMPLATES && !WHOLE_DOCUMENT &&
      // eslint-disable-next-line unicorn/prefer-includes
      dirty.indexOf('<') === -1;
    if (isPlainText) {
      if (trustedTypesPolicy && RETURN_TRUSTED_TYPE) {
        return trustedTypesPolicy.createHTML(dirty);
      }
      return dirty;
    }

    /* Build the document to work on; bail out if that yields nothing. */
    workBody = _initDocument(dirty);
    if (!workBody) {
      if (RETURN_DOM) {
        return null;
      }
      return RETURN_TRUSTED_TYPE ? emptyHTML : '';
    }
  }

  /* With FORCE_BODY the first child is our own helper element; drop it. */
  if (workBody && FORCE_BODY) {
    _forceRemove(workBody.firstChild);
  }

  var walker = _createIterator(IN_PLACE ? dirty : workBody);

  /* Walk the document node by node. */
  while ((node = walker.nextNode())) {
    /* IE may hand back a manipulated text node twice (#89); skip the repeat.
       Nodes fully handled by _sanitizeElements are skipped as well. */
    var isRepeatedTextNode = node.nodeType === 3 && node === previousNode;
    if (isRepeatedTextNode || _sanitizeElements(node)) {
      continue;
    }

    /* Template content is sanitized before the element's own attributes. */
    if (node.content instanceof DocumentFragment) {
      _sanitizeShadowDOM(node.content);
    }

    _sanitizeAttributes(node);
    previousNode = node;
  }
  previousNode = null;

  /* In-place mode hands back the very node it was given. */
  if (IN_PLACE) {
    return dirty;
  }

  /* DOM output requested. */
  if (RETURN_DOM) {
    if (RETURN_DOM_FRAGMENT) {
      result = createDocumentFragment.call(workBody.ownerDocument);
      while (workBody.firstChild) {
        // eslint-disable-next-line unicorn/prefer-dom-node-append
        result.appendChild(workBody.firstChild);
      }
    } else {
      result = workBody;
    }

    if (ALLOWED_ATTR.shadowroot) {
      /* importNode() rather than adoptNode(): adoptNode() does not reset
         internal state (e.g. the past names map of an HTMLFormElement).
         That is safe in theory, but not worth risking another attack
         vector; the state cloned by importNode() is defined by the specs. */
      result = importNode.call(originalDocument, result, true);
    }
    return result;
  }

  /* String output requested. */
  var html = WHOLE_DOCUMENT ? workBody.outerHTML : workBody.innerHTML;

  /* Prepend the doctype when whole-document output allows it. */
  if (WHOLE_DOCUMENT && ALLOWED_TAGS['!doctype']) {
    var doctype = workBody.ownerDocument && workBody.ownerDocument.doctype;
    if (doctype && doctype.name && regExpTest(DOCTYPE_NAME, doctype.name)) {
      html = '<!DOCTYPE ' + doctype.name + '>\n' + html;
    }
  }

  /* Strip template expressions from the serialized result. */
  if (SAFE_FOR_TEMPLATES) {
    html = stringReplace(html, MUSTACHE_EXPR$$1, ' ');
    html = stringReplace(html, ERB_EXPR$$1, ' ');
  }

  if (trustedTypesPolicy && RETURN_TRUSTED_TYPE) {
    return trustedTypesPolicy.createHTML(html);
  }
  return html;
};
/**
 * Public method to set the configuration once
 * setConfig
 *
 * @param {Object} cfg configuration object
 */
DOMPurify.setConfig = function (cfg) {
  // Parse first: the "configuration is pinned" flag is only raised once
  // parsing has completed, and sanitize() skips its own parsing while the
  // flag is set.
  _parseConfig(cfg);
  SET_CONFIG = true;
};
/**
 * Public method to remove the configuration
 * clearConfig
 *
 */
DOMPurify.clearConfig = function () {
  // Lower the "pinned" flag so sanitize() parses its per-call config again,
  // and drop the stored configuration object.
  SET_CONFIG = false;
  CONFIG = null;
};
/**
 * Public method to check if an attribute value is valid.
 * Uses last set config, if any. Otherwise, uses config defaults.
 * isValidAttribute
 *
 * @param {string} tag Tag name of containing element.
 * @param {string} attr Attribute name.
 * @param {string} value Attribute value.
 * @return {Boolean} Returns true if `value` is valid. Otherwise, returns false.
 */
DOMPurify.isValidAttribute = function (tag, attr, value) {
  // Without any parsed configuration yet, fall back to an empty config
  // object so the shared config variables are initialised.
  if (!CONFIG) {
    _parseConfig({});
  }

  // Tag and attribute names are case-normalised (tag first) before the check.
  return _isValidAttribute(transformCaseFunc(tag), transformCaseFunc(attr), value);
};
/**
 * AddHook
 * Public method to add DOMPurify hooks
 *
 * @param {String} entryPoint entry point for the hook to add
 * @param {Function} hookFunction function to execute
 */
DOMPurify.addHook = function (entryPoint, hookFunction) {
  // Anything that is not callable is ignored without an error.
  if (typeof hookFunction === 'function') {
    // Create the per-entry-point list lazily on first registration.
    if (!hooks[entryPoint]) {
      hooks[entryPoint] = [];
    }
    arrayPush(hooks[entryPoint], hookFunction);
  }
};
/**
 * RemoveHook
 * Public method to remove a DOMPurify hook at a given entryPoint
 * (pops it from the stack of hooks if more are present)
 *
 * @param {String} entryPoint entry point for the hook to remove
 */
DOMPurify.removeHook = function (entryPoint) {
  var registered = hooks[entryPoint];
  // Nothing was ever registered for this entry point.
  if (!registered) {
    return;
  }
  // Only the most recently added hook is removed.
  arrayPop(registered);
};
/**
 * RemoveHooks
 * Public method to remove all DOMPurify hooks at a given entryPoint
 *
 * @param {String} entryPoint entry point for the hooks to remove
 */
DOMPurify.removeHooks = function (entryPoint) {
  // Unknown entry points are left alone so no empty list is created for them.
  if (!hooks[entryPoint]) {
    return;
  }
  // The list is replaced by a new array rather than emptied in place.
  hooks[entryPoint] = [];
};
/**
 * RemoveAllHooks
 * Public method to remove all DOMPurify hooks
 *
 */
DOMPurify . removeAllHooks = function ( ) {
// Replace the whole registry with a fresh object, dropping every hook for every entry point.
hooks = { } ;
} ;
return DOMPurify ;
}
var purify = createDOMPurify ( ) ;
return purify ;
} ) ) ;
} ( purify ) ) ;
// Reuse a DOMPurify instance already attached to `window`; otherwise publish this bundle's build on `window` and use that.
var browser = window . DOMPurify || ( window . DOMPurify = purify . exports . default || purify . exports ) ;
/**
 * Parser that turns a web page URL into a note.
 *
 * The page is fetched, its body is sanitized with DOMPurify, and the result
 * is run through @mozilla/readability. When readable content is found it is
 * rendered through the "parsable article" templates; otherwise a fallback
 * note built from the "not parsable article" templates is produced.
 */
class WebsiteParser extends Parser {
    constructor(app, settings) {
        super(app, settings);
    }

    /**
     * @param {string} url clipboard content to check
     * @returns result of `this.isValidUrl(url)` (not defined in this class;
     *          presumably inherited from Parser)
     */
    test(url) {
        return this.isValidUrl(url);
    }

    /**
     * Fetch `url` and build a note from it.
     *
     * @param {string} url address of the page to save
     * @returns {Promise<Note>} article note, or the fallback note when
     *          Readability yields no content
     */
    prepareNote(url) {
        return __awaiter(this, void 0, void 0, function* () {
            const response = yield obsidian.request({ method: 'GET', url });
            const document = new DOMParser().parseFromString(response, 'text/html');

            // Set base to allow Readability to resolve relative paths
            const baseEl = document.createElement('base');
            baseEl.setAttribute('href', getBaseUrl(url));
            document.head.append(baseEl);

            // Only sanitized markup is handed on to Readability.
            document.body.innerHTML = browser.sanitize(document.body.innerHTML);

            if (!readability.isProbablyReaderable(document)) {
                new obsidian.Notice('@mozilla/readability considers this document to unlikely be readerable.');
            }

            const readableDocument = new readability.Readability(document).parse();
            if (readableDocument && readableDocument.content) {
                return yield this.parsableArticle(this.app, readableDocument, url);
            }
            return this.notParsableArticle(url);
        });
    }

    /**
     * Render a successfully parsed article through the user's templates.
     *
     * @param app Obsidian app instance, passed on to `replaceImages`
     * @param article Readability result; `title` and `content` are read
     * @param {string} url address the article was loaded from
     * @returns {Promise<Note>}
     */
    parsableArticle(app, article, url) {
        return __awaiter(this, void 0, void 0, function* () {
            const title = article.title || 'No title';
            let content = yield parseHtmlContent(article.content);
            if (this.settings.downloadImages && obsidian.Platform.isDesktop) {
                content = yield replaceImages(app, content, this.settings.assetsDir);
            }

            // Replacement *functions* are used on purpose: with a replacement
            // string, sequences such as "$&", "$1" or "$$" inside the fetched
            // article would be expanded by String.prototype.replace and
            // corrupt the note.
            const processedContent = this.settings.parsableArticleNote
                .replace(/%articleTitle%/g, () => title)
                .replace(/%articleURL%/g, () => url)
                .replace(/%articleContent%/g, () => content);

            const fileNameTemplate = this.settings.parseableArticleNoteTitle.replace(/%title%/g, () => title);
            return new Note(`${fileNameTemplate}.md`, processedContent);
        });
    }

    /**
     * Build the fallback note for a page Readability could not parse.
     *
     * @param {string} url address that could not be parsed
     * @returns {Note}
     */
    notParsableArticle(url) {
        console.error('Website not parseable');

        // Every occurrence of the placeholder is filled in, matching the
        // handling in parsableArticle().
        const content = this.settings.notParsableArticleNote.replace(/%articleURL%/g, () => url);

        // The file name comes from the *title* template; the note body
        // template must not be used here.
        const formattedDate = this.getFormattedDateForFilename();
        const fileNameTemplate = this.settings.notParseableArticleNoteTitle.replace(/%date%/g, () => formattedDate);
        return new Note(`${fileNameTemplate}.md`, content);
    }
}
/**
 * Catch-all parser: saves arbitrary clipboard text as a note.
 * `test()` always succeeds, so any text reaching this parser is accepted.
 */
class TextSnippetParser extends Parser {
    constructor(app, settings) {
        super(app, settings);
    }

    /** @returns {boolean} always `true` */
    test() {
        return true;
    }

    /**
     * Build a note from plain text.
     *
     * @param {string} text clipboard content
     * @returns {Promise<Note>} note whose body is the "text snippet" template
     *          with `%content%` filled in
     */
    prepareNote(text) {
        return __awaiter(this, void 0, void 0, function* () {
            const formattedDate = this.getFormattedDateForFilename();

            // Honour the configurable title template. If it is not set, fall
            // back to the bare date, which is the file name that was used
            // before the template was taken into account.
            const titleTemplate = this.settings.textSnippetNoteTitle || '%date%';
            const fileName = `${titleTemplate.replace(/%date%/g, () => formattedDate)}.md`;

            // A replacement function keeps "$&", "$1", "$$" etc. in the
            // clipboard text literal; a replacement string would expand them.
            const content = this.settings.textSnippetNote.replace(/%content%/g, () => text);
            return new Note(fileName, content);
        });
    }
}
/**
 * Settings tab of the ReadItLater plugin.
 *
 * Every control writes its value to `plugin.settings` and persists it through
 * `plugin.saveSettings()` on change.
 */
class ReadItLaterSettingsTab extends obsidian.PluginSettingTab {
    constructor(app, plugin) {
        super(app, plugin);
        this.plugin = plugin;
    }

    /** Rebuild the whole settings UI inside `this.containerEl`. */
    display() {
        const { containerEl, plugin } = this;

        // Stored value for `key`, or `fallback` only when nothing is stored.
        // A plain `||` would also discard a stored `false` or '' and show the
        // default instead of what the user actually saved.
        const currentValue = (key, fallback = DEFAULT_SETTINGS[key]) => {
            const stored = plugin.settings[key];
            return stored === undefined || stored === null ? fallback : stored;
        };

        // Change handler: store the value, run the optional follow-up, then persist.
        const persist = (key, afterChange) => (value) => __awaiter(this, void 0, void 0, function* () {
            plugin.settings[key] = value;
            if (afterChange) {
                afterChange(value);
            }
            yield plugin.saveSettings();
        });

        // Single-line text input. Returns the created Setting.
        const addText = ({ name, desc, placeholder, key, fallback, disabled }) => new obsidian.Setting(containerEl)
            .setName(name)
            .setDesc(desc)
            .addText((text) => {
                text.setPlaceholder(placeholder).setValue(currentValue(key, fallback));
                if (disabled !== undefined) {
                    text.setDisabled(disabled);
                }
                text.onChange(persist(key));
            });

        // On/off toggle. Returns the created Setting.
        const addToggle = ({ name, desc, key, afterChange }) => new obsidian.Setting(containerEl)
            .setName(name)
            .setDesc(desc)
            .addToggle((toggle) => toggle.setValue(currentValue(key)).onChange(persist(key, afterChange)));

        // Multi-line note template editor. Returns the created Setting.
        const addTemplate = ({ name, desc, key }) => new obsidian.Setting(containerEl)
            .setName(name)
            .setDesc(desc)
            .addTextArea((textarea) => {
                textarea.setValue(currentValue(key)).onChange(persist(key));
                textarea.inputEl.rows = 10;
                textarea.inputEl.cols = 25;
            });

        const folderHint = 'Enter valid folder name. For nested folders use this format: Folder A/Folder B. If no folder is entered, new note will be created in vault root.';

        containerEl.empty();
        containerEl.createEl('h2', { text: 'Settings for the ReadItLater plugin.' });

        addText({
            name: 'Inbox dir',
            desc: folderHint,
            placeholder: 'Defaults to root',
            key: 'inboxDir',
        });
        addToggle({
            name: 'Open new note',
            desc: 'If enabled, new note will open in current workspace',
            key: 'openNewNote',
        });
        addToggle({
            name: 'Download images',
            desc: 'If this is true, the used images are downloaded to the defined folder (just on Desktop)',
            key: 'downloadImages',
            // The assets folder is only editable while downloading is enabled.
            // `assetDirSetting` is declared just below; this callback can only
            // run later, on user interaction.
            afterChange: (value) => assetDirSetting.setDisabled(!value),
        });
        const assetDirSetting = addText({
            name: 'Assets dir',
            desc: folderHint,
            placeholder: 'Defaults to root',
            key: 'assetsDir',
            fallback: `${DEFAULT_SETTINGS.inboxDir}/assets`,
            disabled: !plugin.settings.downloadImages,
        });

        addText({
            name: 'Youtube note template title',
            desc: 'Available variables: %title%',
            placeholder: 'Defaults to %title%',
            key: 'youtubeNoteTitle',
        });
        addTemplate({
            name: 'Youtube note template',
            desc: 'Available variables: %videoTitle%, %videoURL%, %videoId%, %videoPlayer%',
            key: 'youtubeNote',
        });

        addText({
            name: 'Twitter note template title',
            desc: 'Available variables: %tweetAuthorName%, %date%',
            placeholder: 'Defaults to %tweetAuthorName%',
            key: 'twitterNoteTitle',
        });
        addTemplate({
            name: 'Twitter note template',
            desc: 'Available variables: %tweetAuthorName%, %tweetURL%, %tweetContent%',
            key: 'twitterNote',
        });

        addText({
            name: 'Parsable article note template title',
            desc: 'Available variables: %title%',
            placeholder: 'Defaults to %title%',
            key: 'parseableArticleNoteTitle',
        });
        addTemplate({
            name: 'Parsable article note template',
            desc: 'Available variables: %articleTitle%, %articleURL%, %articleContent%',
            key: 'parsableArticleNote',
        });

        addText({
            name: 'Not parseable article note template title',
            desc: 'Available variables: %date%',
            placeholder: "Defaults to 'Article %date%'",
            key: 'notParseableArticleNoteTitle',
        });
        addTemplate({
            name: 'Not parseable article note template',
            desc: 'Available variables: %articleURL%',
            key: 'notParsableArticleNote',
        });

        addText({
            name: 'Text snippet note template title',
            desc: 'Available variables: %date%',
            placeholder: "Defaults to 'Notice %date%'",
            key: 'textSnippetNoteTitle',
        });
        addTemplate({
            name: 'Text snippet note template',
            desc: 'Available variables: %content%',
            key: 'textSnippetNote',
        });
    }
}
/**
 * Plugin entry point: registers the ribbon icon, the "Save clipboard" command
 * and the settings tab, and turns clipboard content into notes via the first
 * parser that accepts it.
 */
class ReadItLaterPlugin extends obsidian.Plugin {
    /** Load settings, set up the parsers and register the UI entry points. */
    onload() {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.loadSettings();

            // Order matters: processClipboard() uses the first parser whose
            // test() accepts the content, and TextSnippetParser accepts
            // everything, so it has to stay last.
            this.parsers = [
                new YoutubeParser(this.app, this.settings),
                new TwitterParser(this.app, this.settings),
                new WebsiteParser(this.app, this.settings),
                new TextSnippetParser(this.app, this.settings),
            ];

            obsidian.addIcon('read-it-later', clipboardIcon);
            this.addRibbonIcon('read-it-later', 'ReadItLater: Save clipboard', () => this.processClipboard());
            this.addCommand({
                id: 'save-clipboard-to-notice',
                name: 'Save clipboard',
                callback: () => this.processClipboard(),
            });
            this.addSettingTab(new ReadItLaterSettingsTab(this.app, this));
        });
    }

    /** Populate `this.settings` from the defaults overlaid with stored data. */
    loadSettings() {
        return __awaiter(this, void 0, void 0, function* () {
            this.settings = Object.assign({}, DEFAULT_SETTINGS, yield this.loadData());
        });
    }

    /** Persist `this.settings`. */
    saveSettings() {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.saveData(this.settings);
        });
    }

    /**
     * Read the clipboard and save it as a note using the first matching parser.
     *
     * Failures (clipboard access, network, vault errors) are logged and
     * reported through a Notice instead of surfacing as an unhandled
     * rejection, because the ribbon and command callbacks that trigger this
     * method do not handle the returned promise.
     */
    processClipboard() {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                const clipboardContent = yield navigator.clipboard.readText();
                const parser = this.parsers.find((candidate) => candidate.test(clipboardContent));
                if (!parser) {
                    return;
                }
                const note = yield parser.prepareNote(clipboardContent);
                yield this.writeFile(note.fileName, note.content);
            }
            catch (error) {
                console.error('ReadItLater: saving clipboard content failed', error);
                new obsidian.Notice('ReadItLater: could not save clipboard content. See the developer console for details.');
            }
        });
    }

    /**
     * Create the note in the inbox folder (vault root when none is set).
     * An existing file is never overwritten; a Notice is shown instead.
     *
     * @param {string} fileName desired file name, normalized before use
     * @param {string} content note body
     */
    writeFile(fileName, content) {
        return __awaiter(this, void 0, void 0, function* () {
            fileName = normalizeFilename(fileName);
            yield checkAndCreateFolder(this.app.vault, this.settings.inboxDir);

            const filePath = this.settings.inboxDir
                ? obsidian.normalizePath(`${this.settings.inboxDir}/${fileName}`)
                : obsidian.normalizePath(`/${fileName}`);

            if (yield this.app.vault.adapter.exists(filePath)) {
                new obsidian.Notice(`${fileName} already exists!`);
                return;
            }

            const newFile = yield this.app.vault.create(filePath, content);
            // Confirm creation before opening, so a failure to open the file
            // cannot hide the fact that it was created.
            new obsidian.Notice(`${fileName} created successfully`);
            if (this.settings.openNewNote) {
                yield this.app.workspace.getLeaf(false).openFile(newFile);
            }
        });
    }
}
// SVG markup passed to obsidian.addIcon() under the id 'read-it-later' in ReadItLaterPlugin.onload().
const clipboardIcon = `
< svg fill = "currentColor" stroke = "currentColor" version = "1.1" viewBox = "0 0 512 512" xmlns = "http://www.w3.org/2000/svg" >
< g >
< path d = "m365.9,144.9c-12.3,0-24.2,1.8-35.4,5.2v-114.7h-96.9l7.3-35.4h-150.2l6.8,35.4h-97.5v454.6h330.5v-102.1c11.2,3.4 23.1,5.2 35.4,5.2 68.8-0.1 124.1-56.4 124.1-124.1 0-67.8-55.3-124.1-124.1-124.1zm-150.1-124l-10.4,50h-79.2l-9.4-50h99zm93.8,448.2h-288.7v-412.8h80.7l6.8,35.4h113.6l7.3-35.4h80.3v102.2c-27.3,14-48.8,37.9-59.7,66.7h-200.9v20.8h195c-1.4,7.4-2.2,15.1-2.2,22.9 0,13.4 2.2,26.4 6.2,38.6h-199v20.9h208.1c12,21.8 30.3,39.7 52.5,51.1v89.6zm56.3-98c-57.3,0-103.2-46.9-103.2-103.2s46.9-103.2 103.2-103.2c57.3,0 103.2,46.9 103.2,103.2s-45.8,103.2-103.2,103.2z" / >
< polygon points = "426.4,223.1 346.1,303.4 313.8,271.1 299.2,285.7 346.1,332.6 441,237.7 " / >
< rect width = "233.5" x = "49" y = "143.9" height = "20.9" / >
< rect width = "233.5" x = "49" y = "388.9" height = "20.9" / >
< / g >
< / s v g > ` ;
// CommonJS export: the plugin class is the module's sole export.
module . exports = ReadItLaterPlugin ;
//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibWFpbi5qcyIsInNvdXJjZXMiOlsibm9kZV9tb2R1bGVzL3RzbGliL3RzbGliLmVzNi5qcyIsInNyYy9oZWxwZXJzL2Rvd25sb2FkSW1hZ2UudHMiLCJzcmMvaGVscGVycy9jaGVja0FuZENyZWF0ZUZvbGRlci50cyIsInNyYy9oZWxwZXJzL2ZpbGV1dGlscy50cyIsIm5vZGVfbW9kdWxlcy9zcGFyay1tZDUvc3BhcmstbWQ1LmpzIiwic3JjL2hlbHBlcnMvbGlua0hhc2gudHMiLCJzcmMvaGVscGVycy9yZXBsYWNlSW1hZ2VzLnRzIiwic3JjL3NldHRpbmdzLnRzIiwic3JjL3BhcnNlcnMvTm90ZS50cyIsInNyYy9wYXJzZXJzL1BhcnNlci50cyIsInNyYy9wYXJzZXJzL1lvdXR1YmVQYXJzZXIudHMiLCJub2RlX21vZHVsZXMvdHVybmRvd24vbGliL3R1cm5kb3duLmJyb3dzZXIuZXMuanMiLCJub2RlX21vZHVsZXMvQGd1eXBsdXNwbHVzL3R1cm5kb3duLXBsdWdpbi1nZm0vbGliL3R1cm5kb3duLXBsdWdpbi1nZm0uY2pzLmpzIiwic3JjL3BhcnNlcnMvcGFyc2VodG1sLnRzIiwic3JjL3BhcnNlcnMvVHdpdHRlclBhcnNlci50cyIsIm5vZGVfbW9kdWxlcy9AbW96aWxsYS9yZWFkYWJpbGl0eS9SZWFkYWJpbGl0eS5qcyIsIm5vZGVfbW9kdWxlcy9AbW96aWxsYS9yZWFkYWJpbGl0eS9SZWFkYWJpbGl0eS1yZWFkZXJhYmxlLmpzIiwibm9kZV9tb2R1bGVzL0Btb3ppbGxhL3JlYWRhYmlsaXR5L2luZGV4LmpzIiwibm9kZV9tb2R1bGVzL2RvbXB1cmlmeS9kaXN0L3B1cmlmeS5qcyIsIm5vZGVfbW9kdWxlcy9pc29tb3JwaGljLWRvbXB1cmlmeS9icm93c2VyLmpzIiwic3JjL3BhcnNlcnMvV2Vic2l0ZVBhcnNlci50cyIsInNyYy9wYXJzZXJzL1RleHRTbmlwcGV0UGFyc2VyLnRzIiwic3JjL3ZpZXdzL3NldHRpbmdzLXRhYi50cyIsInNyYy9tYWluLnRzIl0sInNvdXJjZXNDb250ZW50IjpudWxsLCJuYW1lcyI6WyJub3JtYWxpemVQYXRoIiwiVEZvbGRlciIsInBhdGgiLCJ1bmRlZmluZWQiLCJTcGFya01ENSIsImJhc2VuYW1lIiwibW9tZW50IiwicmVxdWVzdCIsInJ1bGVzIiwidHVybmRvd25QbHVnaW5HZm0uZ2ZtIiwicmVxdWlyZSQkMCIsInJlcXVpcmUkJDEiLCJ0aGlzIiwiRE9NUHVyaWZ5LnNhbml0aXplIiwiaXNQcm9iYWJseVJlYWRlcmFibGUiLCJOb3RpY2UiLCJSZWFkYWJpbGl0eSIsIlBsYXRmb3JtIiwiUGx1Z2luU2V0dGluZ1RhYiIsIlNldHRpbmciLCJQbHVnaW4iLCJhZGRJY29uIl0sIm1hcHBpbmdzIjoiOzs7Ozs7Ozs7QUFBQTtBQUNBO0FBQ0E7QUFDQTtBQUNBO0FBQ0E7QUFDQTtBQUNBO0FBQ0E7QUFDQTtBQUNBO0FBQ0E7QUFDQTtBQUNBO0FBdURBO0FBQ08sU0FBUyxTQUFTLENBQUMsT0FBTyxFQUFFLFVBQVUsRUFBRSxDQUFDLEVBQUUsU0FBUyxFQUFFO0FBQzdELElBQUksU0FBUyxLQUFLLENBQUMsS0FBSyxFQUFFLEVBQUUsT0FBTyxLQUFLLFlBQVksQ0FBQyxHQUFHLEtBQUssR0FBRyxJQUFJLENB
QUMsQ0FBQyxVQUFVLE9BQU8sRUFBRSxFQUFFLE9BQU8sQ0FBQyxLQUFLLENBQUMsQ0FBQyxFQUFFLENBQUMsQ0FBQyxFQUFFO0FBQ2hILElBQUksT0FBTyxLQUFLLENBQUMsS0FBSyxDQUFDLEdBQUcsT0FBTyxDQUFDLEVBQUUsVUFBVSxPQUFPLEVBQUUsTUFBTSxFQUFFO0FBQy9ELFFBQVEsU0FBUyxTQUFTLENBQUMsS0FBSyxFQUFFLEVBQUUsSUFBSSxFQUFFLElBQUksQ0FBQyxTQUFTLENBQUMsSUFBSSxDQUFDLEtBQUssQ0FBQyxDQUFDLENBQUMsRUFBRSxDQUFDLE9BQU8sQ0FBQyxFQUFFLEVBQUUsTUFBTSxDQUFDLENBQUMsQ0FBQyxDQUFDLEVBQUUsRUFBRTtBQUNuRyxRQUFRLFNBQVMsUUFBUSxDQUFDLEtBQUssRUFBRSxFQUFFLElBQUksRUFBRSxJQUFJLENBQUMsU0FBUyxDQUFDLE9BQU8sQ0FBQyxDQUFDLEtBQUssQ0FBQyxDQUFDLENBQUMsRUFBRSxDQUFDLE9BQU8sQ0FBQyxFQUFFLEVBQUUsTUFBTSxDQUFDLENBQUMsQ0FBQyxDQUFDLEVBQUUsRUFBRTtBQUN0RyxRQUFRLFNBQVMsSUFBSSxDQUFDLE1BQU0sRUFBRSxFQUFFLE1BQU0sQ0FBQyxJQUFJLEdBQUcsT0FBTyxDQUFDLE1BQU0sQ0FBQyxLQUFLLENBQUMsR0FBRyxLQUFLLENBQUMsTUFBTSxDQUFDLEtBQUssQ0FBQyxDQUFDLElBQUksQ0FBQyxTQUFTLEVBQUUsUUFBUSxDQUFDLENBQUMsRUFBRTtBQUN0SCxRQUFRLElBQUksQ0FBQyxDQUFDLFNBQVMsR0FBRyxTQUFTLENBQUMsS0FBSyxDQUFDLE9BQU8sRUFBRSxVQUFVLElBQUksRUFBRSxDQUFDLEVBQUUsSUFBSSxFQUFFLENBQUMsQ0FBQztBQUM5RSxLQUFLLENBQUMsQ0FBQztBQUNQOztBQzdFTSxTQUFnQixhQUFhLENBQUMsR0FBVyxFQUFBOztBQUMzQyxRQUFBLE1BQU0sR0FBRyxHQUFHLE1BQU0sS0FBSyxDQUFDLEdBQUcsQ0FBQyxDQUFDO1FBRTdCLE9BQU87QUFDSCxZQUFBLFdBQVcsRUFBRSxNQUFNLEdBQUcsQ0FBQyxXQUFXLEVBQUU7WUFDcEMsYUFBYSxFQUFFLEdBQUcsQ0FBQyxLQUFLLENBQUMsR0FBRyxDQUFDLFdBQVcsQ0FBQyxHQUFHLENBQUMsQ0FBQztTQUNqRCxDQUFDO0tBQ0wsQ0FBQSxDQUFBO0FBQUE7O0FDTEQ7Ozs7QUFJRztBQUNtQixTQUFBLG9CQUFvQixDQUFDLEtBQVksRUFBRSxVQUFrQixFQUFBOztBQUN2RSxRQUFBLFVBQVUsR0FBR0Esc0JBQWEsQ0FBQyxVQUFVLENBQUMsQ0FBQztRQUN2QyxNQUFNLE1BQU0sR0FBRyxLQUFLLENBQUMscUJBQXFCLENBQUMsVUFBVSxDQUFDLENBQUM7QUFDdkQsUUFBQSxJQUFJLE1BQU0sSUFBSSxNQUFNLFlBQVlDLGdCQUFPLEVBQUU7WUFDckMsT0FBTztBQUNWLFNBQUE7QUFDRCxRQUFBLE1BQU0sS0FBSyxDQUFDLFlBQVksQ0FBQyxVQUFVLENBQUMsQ0FBQztLQUN4QyxDQUFBLENBQUE7QUFBQTs7QUNYSyxTQUFVLFVBQVUsQ0FBQyxHQUFXLEVBQUE7SUFDbEMsSUFBSTtBQUNBLFFBQUEsSUFBSSxHQUFHLENBQUMsR0FBRyxDQUFDLENBQUM7QUFDaEIsS0FBQTtBQUFDLElBQUEsT0FBTyxDQUFDLEVBQUU7QUFDUixRQUFBLE9BQU8sS0FBSyxDQUFDO0FBQ2hC
LEtBQUE7QUFDRCxJQUFBLE9BQU8sSUFBSSxDQUFDO0FBQ2hCLENBQUM7QUFFZSxTQUFBLFVBQVUsQ0FBQyxHQUFZLEVBQUU