MyRepo-Ums/node_modules/parse5-sax-parser/dist/index.js

192 lines
6.0 KiB
JavaScript
Raw Normal View History

2024-01-19 10:09:11 +00:00
import { Transform } from 'node:stream';
import { DevNullStream } from './dev-null-stream.js';
import { ParserFeedbackSimulator } from './parser-feedback-simulator.js';
/**
 * Streaming [SAX](https://en.wikipedia.org/wiki/Simple_API_for_XML)-style HTML parser.
 *
 * It is a [transform stream](https://nodejs.org/api/stream.html#stream_class_stream_transform):
 * every string chunk written to it is handed to the tokenizer and then pushed
 * to the readable side unchanged, so the parser can sit in the middle of a
 * `pipe()` chain. Tokens are reported through the `text`, `startTag`,
 * `endTag`, `doctype` and `comment` events, and only when a listener for the
 * respective event is registered.
 *
 * @example
 *
 * ```js
 * import { SAXParser } from 'parse5-sax-parser';
 * import http from 'node:http';
 * import fs from 'node:fs';
 *
 * const file = fs.createWriteStream('/home/google.com.html');
 * const parser = new SAXParser();
 *
 * parser.on('text', text => {
 *     // Handle page text content
 *     ...
 * });
 *
 * http.get('http://google.com', res => {
 *     // `SAXParser` is a `Transform` stream, so the response can be piped
 *     // through it: the page is analyzed and saved to the file at the
 *     // same time.
 *     res.pipe(parser).pipe(file);
 * });
 * ```
 */
export class SAXParser extends Transform {
    /**
     * @param options Parsing options. `sourceCodeLocationInfo` defaults to
     *   `false`; any key supplied by the caller overrides the default.
     */
    constructor(options = {}) {
        super({ encoding: 'utf8', decodeStrings: false });
        // Character tokens are accumulated here until a non-text token
        // (or the end of input) forces them out as a single `text` event.
        this.pendingText = null;
        this.lastChunkWritten = false;
        this.stopped = false;
        const defaults = { sourceCodeLocationInfo: false };
        this.options = { ...defaults, ...options };
        // The simulator receives this instance and calls back into the
        // `on*` handlers defined below.
        this.parserFeedbackSimulator = new ParserFeedbackSimulator(this.options, this);
        this.tokenizer = this.parserFeedbackSimulator.tokenizer;
        // NOTE: the stream is always piped into a /dev/null stream so that
        // `highWaterMark` is not hit even when there are no consumers.
        // (see: https://github.com/inikulin/parse5/issues/97#issuecomment-171940774)
        this.pipe(new DevNullStream());
    }
    //`Transform` implementation
    /**
     * Feeds one written chunk to the tokenizer and passes it downstream as is.
     *
     * @param chunk Chunk written to the stream; must be a string.
     * @param _encoding Unused.
     * @param callback Invoked with `(null, chunk)` once the chunk is processed.
     * @throws {TypeError} Synchronously, when `chunk` is not a string.
     */
    _transform(chunk, _encoding, callback) {
        const isText = typeof chunk === 'string';
        if (!isText) {
            throw new TypeError('Parser can work only with string streams.');
        }
        const passedThrough = this._transformChunk(chunk);
        callback(null, passedThrough);
    }
    /**
     * Marks the input as complete and runs one more empty chunk through
     * `_transformChunk`, so the tokenizer sees `lastChunkWritten === true`.
     *
     * @param callback Invoked with `(null, '')` afterwards.
     */
    _final(callback) {
        this.lastChunkWritten = true;
        const tail = this._transformChunk('');
        callback(null, tail);
    }
    /**
     * Stops parsing. Useful if you want the parser to stop consuming CPU time
     * once you've obtained the desired info from the input stream. Piping is
     * not affected: chunks keep flowing through the parser, they are just no
     * longer handed to the tokenizer.
     *
     * @example
     *
     * ```js
     * import { SAXParser } from 'parse5-sax-parser';
     * import http from 'node:http';
     * import fs from 'node:fs';
     *
     * const file = fs.createWriteStream('google.com.html');
     * const parser = new SAXParser();
     *
     * parser.on('doctype', ({ name, publicId, systemId }) => {
     *     // Process doctype info and stop parsing
     *     ...
     *     parser.stop();
     * });
     *
     * http.get('http://google.com', res => {
     *     // Even though parser.stop() was called, the whole content
     *     // of the page is still written to the file
     *     res.pipe(parser).pipe(file);
     * });
     * ```
     */
    stop() {
        this.stopped = true;
        this.tokenizer.pause();
    }
    //Internals
    /**
     * Hands `chunk` to the tokenizer unless parsing has been stopped.
     *
     * @param chunk String chunk to tokenize.
     * @returns The very same `chunk`, so it can be pushed downstream.
     */
    _transformChunk(chunk) {
        if (this.stopped) {
            return chunk;
        }
        this.tokenizer.write(chunk, this.lastChunkWritten);
        return chunk;
    }
    /**
     * Accumulates character data into `pendingText`. When both the buffered
     * text and the new token carry a location, the buffered location's end
     * position is extended to the end of the new token.
     *
     * @internal
     */
    onCharacter({ chars, location }) {
        const buffered = this.pendingText;
        if (buffered === null) {
            this.pendingText = { text: chars, sourceCodeLocation: location };
        }
        else {
            buffered.text += chars;
            const bufferedLocation = buffered.sourceCodeLocation;
            if (location && bufferedLocation) {
                buffered.sourceCodeLocation = {
                    ...bufferedLocation,
                    endLine: location.endLine,
                    endCol: location.endCol,
                    endOffset: location.endOffset,
                };
            }
        }
        // Flush right away when the preprocessor reports that it will drop
        // the parsed chunk.
        if (this.tokenizer.preprocessor.willDropParsedChunk()) {
            this._emitPendingText();
        }
    }
    /**
     * Whitespace is treated exactly like any other character data.
     *
     * @internal
     */
    onWhitespaceCharacter(token) {
        this.onCharacter(token);
    }
    /**
     * Null characters are treated exactly like any other character data.
     *
     * @internal
     */
    onNullCharacter(token) {
        this.onCharacter(token);
    }
    /**
     * Flushes buffered text and marks the parser as stopped.
     *
     * @internal
     */
    onEof() {
        this._emitPendingText();
        this.stopped = true;
    }
    /**
     * Flushes buffered text, then reports a `startTag` event.
     *
     * @internal
     */
    onStartTag(token) {
        this._emitPendingText();
        const { tagName, attrs, selfClosing, location } = token;
        this.emitIfListenerExists('startTag', {
            tagName,
            attrs,
            selfClosing,
            sourceCodeLocation: location,
        });
    }
    /**
     * Flushes buffered text, then reports an `endTag` event.
     *
     * @internal
     */
    onEndTag(token) {
        this._emitPendingText();
        const { tagName, location } = token;
        this.emitIfListenerExists('endTag', {
            tagName,
            sourceCodeLocation: location,
        });
    }
    /**
     * Flushes buffered text, then reports a `doctype` event.
     *
     * @internal
     */
    onDoctype(token) {
        this._emitPendingText();
        const { name, publicId, systemId, location } = token;
        this.emitIfListenerExists('doctype', {
            name,
            publicId,
            systemId,
            sourceCodeLocation: location,
        });
    }
    /**
     * Flushes buffered text, then reports a `comment` event whose `text` is
     * the token's `data`.
     *
     * @internal
     */
    onComment(token) {
        this._emitPendingText();
        const { data, location } = token;
        this.emitIfListenerExists('comment', {
            text: data,
            sourceCodeLocation: location,
        });
    }
    /**
     * Emits `token` under `eventName`, but only if somebody listens for it.
     *
     * @param eventName Name of the event to emit.
     * @param token Payload passed to the listeners.
     * @returns `true` if the token was emitted, `false` otherwise.
     */
    emitIfListenerExists(eventName, token) {
        const hasListener = this.listenerCount(eventName) !== 0;
        if (hasListener) {
            this._emitToken(eventName, token);
        }
        return hasListener;
    }
    /**
     * Unconditionally emits `token` as an `eventName` event.
     */
    _emitToken(eventName, token) {
        this.emit(eventName, token);
    }
    /**
     * Reports the buffered character data (if any) as a `text` event and
     * resets the buffer.
     */
    _emitPendingText() {
        const buffered = this.pendingText;
        if (buffered === null) {
            return;
        }
        this.emitIfListenerExists('text', buffered);
        this.pendingText = null;
    }
}
//# sourceMappingURL=index.js.map