Every new change

This commit is contained in:
Luca Schwan
2020-03-25 11:05:34 +01:00
parent ff8491e75f
commit b4d041c5de
7164 changed files with 499221 additions and 135 deletions

5
node_modules/chardet/.travis.yml generated vendored Normal file
View File

@ -0,0 +1,5 @@
language: node_js
node_js:
- "6"
- "8"
- "10"

19
node_modules/chardet/LICENSE generated vendored Normal file
View File

@ -0,0 +1,19 @@
Copyright (C) 2018 Dmitry Shirokov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

81
node_modules/chardet/README.md generated vendored Normal file
View File

@ -0,0 +1,81 @@
chardet [![Build Status](https://travis-ci.org/runk/node-chardet.png)](https://travis-ci.org/runk/node-chardet)
=====
Chardet is a character detection module for NodeJS written in pure Javascript.
Module is based on ICU project http://site.icu-project.org/, which uses character
occurency analysis to determine the most probable encoding.
## Installation
```
npm i chardet
```
## Usage
To return the encoding with the highest confidence:
```javascript
var chardet = require('chardet');
chardet.detect(Buffer.alloc('hello there!'));
// or
chardet.detectFile('/path/to/file', function(err, encoding) {});
// or
chardet.detectFileSync('/path/to/file');
```
To return the full list of possible encodings:
```javascript
var chardet = require('chardet');
chardet.detectAll(Buffer.alloc('hello there!'));
// or
chardet.detectFileAll('/path/to/file', function(err, encoding) {});
// or
chardet.detectFileAllSync('/path/to/file');
//Returned value is an array of objects sorted by confidence value in decending order
//e.g. [{ confidence: 90, name: 'UTF-8'}, {confidence: 20, name: 'windows-1252', lang: 'fr'}]
```
## Working with large data sets
Sometimes, when data set is huge and you want to optimize performace (in tradeoff of less accuracy),
you can sample only first N bytes of the buffer:
```javascript
chardet.detectFile('/path/to/file', { sampleSize: 32 }, function(err, encoding) {});
```
## Supported Encodings:
* UTF-8
* UTF-16 LE
* UTF-16 BE
* UTF-32 LE
* UTF-32 BE
* ISO-2022-JP
* ISO-2022-KR
* ISO-2022-CN
* Shift-JIS
* Big5
* EUC-JP
* EUC-KR
* GB18030
* ISO-8859-1
* ISO-8859-2
* ISO-8859-5
* ISO-8859-6
* ISO-8859-7
* ISO-8859-8
* ISO-8859-9
* windows-1250
* windows-1251
* windows-1252
* windows-1253
* windows-1254
* windows-1255
* windows-1256
* KOI8-R
Currently only these encodings are supported, more will be added soon.

141
node_modules/chardet/encoding/iso2022.js generated vendored Normal file
View File

@ -0,0 +1,141 @@
var util = require('util'),
Match = require ('../match');
/**
* This is a superclass for the individual detectors for
* each of the detectable members of the ISO 2022 family
* of encodings.
*/
function ISO_2022() {}
ISO_2022.prototype.match = function(det) {
/**
* Matching function shared among the 2022 detectors JP, CN and KR
* Counts up the number of legal an unrecognized escape sequences in
* the sample of text, and computes a score based on the total number &
* the proportion that fit the encoding.
*
*
* @param text the byte buffer containing text to analyse
* @param textLen the size of the text in the byte.
* @param escapeSequences the byte escape sequences to test for.
* @return match quality, in the range of 0-100.
*/
var i, j;
var escN;
var hits = 0;
var misses = 0;
var shifts = 0;
var quality;
// TODO: refactor me
var text = det.fInputBytes;
var textLen = det.fInputLen;
scanInput:
for (i = 0; i < textLen; i++) {
if (text[i] == 0x1b) {
checkEscapes:
for (escN = 0; escN < this.escapeSequences.length; escN++) {
var seq = this.escapeSequences[escN];
if ((textLen - i) < seq.length)
continue checkEscapes;
for (j = 1; j < seq.length; j++)
if (seq[j] != text[i + j])
continue checkEscapes;
hits++;
i += seq.length - 1;
continue scanInput;
}
misses++;
}
// Shift in/out
if (text[i] == 0x0e || text[i] == 0x0f)
shifts++;
}
if (hits == 0)
return null;
//
// Initial quality is based on relative proportion of recongized vs.
// unrecognized escape sequences.
// All good: quality = 100;
// half or less good: quality = 0;
// linear inbetween.
quality = (100 * hits - 100 * misses) / (hits + misses);
// Back off quality if there were too few escape sequences seen.
// Include shifts in this computation, so that KR does not get penalized
// for having only a single Escape sequence, but many shifts.
if (hits + shifts < 5)
quality -= (5 - (hits + shifts)) * 10;
return quality <= 0 ? null : new Match(det, this, quality);
};
module.exports.ISO_2022_JP = function() {
this.name = function() {
return 'ISO-2022-JP';
};
this.escapeSequences = [
[ 0x1b, 0x24, 0x28, 0x43 ], // KS X 1001:1992
[ 0x1b, 0x24, 0x28, 0x44 ], // JIS X 212-1990
[ 0x1b, 0x24, 0x40 ], // JIS C 6226-1978
[ 0x1b, 0x24, 0x41 ], // GB 2312-80
[ 0x1b, 0x24, 0x42 ], // JIS X 208-1983
[ 0x1b, 0x26, 0x40 ], // JIS X 208 1990, 1997
[ 0x1b, 0x28, 0x42 ], // ASCII
[ 0x1b, 0x28, 0x48 ], // JIS-Roman
[ 0x1b, 0x28, 0x49 ], // Half-width katakana
[ 0x1b, 0x28, 0x4a ], // JIS-Roman
[ 0x1b, 0x2e, 0x41 ], // ISO 8859-1
[ 0x1b, 0x2e, 0x46 ] // ISO 8859-7
];
};
util.inherits(module.exports.ISO_2022_JP, ISO_2022);
module.exports.ISO_2022_KR = function() {
this.name = function() {
return 'ISO-2022-KR';
};
this.escapeSequences = [
[ 0x1b, 0x24, 0x29, 0x43 ]
];
};
util.inherits(module.exports.ISO_2022_KR, ISO_2022);
module.exports.ISO_2022_CN = function() {
this.name = function() {
return 'ISO-2022-CN';
};
this.escapeSequences = [
[ 0x1b, 0x24, 0x29, 0x41 ], // GB 2312-80
[ 0x1b, 0x24, 0x29, 0x47 ], // CNS 11643-1992 Plane 1
[ 0x1b, 0x24, 0x2A, 0x48 ], // CNS 11643-1992 Plane 2
[ 0x1b, 0x24, 0x29, 0x45 ], // ISO-IR-165
[ 0x1b, 0x24, 0x2B, 0x49 ], // CNS 11643-1992 Plane 3
[ 0x1b, 0x24, 0x2B, 0x4A ], // CNS 11643-1992 Plane 4
[ 0x1b, 0x24, 0x2B, 0x4B ], // CNS 11643-1992 Plane 5
[ 0x1b, 0x24, 0x2B, 0x4C ], // CNS 11643-1992 Plane 6
[ 0x1b, 0x24, 0x2B, 0x4D ], // CNS 11643-1992 Plane 7
[ 0x1b, 0x4e ], // SS2
[ 0x1b, 0x4f ] // SS3
];
};
util.inherits(module.exports.ISO_2022_CN, ISO_2022);

502
node_modules/chardet/encoding/mbcs.js generated vendored Normal file
View File

@ -0,0 +1,502 @@
var util = require('util'),
Match = require ('../match');
/**
* Binary search implementation (recursive)
*/
function binarySearch(arr, searchValue) {
function find(arr, searchValue, left, right) {
if (right < left)
return -1;
/*
int mid = mid = (left + right) / 2;
There is a bug in the above line;
Joshua Bloch suggests the following replacement:
*/
var mid = Math.floor((left + right) >>> 1);
if (searchValue > arr[mid])
return find(arr, searchValue, mid + 1, right);
if (searchValue < arr[mid])
return find(arr, searchValue, left, mid - 1);
return mid;
};
return find(arr, searchValue, 0, arr.length - 1);
};
// 'Character' iterated character class.
// Recognizers for specific mbcs encodings make their 'characters' available
// by providing a nextChar() function that fills in an instance of iteratedChar
// with the next char from the input.
// The returned characters are not converted to Unicode, but remain as the raw
// bytes (concatenated into an int) from the codepage data.
//
// For Asian charsets, use the raw input rather than the input that has been
// stripped of markup. Detection only considers multi-byte chars, effectively
// stripping markup anyway, and double byte chars do occur in markup too.
//
function IteratedChar() {
this.charValue = 0; // 1-4 bytes from the raw input data
this.index = 0;
this.nextIndex = 0;
this.error = false;
this.done = false;
this.reset = function() {
this.charValue = 0;
this.index = -1;
this.nextIndex = 0;
this.error = false;
this.done = false;
};
this.nextByte = function(det) {
if (this.nextIndex >= det.fRawLength) {
this.done = true;
return -1;
}
var byteValue = det.fRawInput[this.nextIndex++] & 0x00ff;
return byteValue;
};
};
/**
* Asian double or multi-byte - charsets.
* Match is determined mostly by the input data adhering to the
* encoding scheme for the charset, and, optionally,
* frequency-of-occurence of characters.
*/
function mbcs() {};
/**
* Test the match of this charset with the input text data
* which is obtained via the CharsetDetector object.
*
* @param det The CharsetDetector, which contains the input text
* to be checked for being in this charset.
* @return Two values packed into one int (Damn java, anyhow)
* bits 0-7: the match confidence, ranging from 0-100
* bits 8-15: The match reason, an enum-like value.
*/
mbcs.prototype.match = function(det) {
var singleByteCharCount = 0, //TODO Do we really need this?
doubleByteCharCount = 0,
commonCharCount = 0,
badCharCount = 0,
totalCharCount = 0,
confidence = 0;
var iter = new IteratedChar();
detectBlock: {
for (iter.reset(); this.nextChar(iter, det);) {
totalCharCount++;
if (iter.error) {
badCharCount++;
} else {
var cv = iter.charValue & 0xFFFFFFFF;
if (cv <= 0xff) {
singleByteCharCount++;
} else {
doubleByteCharCount++;
if (this.commonChars != null) {
// NOTE: This assumes that there are no 4-byte common chars.
if (binarySearch(this.commonChars, cv) >= 0) {
commonCharCount++;
}
}
}
}
if (badCharCount >= 2 && badCharCount * 5 >= doubleByteCharCount) {
// console.log('its here!')
// Bail out early if the byte data is not matching the encoding scheme.
break detectBlock;
}
}
if (doubleByteCharCount <= 10 && badCharCount== 0) {
// Not many multi-byte chars.
if (doubleByteCharCount == 0 && totalCharCount < 10) {
// There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes.
// We don't have enough data to have any confidence.
// Statistical analysis of single byte non-ASCII charcters would probably help here.
confidence = 0;
}
else {
// ASCII or ISO file? It's probably not our encoding,
// but is not incompatible with our encoding, so don't give it a zero.
confidence = 10;
}
break detectBlock;
}
//
// No match if there are too many characters that don't fit the encoding scheme.
// (should we have zero tolerance for these?)
//
if (doubleByteCharCount < 20 * badCharCount) {
confidence = 0;
break detectBlock;
}
if (this.commonChars == null) {
// We have no statistics on frequently occuring characters.
// Assess confidence purely on having a reasonable number of
// multi-byte characters (the more the better
confidence = 30 + doubleByteCharCount - 20 * badCharCount;
if (confidence > 100) {
confidence = 100;
}
} else {
//
// Frequency of occurence statistics exist.
//
var maxVal = Math.log(parseFloat(doubleByteCharCount) / 4);
var scaleFactor = 90.0 / maxVal;
confidence = Math.floor(Math.log(commonCharCount + 1) * scaleFactor + 10);
confidence = Math.min(confidence, 100);
}
} // end of detectBlock:
return confidence == 0 ? null : new Match(det, this, confidence);
};
/**
* Get the next character (however many bytes it is) from the input data
* Subclasses for specific charset encodings must implement this function
* to get characters according to the rules of their encoding scheme.
*
* This function is not a method of class iteratedChar only because
* that would require a lot of extra derived classes, which is awkward.
* @param it The iteratedChar 'struct' into which the returned char is placed.
* @param det The charset detector, which is needed to get at the input byte data
* being iterated over.
* @return True if a character was returned, false at end of input.
*/
mbcs.prototype.nextChar = function(iter, det) {};
/**
* Shift-JIS charset recognizer.
*/
module.exports.sjis = function() {
this.name = function() {
return 'Shift-JIS';
};
this.language = function() {
return 'ja';
};
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
this.commonChars = [
0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa
];
this.nextChar = function(iter, det) {
iter.index = iter.nextIndex;
iter.error = false;
var firstByte;
firstByte = iter.charValue = iter.nextByte(det);
if (firstByte < 0)
return false;
if (firstByte <= 0x7f || (firstByte > 0xa0 && firstByte <= 0xdf))
return true;
var secondByte = iter.nextByte(det);
if (secondByte < 0)
return false;
iter.charValue = (firstByte << 8) | secondByte;
if (! ((secondByte >= 0x40 && secondByte <= 0x7f) || (secondByte >= 0x80 && secondByte <= 0xff))) {
// Illegal second byte value.
iter.error = true;
}
return true;
};
};
util.inherits(module.exports.sjis, mbcs);
/**
* Big5 charset recognizer.
*/
module.exports.big5 = function() {
this.name = function() {
return 'Big5';
};
this.language = function() {
return 'zh';
};
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
this.commonChars = [
0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f
];
this.nextChar = function(iter, det) {
iter.index = iter.nextIndex;
iter.error = false;
var firstByte = iter.charValue = iter.nextByte(det);
if (firstByte < 0)
return false;
// single byte character.
if (firstByte <= 0x7f || firstByte == 0xff)
return true;
var secondByte = iter.nextByte(det);
if (secondByte < 0)
return false;
iter.charValue = (iter.charValue << 8) | secondByte;
if (secondByte < 0x40 || secondByte == 0x7f || secondByte == 0xff)
iter.error = true;
return true;
};
};
util.inherits(module.exports.big5, mbcs);
/**
* EUC charset recognizers. One abstract class that provides the common function
* for getting the next character according to the EUC encoding scheme,
* and nested derived classes for EUC_KR, EUC_JP, EUC_CN.
*
* Get the next character value for EUC based encodings.
* Character 'value' is simply the raw bytes that make up the character
* packed into an int.
*/
function eucNextChar(iter, det) {
iter.index = iter.nextIndex;
iter.error = false;
var firstByte = 0;
var secondByte = 0;
var thirdByte = 0;
//int fourthByte = 0;
buildChar: {
firstByte = iter.charValue = iter.nextByte(det);
if (firstByte < 0) {
// Ran off the end of the input data
iter.done = true;
break buildChar;
}
if (firstByte <= 0x8d) {
// single byte char
break buildChar;
}
secondByte = iter.nextByte(det);
iter.charValue = (iter.charValue << 8) | secondByte;
if (firstByte >= 0xA1 && firstByte <= 0xfe) {
// Two byte Char
if (secondByte < 0xa1) {
iter.error = true;
}
break buildChar;
}
if (firstByte == 0x8e) {
// Code Set 2.
// In EUC-JP, total char size is 2 bytes, only one byte of actual char value.
// In EUC-TW, total char size is 4 bytes, three bytes contribute to char value.
// We don't know which we've got.
// Treat it like EUC-JP. If the data really was EUC-TW, the following two
// bytes will look like a well formed 2 byte char.
if (secondByte < 0xa1) {
iter.error = true;
}
break buildChar;
}
if (firstByte == 0x8f) {
// Code set 3.
// Three byte total char size, two bytes of actual char value.
thirdByte = iter.nextByte(det);
iter.charValue = (iter.charValue << 8) | thirdByte;
if (thirdByte < 0xa1) {
iter.error = true;
}
}
}
return iter.done == false;
};
/**
* The charset recognize for EUC-JP. A singleton instance of this class
* is created and kept by the public CharsetDetector class
*/
module.exports.euc_jp = function() {
this.name = function() {
return 'EUC-JP';
};
this.language = function() {
return 'ja';
};
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
this.commonChars = [
0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1
];
this.nextChar = eucNextChar;
};
util.inherits(module.exports.euc_jp, mbcs);
/**
* The charset recognize for EUC-KR. A singleton instance of this class
* is created and kept by the public CharsetDetector class
*/
module.exports.euc_kr = function() {
this.name = function() {
return 'EUC-KR';
};
this.language = function() {
return 'ko';
};
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
this.commonChars = [
0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad
];
this.nextChar = eucNextChar;
};
util.inherits(module.exports.euc_kr, mbcs);
/**
* GB-18030 recognizer. Uses simplified Chinese statistics.
*/
module.exports.gb_18030 = function() {
this.name = function() {
return 'GB18030';
};
this.language = function() {
return 'zh';
};
/*
* Get the next character value for EUC based encodings.
* Character 'value' is simply the raw bytes that make up the character
* packed into an int.
*/
this.nextChar = function(iter, det) {
iter.index = iter.nextIndex;
iter.error = false;
var firstByte = 0;
var secondByte = 0;
var thirdByte = 0;
var fourthByte = 0;
buildChar: {
firstByte = iter.charValue = iter.nextByte(det);
if (firstByte < 0) {
// Ran off the end of the input data
iter.done = true;
break buildChar;
}
if (firstByte <= 0x80) {
// single byte char
break buildChar;
}
secondByte = iter.nextByte(det);
iter.charValue = (iter.charValue << 8) | secondByte;
if (firstByte >= 0x81 && firstByte <= 0xFE) {
// Two byte Char
if ((secondByte >= 0x40 && secondByte <= 0x7E) || (secondByte >=80 && secondByte <= 0xFE)) {
break buildChar;
}
// Four byte char
if (secondByte >= 0x30 && secondByte <= 0x39) {
thirdByte = iter.nextByte(det);
if (thirdByte >= 0x81 && thirdByte <= 0xFE) {
fourthByte = iter.nextByte(det);
if (fourthByte >= 0x30 && fourthByte <= 0x39) {
iter.charValue = (iter.charValue << 16) | (thirdByte << 8) | fourthByte;
break buildChar;
}
}
}
iter.error = true;
break buildChar;
}
}
return iter.done == false;
};
// TODO: This set of data comes from the character frequency-
// of-occurence analysis tool. The data needs to be moved
// into a resource and loaded from there.
this.commonChars = [
0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0
];
};
util.inherits(module.exports.gb_18030, mbcs);

907
node_modules/chardet/encoding/sbcs.js generated vendored Normal file
View File

@ -0,0 +1,907 @@
var util = require('util'),
Match = require ('../match');
/**
* This class recognizes single-byte encodings. Because the encoding scheme is so
* simple, language statistics are used to do the matching.
*/
function NGramParser(theNgramList, theByteMap) {
var N_GRAM_MASK = 0xFFFFFF;
this.byteIndex = 0;
this.ngram = 0;
this.ngramList = theNgramList;
this.byteMap = theByteMap;
this.ngramCount = 0;
this.hitCount = 0;
this.spaceChar;
/*
* Binary search for value in table, which must have exactly 64 entries.
*/
this.search = function(table, value) {
var index = 0;
if (table[index + 32] <= value) index += 32;
if (table[index + 16] <= value) index += 16;
if (table[index + 8] <= value) index += 8;
if (table[index + 4] <= value) index += 4;
if (table[index + 2] <= value) index += 2;
if (table[index + 1] <= value) index += 1;
if (table[index] > value) index -= 1;
if (index < 0 || table[index] != value)
return -1;
return index;
};
this.lookup = function(thisNgram) {
this.ngramCount += 1;
if (this.search(this.ngramList, thisNgram) >= 0) {
this.hitCount += 1;
}
};
this.addByte = function(b) {
this.ngram = ((this.ngram << 8) + (b & 0xFF)) & N_GRAM_MASK;
this.lookup(this.ngram);
}
this.nextByte = function(det) {
if (this.byteIndex >= det.fInputLen)
return -1;
return det.fInputBytes[this.byteIndex++] & 0xFF;
}
this.parse = function(det, spaceCh) {
var b, ignoreSpace = false;
this.spaceChar = spaceCh;
while ((b = this.nextByte(det)) >= 0) {
var mb = this.byteMap[b];
// TODO: 0x20 might not be a space in all character sets...
if (mb != 0) {
if (!(mb == this.spaceChar && ignoreSpace)) {
this.addByte(mb);
}
ignoreSpace = (mb == this.spaceChar);
}
}
// TODO: Is this OK? The buffer could have ended in the middle of a word...
this.addByte(this.spaceChar);
var rawPercent = this.hitCount / this.ngramCount;
// TODO - This is a bit of a hack to take care of a case
// were we were getting a confidence of 135...
if (rawPercent > 0.33)
return 98;
return Math.floor(rawPercent * 300.0);
};
};
function NGramsPlusLang(la, ng) {
this.fLang = la;
this.fNGrams = ng;
};
function sbcs() {};
sbcs.prototype.spaceChar = 0x20;
sbcs.prototype.ngrams = function() {};
sbcs.prototype.byteMap = function() {};
sbcs.prototype.match = function(det) {
var ngrams = this.ngrams();
var multiple = (Array.isArray(ngrams) && ngrams[0] instanceof NGramsPlusLang);
if (!multiple) {
var parser = new NGramParser(ngrams, this.byteMap());
var confidence = parser.parse(det, this.spaceChar);
return confidence <= 0 ? null : new Match(det, this, confidence);
}
var bestConfidenceSoFar = -1;
var lang = null;
for (var i = ngrams.length - 1; i >= 0; i--) {
var ngl = ngrams[i];
var parser = new NGramParser(ngl.fNGrams, this.byteMap());
var confidence = parser.parse(det, this.spaceChar);
if (confidence > bestConfidenceSoFar) {
bestConfidenceSoFar = confidence;
lang = ngl.fLang;
}
}
var name = this.name(det);
return bestConfidenceSoFar <= 0 ? null : new Match(det, this, bestConfidenceSoFar, name, lang);
};
module.exports.ISO_8859_1 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
];
};
this.ngrams = function() {
return [
new NGramsPlusLang('da', [
0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920,
0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574,
0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67,
0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065,
0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572
]),
new NGramsPlusLang('de', [
0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765,
0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E,
0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65,
0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368,
0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572
]),
new NGramsPlusLang('en', [
0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E,
0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465,
0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20,
0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169,
0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320
]),
new NGramsPlusLang('es', [
0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E,
0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369,
0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320,
0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573,
0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20
]),
new NGramsPlusLang('fr', [
0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61,
0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520,
0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420,
0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064,
0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220
]),
new NGramsPlusLang('it', [
0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E,
0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064,
0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20,
0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572,
0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F
]),
new NGramsPlusLang('nl', [
0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765,
0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220,
0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520,
0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368,
0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F
]),
new NGramsPlusLang('no', [
0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920,
0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574,
0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520,
0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465,
0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572
]),
new NGramsPlusLang('pt', [
0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61,
0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20,
0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120,
0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064,
0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F
]),
new NGramsPlusLang('sv', [
0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E,
0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474,
0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564,
0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073,
0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
])
];
};
this.name = function(det) {
return (det && det.fC1Bytes) ? 'windows-1252' : 'ISO-8859-1';
};
};
util.inherits(module.exports.ISO_8859_1, sbcs);
module.exports.ISO_8859_2 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20
];
}
this.ngrams = function() {
return [
new NGramsPlusLang('cs', [
0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64,
0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073,
0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E,
0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20,
0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
]),
new NGramsPlusLang('hu', [
0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F,
0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073,
0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920,
0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74,
0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
]),
new NGramsPlusLang('pl', [
0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61,
0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79,
0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920,
0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69,
0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
]),
new NGramsPlusLang('ro', [
0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469,
0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172,
0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063,
0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520,
0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
])
];
};
this.name = function(det) {
return (det && det.fC1Bytes) ? 'windows-1250' : 'ISO-8859-2';
};
};
util.inherits(module.exports.ISO_8859_2, sbcs);
module.exports.ISO_8859_5 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF
];
};
this.ngrams = function() {
return [
0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0,
0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2,
0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF,
0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2,
0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520
];
};
this.name = function(det) {
return 'ISO-8859-5';
};
this.language = function() {
return 'ru';
};
};
util.inherits(module.exports.ISO_8859_5, sbcs);
module.exports.ISO_8859_6 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20
];
};
this.ngrams = function() {
return [
0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7,
0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5,
0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920,
0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4,
0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620
];
};
this.name = function(det) {
return 'ISO-8859-6';
};
this.language = function() {
return 'ar';
};
};
util.inherits(module.exports.ISO_8859_6, sbcs);
module.exports.ISO_8859_7 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20
];
};
this.ngrams = function() {
return [
0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5,
0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220,
0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0,
0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9,
0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20
];
};
this.name = function(det) {
return (det && det.fC1Bytes) ? 'windows-1253' : 'ISO-8859-7';
};
this.language = function() {
return 'el';
};
};
util.inherits(module.exports.ISO_8859_7, sbcs);
module.exports.ISO_8859_8 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20
];
};
this.ngrams = function() {
return [
new NGramsPlusLang('he', [
0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5,
0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE,
0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0,
0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4,
0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
]),
new NGramsPlusLang('he', [
0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2,
0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4,
0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020,
0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420,
0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
])
];
};
this.name = function(det) {
return (det && det.fC1Bytes) ? 'windows-1255' : 'ISO-8859-8';
};
this.language = function() {
return 'he';
};
};
util.inherits(module.exports.ISO_8859_8, sbcs);
module.exports.ISO_8859_9 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
];
};
this.ngrams = function() {
return [
0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C,
0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261,
0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20,
0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E,
0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD
];
};
this.name = function(det) {
return (det && det.fC1Bytes) ? 'windows-1254' : 'ISO-8859-9';
};
this.language = function() {
return 'tr';
};
};
util.inherits(module.exports.ISO_8859_9, sbcs);
module.exports.windows_1251 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
];
};
this.ngrams = function() {
return [
0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0,
0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2,
0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF,
0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2,
0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520
];
};
this.name = function(det) {
return 'windows-1251';
};
this.language = function() {
return 'ru';
};
};
util.inherits(module.exports.windows_1251, sbcs);
module.exports.windows_1256 = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF
];
};
this.ngrams = function() {
return [
0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7,
0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3,
0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920,
0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1,
0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420
];
};
this.name = function(det) {
return 'windows-1256';
};
this.language = function() {
return 'ar';
};
};
util.inherits(module.exports.windows_1256, sbcs);
module.exports.KOI8_R = function() {
this.byteMap = function() {
return [
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF
];
};
this.ngrams = function() {
return [
0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF,
0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420,
0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3,
0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1,
0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF
];
};
this.name = function(det) {
return 'KOI8-R';
};
this.language = function() {
return 'ru';
};
};
util.inherits(module.exports.KOI8_R, sbcs);
/*
module.exports.ISO_8859_7 = function() {
this.byteMap = function() {
return [
];
};
this.ngrams = function() {
return [
];
};
this.name = function(det) {
if (typeof det == 'undefined')
return 'ISO-8859-7';
return det.fC1Bytes ? 'windows-1253' : 'ISO-8859-7';
};
this.language = function() {
return 'el';
};
};
util.inherits(module.exports.ISO_8859_7, sbcs);
*/

112
node_modules/chardet/encoding/unicode.js generated vendored Normal file
View File

@ -0,0 +1,112 @@
'use strict';
var util = require('util'),
Match = require ('../match');
/**
* This class matches UTF-16 and UTF-32, both big- and little-endian. The
* BOM will be used if it is present.
*/
module.exports.UTF_16BE = function() {
this.name = function() {
return 'UTF-16BE';
};
this.match = function(det) {
var input = det.fRawInput;
if (input.length >= 2 && ((input[0] & 0xff) == 0xfe && (input[1] & 0xff) == 0xff)) {
return new Match(det, this, 100); // confidence = 100
}
// TODO: Do some statistics to check for unsigned UTF-16BE
return null;
};
};
module.exports.UTF_16LE = function() {
this.name = function() {
return 'UTF-16LE';
};
this.match = function(det) {
var input = det.fRawInput;
if (input.length >= 2 && ((input[0] & 0xff) == 0xff && (input[1] & 0xff) == 0xfe)) {
// LE BOM is present.
if (input.length >= 4 && input[2] == 0x00 && input[3] == 0x00) {
// It is probably UTF-32 LE, not UTF-16
return null;
}
return new Match(det, this, 100); // confidence = 100
}
// TODO: Do some statistics to check for unsigned UTF-16LE
return null;
}
};
function UTF_32() {};
UTF_32.prototype.match = function(det) {
var input = det.fRawInput,
limit = (det.fRawLength / 4) * 4,
numValid = 0,
numInvalid = 0,
hasBOM = false,
confidence = 0;
if (limit == 0) {
return null;
}
if (this.getChar(input, 0) == 0x0000FEFF) {
hasBOM = true;
}
for (var i = 0; i < limit; i += 4) {
var ch = this.getChar(input, i);
if (ch < 0 || ch >= 0x10FFFF || (ch >= 0xD800 && ch <= 0xDFFF)) {
numInvalid += 1;
} else {
numValid += 1;
}
}
// Cook up some sort of confidence score, based on presence of a BOM
// and the existence of valid and/or invalid multi-byte sequences.
if (hasBOM && numInvalid == 0) {
confidence = 100;
} else if (hasBOM && numValid > numInvalid * 10) {
confidence = 80;
} else if (numValid > 3 && numInvalid == 0) {
confidence = 100;
} else if (numValid > 0 && numInvalid == 0) {
confidence = 80;
} else if (numValid > numInvalid * 10) {
// Probably corrupt UTF-32BE data. Valid sequences aren't likely by chance.
confidence = 25;
}
// return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
return confidence == 0 ? null : new Match(det, this, confidence);
};
module.exports.UTF_32BE = function() {
this.name = function() {
return 'UTF-32BE';
};
this.getChar = function(input, index) {
return (input[index + 0] & 0xff) << 24 | (input[index + 1] & 0xff) << 16 |
(input[index + 2] & 0xff) << 8 | (input[index + 3] & 0xff);
};
};
util.inherits(module.exports.UTF_32BE, UTF_32);
module.exports.UTF_32LE = function() {
this.name = function() {
return 'UTF-32LE';
};
this.getChar = function(input, index) {
return (input[index + 3] & 0xff) << 24 | (input[index + 2] & 0xff) << 16 |
(input[index + 1] & 0xff) << 8 | (input[index + 0] & 0xff);
};
};
util.inherits(module.exports.UTF_32LE, UTF_32);

84
node_modules/chardet/encoding/utf8.js generated vendored Normal file
View File

@ -0,0 +1,84 @@
var Match = require ('../match');
/**
* Charset recognizer for UTF-8
*/
module.exports = function() {
this.name = function() {
return 'UTF-8';
};
this.match = function(det) {
var hasBOM = false,
numValid = 0,
numInvalid = 0,
input = det.fRawInput,
trailBytes = 0,
confidence;
if (det.fRawLength >= 3 &&
(input[0] & 0xff) == 0xef && (input[1] & 0xff) == 0xbb && (input[2] & 0xff) == 0xbf) {
hasBOM = true;
}
// Scan for multi-byte sequences
for (var i = 0; i < det.fRawLength; i++) {
var b = input[i];
if ((b & 0x80) == 0)
continue; // ASCII
// Hi bit on char found. Figure out how long the sequence should be
if ((b & 0x0e0) == 0x0c0) {
trailBytes = 1;
} else if ((b & 0x0f0) == 0x0e0) {
trailBytes = 2;
} else if ((b & 0x0f8) == 0xf0) {
trailBytes = 3;
} else {
numInvalid++;
if (numInvalid > 5)
break;
trailBytes = 0;
}
// Verify that we've got the right number of trail bytes in the sequence
for (;;) {
i++;
if (i >= det.fRawLength)
break;
if ((input[i] & 0xc0) != 0x080) {
numInvalid++;
break;
}
if (--trailBytes == 0) {
numValid++;
break;
}
}
}
// Cook up some sort of confidence score, based on presense of a BOM
// and the existence of valid and/or invalid multi-byte sequences.
confidence = 0;
if (hasBOM && numInvalid == 0)
confidence = 100;
else if (hasBOM && numValid > numInvalid * 10)
confidence = 80;
else if (numValid > 3 && numInvalid == 0)
confidence = 100;
else if (numValid > 0 && numInvalid == 0)
confidence = 80;
else if (numValid == 0 && numInvalid == 0)
// Plain ASCII.
confidence = 10;
else if (numValid > numInvalid * 10)
// Probably corruput utf-8 data. Valid sequences aren't likely by chance.
confidence = 25;
else
return null
return new Match(det, this, confidence);
};
};

151
node_modules/chardet/index.js generated vendored Normal file
View File

@ -0,0 +1,151 @@
var fs = require('fs');
var utf8 = require('./encoding/utf8'),
unicode = require('./encoding/unicode'),
mbcs = require('./encoding/mbcs'),
sbcs = require('./encoding/sbcs'),
iso2022 = require('./encoding/iso2022');
var self = this;
var recognisers = [
new utf8,
new unicode.UTF_16BE,
new unicode.UTF_16LE,
new unicode.UTF_32BE,
new unicode.UTF_32LE,
new mbcs.sjis,
new mbcs.big5,
new mbcs.euc_jp,
new mbcs.euc_kr,
new mbcs.gb_18030,
new iso2022.ISO_2022_JP,
new iso2022.ISO_2022_KR,
new iso2022.ISO_2022_CN,
new sbcs.ISO_8859_1,
new sbcs.ISO_8859_2,
new sbcs.ISO_8859_5,
new sbcs.ISO_8859_6,
new sbcs.ISO_8859_7,
new sbcs.ISO_8859_8,
new sbcs.ISO_8859_9,
new sbcs.windows_1251,
new sbcs.windows_1256,
new sbcs.KOI8_R
];
module.exports.detect = function(buffer, opts) {
// Tally up the byte occurence statistics.
var fByteStats = [];
for (var i = 0; i < 256; i++)
fByteStats[i] = 0;
for (var i = buffer.length - 1; i >= 0; i--)
fByteStats[buffer[i] & 0x00ff]++;
var fC1Bytes = false;
for (var i = 0x80; i <= 0x9F; i += 1) {
if (fByteStats[i] != 0) {
fC1Bytes = true;
break;
}
}
var context = {
fByteStats: fByteStats,
fC1Bytes: fC1Bytes,
fRawInput: buffer,
fRawLength: buffer.length,
fInputBytes: buffer,
fInputLen: buffer.length
};
var matches = recognisers.map(function(rec) {
return rec.match(context);
}).filter(function(match) {
return !!match;
}).sort(function(a, b) {
return b.confidence - a.confidence;
});
if (opts && opts.returnAllMatches === true) {
return matches;
}
else {
return matches.length > 0 ? matches[0].name : null;
}
};
module.exports.detectFile = function(filepath, opts, cb) {
if (typeof opts === 'function') {
cb = opts;
opts = undefined;
}
var fd;
var handler = function(err, buffer) {
if (fd) {
fs.closeSync(fd);
}
if (err) return cb(err, null);
cb(null, self.detect(buffer, opts));
};
if (opts && opts.sampleSize) {
fd = fs.openSync(filepath, 'r'),
sample = Buffer.allocUnsafe(opts.sampleSize);
fs.read(fd, sample, 0, opts.sampleSize, null, function(err) {
handler(err, sample);
});
return;
}
fs.readFile(filepath, handler);
};
module.exports.detectFileSync = function(filepath, opts) {
if (opts && opts.sampleSize) {
var fd = fs.openSync(filepath, 'r'),
sample = Buffer.allocUnsafe(opts.sampleSize);
fs.readSync(fd, sample, 0, opts.sampleSize);
fs.closeSync(fd);
return self.detect(sample, opts);
}
return self.detect(fs.readFileSync(filepath), opts);
};
// Wrappers for the previous functions to return all encodings
module.exports.detectAll = function(buffer, opts) {
if (typeof opts !== 'object') {
opts = {};
}
opts.returnAllMatches = true;
return self.detect(buffer, opts);
}
module.exports.detectFileAll = function(filepath, opts, cb) {
if (typeof opts === 'function') {
cb = opts;
opts = undefined;
}
if (typeof opts !== 'object') {
opts = {};
}
opts.returnAllMatches = true;
self.detectFile(filepath, opts, cb);
}
module.exports.detectFileAllSync = function(filepath, opts) {
if (typeof opts !== 'object') {
opts = {};
}
opts.returnAllMatches = true;
return self.detectFileSync(filepath, opts);
}

6
node_modules/chardet/match.js generated vendored Normal file
View File

@ -0,0 +1,6 @@
module.exports = function(det, rec, confidence, name, lang) {
this.confidence = confidence;
this.name = name || rec.name(det);
this.lang = lang;
};

83
node_modules/chardet/package.json generated vendored Normal file
View File

@ -0,0 +1,83 @@
{
"_from": "chardet@^0.7.0",
"_id": "chardet@0.7.0",
"_inBundle": false,
"_integrity": "sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==",
"_location": "/chardet",
"_phantomChildren": {},
"_requested": {
"type": "range",
"registry": true,
"raw": "chardet@^0.7.0",
"name": "chardet",
"escapedName": "chardet",
"rawSpec": "^0.7.0",
"saveSpec": null,
"fetchSpec": "^0.7.0"
},
"_requiredBy": [
"/external-editor"
],
"_resolved": "https://registry.npmjs.org/chardet/-/chardet-0.7.0.tgz",
"_shasum": "90094849f0937f2eedc2425d0d28a9e5f0cbad9e",
"_spec": "chardet@^0.7.0",
"_where": "/Users/Luca_Schwan/Desktop/DSABot/node_modules/external-editor",
"author": {
"name": "Dmitry Shirokov",
"email": "deadrunk@gmail.com"
},
"bugs": {
"url": "http://github.com/runk/node-chardet/issues"
},
"bundleDependencies": false,
"contributors": [
{
"name": "@spikying"
},
{
"name": "@wtgtybhertgeghgtwtg"
},
{
"name": "@suisho"
},
{
"name": "@seangarner"
},
{
"name": "@zevanty"
}
],
"deprecated": false,
"description": "Character detector",
"devDependencies": {
"github-publish-release": "^5.0.0",
"mocha": "^5.2.0"
},
"directories": {
"test": "test"
},
"engine": {
"node": ">=4"
},
"homepage": "https://github.com/runk/node-chardet",
"keywords": [
"encoding",
"character",
"utf8",
"detector",
"chardet",
"icu"
],
"license": "MIT",
"main": "index.js",
"name": "chardet",
"repository": {
"type": "git",
"url": "git+ssh://git@github.com/runk/node-chardet.git"
},
"scripts": {
"release": "scripts/release",
"test": "mocha -R spec --recursive --bail"
},
"version": "0.7.0"
}