English
LSTM-1225 / tokenizer.js
Fishfishfishfishfish's picture
Upload 4 files
a1b5703 verified
raw
history blame contribute delete
544 Bytes
const fs = require('fs');
/**
 * Break a string into word and punctuation tokens.
 *
 * The text is split on whitespace and on each of the characters
 * `, . ! ? : ; ( ) * -`. Punctuation delimiters are kept as their own
 * single-character tokens; whitespace and empty fragments are discarded.
 * Any other character (apostrophes, quotes, digits, ...) stays attached to
 * the surrounding word.
 *
 * @param {string} text - The raw text to tokenize.
 * @returns {string[]} Tokens in their original order.
 */
function tokenizeText(text) {
  // The capturing group makes split() return the delimiters as well as the
  // text between them, which is how punctuation survives as tokens.
  const fragments = text.split(/([\s,.!?:;()*-])/);
  const tokens = [];

  for (const fragment of fragments) {
    // Whitespace delimiters and the empty strings produced between adjacent
    // delimiters both trim down to '' and carry no information.
    if (fragment.trim() === '') {
      continue;
    }
    tokens.push(fragment);
  }

  return tokens;
}
// Script entry point: read text.txt as UTF-8, tokenize its contents, and save
// the tokens to tokens.json as a JSON array of strings. Both paths are
// resolved relative to the current working directory.
fs.readFile('text.txt', 'utf8', (err, data) => {
  if (err) {
    console.error('Error reading file:', err);
    // Report failure through the exit status so a calling shell or pipeline
    // can detect it. Setting exitCode (rather than calling process.exit())
    // lets Node finish flushing the error output before it terminates.
    process.exitCode = 1;
    return;
  }

  const tokens = tokenizeText(data);
  const jsonData = JSON.stringify(tokens);

  fs.writeFile('tokens.json', jsonData, (writeErr) => {
    if (writeErr) {
      console.error('Error writing file:', writeErr);
      process.exitCode = 1;
      return;
    }
    console.log('Tokens written to tokens.json');
  });
});