Package detail

@nlpjs/nlu

axa-group75.7kMIT4.27.0

Natural Language Understanding

readme

NLPjs logo

@nlpjs/nlu

Installation

You can install @nlpjs/nlu:

    npm install @nlpjs/nlu

NluNeural

Class NluNeural is an abstraction built on top of NeuralNetwork that helps to use it with a corpus. A language can be used as a plugin in order to use the correct tokenizer and stemmer for this language. In this example both versions, with language and without language, are used in order to compare the results.

const { containerBootstrap } = require('@nlpjs/core');
const { NluNeural } = require('@nlpjs/nlu');
const { LangEn } = require('@nlpjs/lang-en');
const corpus = require('./corpus50.json');

/**
 * Flattens a corpus into a list of `{ utterance, intent }` pairs.
 *
 * @param {{ data: Array<{ intent: string, utterances?: string[], tests?: string[] }> }} input
 *   Corpus object whose `data` array holds one entry per intent.
 * @param {boolean} [isTests=false] When truthy, the `tests` list of each
 *   entry is used instead of its `utterances` list.
 * @returns {Array<{ utterance: string, intent: string }>} One item per
 *   sentence, in corpus order.
 */
function prepareCorpus(input, isTests = false) {
  const result = [];
  for (const entry of input.data) {
    const { intent } = entry;
    const candidates = isTests ? entry.tests : entry.utterances;
    // An intent may legitimately have no tests (or no utterances): treat a
    // missing list as empty instead of crashing on `.length` of undefined.
    const utterances = Array.isArray(candidates) ? candidates : [];
    for (const utterance of utterances) {
      result.push({ utterance, intent });
    }
  }
  return result;
}

/**
 * Trains an NluNeural classifier with the corpus utterances, runs every
 * corpus test through it and logs how many were classified with the
 * expected intent.
 *
 * @param {boolean} useStemmer When truthy, the LangEn plugin is registered
 *   in the container before the classifier is created.
 * @returns {Promise<void>} Resolves after the summary line has been logged.
 */
async function measure(useStemmer) {
  const container = await containerBootstrap();
  if (useStemmer) {
    container.use(LangEn);
  }

  const nlu = new NluNeural({ container, locale: 'en', log: false });
  const trainingSet = prepareCorpus(corpus);
  await nlu.train(trainingSet);

  let good = 0;
  let total = 0;
  // Utterances are processed one at a time, each awaited before the next.
  for (const { utterance, intent } of prepareCorpus(corpus, true)) {
    const outcome = await nlu.process(utterance);
    total += 1;
    // Only the first classification is compared with the expected intent.
    const [best] = outcome.classifications;
    if (best.intent === intent) {
      good += 1;
    }
  }

  const precision = good / total;
  console.log(
    `Stemmer: ${useStemmer} Good: ${good} Total: ${total} Precision: ${precision}`
  );
}

// Run the benchmark twice, one run after the other: first without the
// LangEn plugin and then with it, so both summary lines can be compared.
(async () => {
  for (const useStemmer of [false, true]) {
    await measure(useStemmer);
  }
})();

DomainManager

DomainManager is the class that is an abstraction on top of NluNeural. It adds the concept of domain, so each intent belongs to one domain; that way we can have domains for smalltalk, human resources, claims, or whatever logical split of intents that we want to have. Each DomainManager instance has only one language. It can be trained by domain or all together:

All together means that all the intents are trained into the same model, no matter the domain of the intent.
By domain means that every single domain has its own trained model, and there is a master model that is trained to classify an utterance into a domain. That way, when an utterance is classified, it is first processed by the master model to determine the domain, and then by the model of that domain to calculate the intent.

const { containerBootstrap } = require('@nlpjs/core');
const { DomainManager, NluNeural } = require('@nlpjs/nlu');
const { LangEn } = require('@nlpjs/lang-en');

/**
 * Registers the sample utterances of the "food" domain.
 *
 * @param {{ add: Function }} manager Object exposing
 *   `add(domain, utterance, intent)`; it is called once per utterance,
 *   grouped by intent in the order listed below.
 */
function addFoodDomain(manager) {
  const domain = 'food';
  const utterancesByIntent = [
    [
      'order.check',
      [
        'what do I have in my basket',
        'check my cart',
        "show me what I've ordered",
        "what's in my basket",
        'check my order',
        'check what I have ordered',
        'show my order',
        'check my basket',
      ],
    ],
    [
      'order.check_status',
      [
        'how soon will it be delivered',
        'check the status of my delivery',
        'when should I expect delivery',
        'what is the status of my delivery',
        'check my order status',
        'where is my order',
        'where is my delivery',
        'status of my order',
      ],
    ],
  ];
  for (const [intent, utterances] of utterancesByIntent) {
    for (const utterance of utterances) {
      manager.add(domain, utterance, intent);
    }
  }
}

/**
 * Registers the sample utterances of the "personality" domain.
 *
 * @param {{ add: Function }} manager Object exposing
 *   `add(domain, utterance, intent)`; it is called once per utterance,
 *   grouped by intent in the order listed below.
 */
function addPersonalityDomain(manager) {
  const domain = 'personality';
  const utterancesByIntent = [
    [
      'agent.acquaintance',
      [
        'say about you',
        'why are you here',
        'what is your personality',
        'describe yourself',
        'tell me about yourself',
        'tell me about you',
        'what are you',
        'who are you',
        'talk about yourself',
      ],
    ],
    [
      'agent.age',
      [
        'your age',
        'how old is your platform',
        'how old are you',
        "what's your age",
        "I'd like to know your age",
        'tell me your age',
      ],
    ],
    [
      'agent.annoying',
      [
        "you're annoying me",
        'you are such annoying',
        'you annoy me',
        'you are annoying',
        'you are irritating',
        'you are annoying me so much',
      ],
    ],
    [
      'agent.bad',
      [
        "you're bad",
        "you're horrible",
        "you're useless",
        "you're waste",
        "you're the worst",
        'you are a lame',
        'I hate you',
      ],
    ],
    [
      'agent.beclever',
      [
        'be more clever',
        'can you get smarter',
        'you must learn',
        'you must study',
        'be clever',
        'be smart',
        'be smarter',
      ],
    ],
    [
      'agent.beautiful',
      [
        'you are looking awesome',
        "you're looking good",
        "you're looking fantastic",
        'you look greet today',
        "I think you're beautiful",
        'you look amazing today',
        "you're so beautiful today",
        'you look very pretty',
        'you look pretty good',
      ],
    ],
    [
      'agent.birthday',
      [
        'when is your birthday',
        'when were you born',
        'when do you have birthday',
        'date of your birthday',
      ],
    ],
  ];
  for (const [intent, utterances] of utterancesByIntent) {
    for (const utterance of utterances) {
      manager.add(domain, utterance, intent);
    }
  }
}

// DomainManager example: build a container, register the plugins, load both
// sample domains, train and print the classification of one utterance.
(async () => {
  const container = await containerBootstrap();
  // Same registration order as before: classifier first, then the language.
  for (const plugin of [NluNeural, LangEn]) {
    container.use(plugin);
  }
  // Set trainByDomain to true to train by domain
  const settings = { container, trainByDomain: false };
  const manager = new DomainManager(settings);
  addFoodDomain(manager);
  addPersonalityDomain(manager);
  await manager.train();
  const classification = await manager.process('tell me what is in my basket');
  console.log(classification);
})();

NluManager

NluManager is the abstraction over DomainManager: it contains one DomainManager instance for each language that we want to use. It is also able to automatically guess the language of the sentence, so we can provide the locale of the sentence or omit it.

This is an example with two languages (english and spanish) with two domains each (personality and food).

const { containerBootstrap } = require('@nlpjs/core');
const { NluManager, NluNeural } = require('@nlpjs/nlu');
const { LangEn } = require('@nlpjs/lang-en');
const { LangEs } = require('@nlpjs/lang-es');

/**
 * Registers the English sample utterances of the "food" domain.
 *
 * For each intent, the intent is first assigned to the domain with
 * `assignDomain(locale, intent, domain)` and then every utterance is added
 * with `add(locale, utterance, intent)`.
 *
 * @param {{ assignDomain: Function, add: Function }} manager Target manager.
 */
function addFoodDomainEn(manager) {
  const locale = 'en';
  const domain = 'food';
  const utterancesByIntent = [
    [
      'order.check',
      [
        'what do I have in my basket',
        'check my cart',
        "show me what I've ordered",
        "what's in my basket",
        'check my order',
        'check what I have ordered',
        'show my order',
        'check my basket',
      ],
    ],
    [
      'order.check_status',
      [
        'how soon will it be delivered',
        'check the status of my delivery',
        'when should I expect delivery',
        'check my order status',
        'where is my order',
        'where is my delivery',
        'status of my order',
      ],
    ],
  ];
  for (const [intent, utterances] of utterancesByIntent) {
    manager.assignDomain(locale, intent, domain);
    for (const utterance of utterances) {
      manager.add(locale, utterance, intent);
    }
  }
}

/**
 * Registers the Spanish sample utterances of the "food" domain.
 *
 * For each intent, the intent is first assigned to the domain with
 * `assignDomain(locale, intent, domain)` and then every utterance is added
 * with `add(locale, utterance, intent)`.
 *
 * @param {{ assignDomain: Function, add: Function }} manager Target manager.
 */
function addFoodDomainEs(manager) {
  const locale = 'es';
  const domain = 'food';
  const utterancesByIntent = [
    [
      'order.check',
      [
        'qué tengo en mi cesta',
        'comprueba mi carrito',
        'enséñame qué he pedido',
        'qué hay en mi carrito?',
        'comprueba mi compra',
        'comprueba qué he comprado',
        'muéstrame mi compra',
      ],
    ],
    [
      'order.check_status',
      [
        'cuándo me lo van a traer?',
        'cómo va la entrega?',
        'cuándo me traerán mi pedido?',
        'en qué estado está mi pedido?',
        'dónde está mi compra?',
        'dónde está mi pedido?',
        'estado de mi compra',
      ],
    ],
  ];
  for (const [intent, utterances] of utterancesByIntent) {
    manager.assignDomain(locale, intent, domain);
    for (const utterance of utterances) {
      manager.add(locale, utterance, intent);
    }
  }
}

/**
 * Registers the English sample utterances of the "personality" domain.
 *
 * For each intent, the intent is first assigned to the domain with
 * `assignDomain(locale, intent, domain)` and then every utterance is added
 * with `add(locale, utterance, intent)`.
 *
 * @param {{ assignDomain: Function, add: Function }} manager Target manager.
 */
function addPersonalityDomainEn(manager) {
  const locale = 'en';
  const domain = 'personality';
  const utterancesByIntent = [
    [
      'agent.acquaintance',
      [
        'say about you',
        'why are you here',
        'what is your personality',
        'describe yourself',
        'tell me about yourself',
        'tell me about you',
        'what are you',
        'who are you',
        'talk about yourself',
      ],
    ],
    [
      'agent.age',
      [
        'your age',
        'how old is your platform',
        'how old are you',
        "what's your age",
        "I'd like to know your age",
        'tell me your age',
      ],
    ],
  ];
  for (const [intent, utterances] of utterancesByIntent) {
    manager.assignDomain(locale, intent, domain);
    for (const utterance of utterances) {
      manager.add(locale, utterance, intent);
    }
  }
}

/**
 * Registers the Spanish sample utterances of the "personality" domain.
 *
 * For each intent, the intent is first assigned to the domain with
 * `assignDomain(locale, intent, domain)` and then every utterance is added
 * with `add(locale, utterance, intent)`.
 *
 * @param {{ assignDomain: Function, add: Function }} manager Target manager.
 */
function addPersonalityDomainEs(manager) {
  const locale = 'es';
  const domain = 'personality';
  const utterancesByIntent = [
    [
      'agent.acquaintance',
      [
        'cuéntame sobre ti',
        'qué haces aquí?',
        'cómo es tu personalidad?',
        'descríbete',
        'quién eres?',
        'qué eres?',
        'háblame de ti',
      ],
    ],
    [
      'agent.age',
      [
        'qué edad tienes?',
        'cuántos años tienes?',
        'cuál es tu edad?',
        'quiero saber tu edad',
        'dime tu edad',
      ],
    ],
  ];
  for (const [intent, utterances] of utterancesByIntent) {
    manager.assignDomain(locale, intent, domain);
    for (const utterance of utterances) {
      manager.add(locale, utterance, intent);
    }
  }
}

// NluManager example: register both languages and the classifier, load the
// four sample domains, train, then classify the same Spanish sentence twice
// (once with an explicit locale, once letting the manager guess it).
(async () => {
  const container = await containerBootstrap();
  // Same registration order as before: languages first, then the classifier.
  for (const plugin of [LangEn, LangEs, NluNeural]) {
    container.use(plugin);
  }
  const settings = {
    container,
    locales: ['en', 'es'],
    trainByDomain: false,
  };
  const manager = new NluManager(settings);
  const domainLoaders = [
    addFoodDomainEn,
    addFoodDomainEs,
    addPersonalityDomainEn,
    addPersonalityDomainEs,
  ];
  for (const loadDomain of domainLoaders) {
    loadDomain(manager);
  }
  await manager.train();
  // You can provide the locale of the language
  const withLocale = await manager.process('es', 'dime quién eres tú');
  console.log(withLocale);
  // If the locale is not provided, then the language is guessed
  const withGuessedLocale = await manager.process('dime quién eres tú');
  console.log(withGuessedLocale);
  // {
  //   locale: 'es',
  //   utterance: 'dime quién eres tú',
  //   domain: 'personality',
  //   languageGuessed: true,
  //   localeIso2: 'es',
  //   language: 'Spanish',
  //   nluAnswer: {
  //     classifications: [ [Object], [Object] ],
  //     entities: undefined,
  //     explanation: undefined
  //   },
  //   classifications: [
  //     { intent: 'agent.acquaintance', score: 0.8546458520495468 },
  //     { intent: 'agent.age', score: 0.14535414795045312 }
  //   ],
  //   intent: 'agent.acquaintance',
  //   score: 0.8546458520495468
  // }
})();

Contributing

You can read the guide of how to contribute at Contributing.

Contributors

Made with contributors-img.

Code of Conduct

You can read the Code of Conduct at Code of Conduct.

Who is behind it?

This project is developed by AXA Group Operations Spain S.A.

If you need to contact us, you can do it at the email opensource@axa.com

License

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

changelog

Changelog

All notable changes to release of this project will be documented in this file.

For detailed change-info on the commit level please see our GitHub commit history.

[3.10.0] - 2019-10-07

Added

Traverse for handlebars, so answers now can be arrays or objects
Automatic stemmer: is able to learn rules from languages without stemmer when the languages are inflected.
Tests of the automatic stemmer in polish
Spell checking: now users can write with small typos
Changelog
Portuguese sentiment analysis
Contributor pictures to the readme
Bengali sentiment analysis

Changed

Faster Levenshtein implementation
Now the browser version is generated with terser

Fixed

Extended NER to support datetimerange
Sort classifications in the NER manager
Use performance.now instead of process.hrtime for browser compatibility

[3.9.0] - 2019-09-15

Added

Support for Ukrainian language
Duckling support

Changed

General code cleanup removing dead & unused code from the project
Dependencies have been updated
README.md has been updated

Fixed

now using url.parse instead of new URL due to support of node version 8

[3.8.0] - 2019-09-12

Added

Support for Bengali language
Support for Greek language

[3.7.2] - 2019-09-07

Added

Support for Thai language

[3.7.1] - 2019-09-07

Added

Added examples for huge training (10k intents) and benchmark (Corpus50)

[3.7.0] - 2019-09-05

Added

Improved false-positive avoidance
Training of huge datasets is now feasible

[3.5.2] - 2019-08-20

Added

English tokenizer has been improved

Changed

Dependencies have been updated
Package lockfile (JS) has been updated
README.md has been updated

Fixed

Various typos in the documentation
Bugs regarding contraction

[3.5.1] - 2019-08-09

Added

Model sizes have been significantly reduced

[3.5.0] - 2019-08-09

Added

Emoji support 🥳
Sentiment analysis for the following languages: Finnish, Danish, Russian
Added a "default" sentiment analysis

Changed

Documentation has been updated

[3.4.0] - 2019-07-24

Added

Added a default intent and score when score is less than threshold
Now uses decay learning rate

Changed

Updated license in documentation
Removed handlebars dependency
Dependencies have been updated
Adjustments to tests

[3.2.1] - 2019-07-16

Fixed

Fixed an error that occurred when retrieving entities from whitelist

[3.1.1] - 2019-05-06

Changed

General performance update. Increased performance over 3.1.0

[3.1.0] - 2019-05-05

Added

Actions
Japanese language stemmer

Changed

Now builds in node v12
Dependencies have been updated
Tweaked hyperparameters for best performance

Fixed

Issues with NLP Util tests have been fixed

[3.0.2] - 2019-04-19

Fixed

"is Alphanumeric" should now work with all most commonly used charsets

[3.0.1] - 2019-04-17

Added

The language guesser is now trained with the trigrams from the utterances used to train. That means that it has a best guess, and also that fictional languages can be guessed (example, klingon).
Added Tagalog and Galician languages.

Changed

NlpClassifier no longer exists, in favor of NluManager as the manager of several NLU classes, and is able to manage several languages and several domains inside each language.

Now by default, each domain of a language has its own neural network classifier. When a language has more than 1 domain, a master neural network is trained that, instead of classifying into the intent, classifies into the domain. That way the models are faster to train and have a better score.
The console-bot example training time in version 2.x in my laptop was 108 seconds, in the version 3.x the training time went down to 3 seconds, so the improvement in performance is notable.
Size of the model.nlp files is decreased, the console-bot example went from 1614KB down to 928KB.
The browser version has decreased from 5.08MB down to 2.3MB

[2.5.2] - 2019-03-26

Added

Added multiple different score calculation methods when combining LRC and Neural

Changed:

Default threshold (ner-manager) is now 0.8

[2.5.1] - 2019-03-07

Added

Reduced the file sizes of our sentiment resources

Changes

Updated dependencies
Fixed issues with getter

[2.4.1] - 2019-01-30

Changed

Moved to brain.js version 1.6.0
Minimized the browser bundle

[2.4.0] - 2019-01-25

Added

Support for "any" language
Better documentation regarding language support

Fixed

NLU benchmark run

[2.3.2] - 2019-01-22

Fixed

Fixed a bug in the load/export and classification behaviour

[2.3.1] - 2019-01-10

Changed

Moved to using a non-blocking trainAsync, preventing the event loop from being blocked
Updated dependencies
LRC has been removed from the list of supported classifiers
Updated the classifier, manager & recognizer tests

Fixed

Fixed a bug where an error would be thrown when attempting to read the content's length in several stemmers
Fixed various prettifier bugs

[2.3.0] - 2018-11-26

Added

Test cases for the English aggressive tokenizer
Smoth tests for the bayes classifier
Now includes normalization tests for the following tokenizers: fr, it, nl, no, pl

Changed

Recognizer now recognizes microsoft bot framework v4 contexts

Fixed

Fixed bug preventing tests with istanbul frontend parts from running
English stemmer is now always the default alternative stemmer
English natural stemmer now always uses english aggressive tokenizer
Fixed contractions in the English tokenizer

[2.1.2] - 2018-10-28

Added

Naive Bayes Classifier

Fixed

Minor bugfixes in slot manager
Fixed fails in the language guesser for the chinese language

[2.1.0] - 2018-10-12

Added

Documentation for context, import and export
Added new Binary Relevance Neural Network Classifier

[2.0.4] - 2018-10-06

Added

Basic benchmarking support
Codebase now has precommit hooks
Created stemmers and tokenizers from Natural

Changed

NLP Classifier Train interface is now async
Removed Natural

[2.0.3] - 2018-10-03

Added

Built-in extraction for Chinese
Built-in extraction for Japanese
Documentation for Tamil language support
npmignore no longer uploads docs or testing model.nlp
Documentation for built-in entity extraction
Method for entity extraction without intent recognition in NLP Manager

Changed

Upgraded Microsoft recognizer to version 1.1.3
Tests changed from French to English

[2.0.2] - 2018-09-22

Added

Tamil & Armenian language support

[2.0.1] - 2018-09-21

Added

Catalan language
Arabic stemmer & documentation

Fixed

Errors affecting certain German stems

[2.0.0] - 2018-09-18

Added

Load and Save Trim Entities
Adding coveralls to the repo
Slot Filling
Microsoft Bot Framework Recognizer with Slot Filling

Package detail

readme

@nlpjs/nlu

TABLE OF CONTENTS

Installation

NluNeural

DomainManager

NluManager

Contributing

Contributors

Code of Conduct

Who is behind it?

License

changelog

Changelog

[3.10.0] - 2019-10-07

Added

Changed

Fixed

[3.9.0] - 2019-09-15

Added

Changed

Fixed

[3.8.0] - 2019-09-12

Added

[3.7.2] - 2019-09-07

Added

[3.7.1] - 2019-09-07

Added

[3.7.0] - 2019-09-05

Added

[3.5.2] - 2019-08-20

Added

Changed

Fixed

[3.5.1] - 2019-08-09

Added

[3.5.0] - 2019-08-09

Added

Changed

[3.4.0] - 2019-07-24

Added

Changed

[3.2.1] - 2019-07-16

Fixed

[3.1.1] - 2019-05-06

Changed

[3.1.0] - 2019-05-05

Added

Changed

Fixed

[3.0.2] - 2019-04-19

Fixed

[3.0.1] - 2019-04-17

Added

Changed

[2.5.2] - 2019-03-26

Added

Changed:

[2.5.1] - 2019-03-07

Added

Changes

[2.4.1] - 2019-01-30

Changed

[2.4.0] - 2019-01-25

Added

Fixed

[2.3.2] - 2019-01-22

Fixed

[2.3.1] - 2019-01-10

Changed

Fixed

[2.3.0] - 2018-11-26

Added

Changed

Fixed

[2.1.2] - 2018-10-28

Added

Fixed

[2.1.0] - 2018-10-12

Who is behind it?