/**
* A version of the Natural Language Processing core. It processes entities in strict sequential order. More precise, but matches are harder to hit.
* @constructor
*/
Bravey.Nlp.Sequential = function(nlpName, extensions) {
extensions = extensions || {};
var intents = {};
var documentClassifier = new Bravey.DocumentClassifier(extensions);
var entities = {};
var allEntities = [];
var confidence = 0.75;
/**
 * Extracts the root of an intent name, i.e. everything before the first "~".
 * @param {string} intentName - Full intent name, e.g. "call~phone_number".
 * @returns {string} The root part, e.g. "call". The whole name when it holds no "~".
 */
function getIntentRoot(intentName) {
  var parts = intentName.split(/~/);
  return parts[0];
}
/**
 * Sorts entity matches in place.
 * Ordering: ascending `position`; ties broken by longer `string` first,
 * then by higher `priority` first.
 * @param {Object[]} ent - Matches exposing `position`, `string` and `priority`.
 */
function sortEntities(ent) {
  // Three-way comparison yielding -1, 0 or 1 (0 also when the values are not comparable).
  function threeWay(x, y) {
    if (x < y) return -1;
    if (x > y) return 1;
    return 0;
  }
  ent.sort(function(left, right) {
    // Operands are swapped for the last two keys to get a descending order.
    return threeWay(left.position, right.position) ||
      threeWay(right.string.length, left.string.length) ||
      threeWay(right.priority, left.priority);
  });
}
/**
 * Runs every distinct recognizer named in `ent` over the text and gathers the matches.
 * @param {string} text - The sentence to scan.
 * @param {IntentEntity[]} ent - Entity descriptors; only `entity` is read and each
 *   entity name is processed once, no matter how often it is listed.
 * @returns {Object[]} The collected matches, ordered by sortEntities.
 */
function extractEntities(text, ent) {
  var matches = [];
  var seen = {};
  var name, idx;
  for (idx = 0; idx < ent.length; idx++) {
    name = ent[idx].entity;
    if (seen[name]) continue; // recognizer already executed for this entity name
    seen[name] = 1;
    // The recognizer appends its findings to the shared `matches` array.
    entities[name].getEntities(text, matches);
  }
  sortEntities(matches);
  return matches;
}
/**
 * Builds an intent description from a plain (untagged) sentence.
 * Every registered recognizer is run over the text; matches are taken in order
 * and a match starting before the end of the previously accepted one is skipped.
 * @param {string} text - The sentence to analyse.
 * @param {string} [root=""] - Intent root; found entity names are appended with "~".
 * @param {string[]} [names] - Optional ids for the accepted entities, by position.
 *   When an entry is missing, the id is the entity name followed by a progressive
 *   counter (empty for the first occurrence, then 1, 2, ...).
 * @returns {{error: boolean, text: string, name: string, entities: IntentEntity[]}}
 *   `text` is the sentence with accepted matches replaced by "{entity}" tags,
 *   `name` the full intent name and `entities` the accepted entity/id pairs.
 *   `error` is always false.
 */
function guessIntent(text, root, names) {
  var intentname = root || "",
    outtext = "",
    found = [], // named `found` so it does not shadow the recognizer registry
    counters = {},
    cur = 0,
    pos = -1,
    ent, nextid, start, i; // declared locally: they used to leak as implicit globals
  var out = extractEntities(text, allEntities);
  for (i = 0; i < out.length; i++) {
    ent = out[i].entity;
    if (out[i].position >= pos) {
      intentname += "~" + ent;
      if (counters[ent] === undefined) counters[ent] = 0;
      else counters[ent]++;
      nextid = names && names[cur] ? names[cur] : ent + (counters[ent] ? counters[ent] : "");
      found.push({
        entity: ent,
        id: nextid
      });
      // Copy the plain text between the previous match (or the start) and this one.
      start = pos === -1 ? 0 : pos;
      outtext += text.substring(start, out[i].position);
      outtext += "{" + ent + "}";
      pos = out[i].position + out[i].string.length;
      cur++;
    }
  }
  // Trailing text after the last accepted match (the whole text when none matched).
  outtext += text.substring(pos === -1 ? 0 : pos);
  return {
    error: false,
    text: outtext,
    name: intentname,
    entities: found
  };
}
/**
 * Builds an intent description from a sentence tagged with braces,
 * e.g. "Please call the {phone_number}".
 * @param {string} text - The tagged sentence; it is returned unchanged.
 * @param {string} [root=""] - Intent root; tag names are appended with "~".
 * @param {string[]} [names] - Optional ids for the recognised tags, by position.
 *   When an entry is missing, the id is the entity name followed by a progressive
 *   counter (empty for the first occurrence, then 1, 2, ...).
 * @returns {{error: (boolean|string), text: string, name: string, entities: IntentEntity[]}}
 *   `error` is false when every tag names a registered entity; otherwise it holds
 *   the name of the last unknown tag. Unknown tags are left out of `name` and `entities`.
 */
function guessIntentFromTagged(text, root, names) {
  var intentname = root || "",
    outentities = [],
    counters = {},
    cur = 0,
    error = false;
  // `replace` is used only to visit every {tag}; its result is discarded.
  text.replace(/\{([.a-z_-]+)\}/g, function(m, ent) {
    var nextid;
    if (!entities[ent]) {
      error = ent;
      return;
    }
    intentname += "~" + ent;
    if (counters[ent] === undefined) counters[ent] = 0;
    else counters[ent]++;
    nextid = names && names[cur] ? names[cur] : ent + (counters[ent] ? counters[ent] : "");
    outentities.push({
      entity: ent,
      id: nextid
    });
    cur++;
  });
  return {
    error: error,
    text: text,
    name: intentname,
    entities: outentities
  };
}
/**
 * Aligns the entities found in a sentence with the ordered entity list of an intent.
 * Matches are consumed strictly in sequence: a match is accepted only when it starts at
 * or after the end of the previously accepted one and its type equals the next entity
 * the intent expects. Expected entities whose recognizer has an `expand` method are not
 * looked up among the matches; a placeholder covering the remaining text is created for
 * them and later trimmed and handed to `expand`.
 * @param {string} text - The sentence to analyse.
 * @param {Object} intent - Intent descriptor; only `intent.entities` is read.
 * @returns {Object} Result with `found` (number of aligned entities), `exceedEntities`
 *   (always false), `missingEntities` / `extraEntities` (aligned count below / above the
 *   expected count), `text` (sentence with entities replaced by "{entity}" tags),
 *   `entities` (aligned matches), `sentence` (alternating plain-text chunks and matches)
 *   and `entitiesIndex` (matches keyed by id).
 */
function getEntities(text, intent) {
  var out = extractEntities(text, intent.entities);
  var outentities = [],
    outentitiesindex = {},
    ent, match, i,
    pos = -1,
    entitypos = 0;

  // Consumes every consecutive expected entity whose recognizer exposes `expand`,
  // creating a placeholder that spans from the current position to the end of the text.
  function forward() { // @TODO: This part can be improved.
    var placeholder;
    while (intent.entities[entitypos] && entities[intent.entities[entitypos].entity].expand) {
      placeholder = {
        position: pos < 0 ? 0 : pos,
        entity: intent.entities[entitypos].entity,
        value: "",
        string: text.substr(pos < 0 ? 0 : pos),
        id: intent.entities[entitypos].id
      };
      outentitiesindex[placeholder.id] = placeholder;
      outentities.push(placeholder);
      entitypos++;
    }
  }
  forward();
  for (i = 0; i < out.length; i++) {
    // Stop as soon as every expected entity is assigned. Checking before the lookup
    // also covers the case where forward() already consumed the whole list.
    if (entitypos >= intent.entities.length) break;
    ent = out[i].entity;
    if (out[i].position >= pos && intent.entities[entitypos].entity == ent) {
      match = {
        position: out[i].position,
        entity: ent,
        value: out[i].value,
        string: out[i].string,
        id: intent.entities[entitypos].id
      };
      outentities.push(match);
      outentitiesindex[match.id] = match;
      pos = out[i].position + out[i].string.length;
      entitypos++;
      forward();
    }
  }

  // Second pass: rebuild the tagged text and the sentence chunks.
  var nextentity, outtext = "",
    prevstring = "",
    sentence = [],
    oldposition;
  pos = 0;
  for (i = 0; i < outentities.length; i++) {
    if (entities[outentities[i].entity].expand) {
      nextentity = outentities[i + 1];
      // A placeholder must end where the next aligned entity begins.
      if (nextentity) outentities[i].string = outentities[i].string.substr(0, nextentity.position - outentities[i].position);
      oldposition = outentities[i].position;
      entities[outentities[i].entity].expand(outentities[i]);
      // NOTE(review): `position` is read from the recognizer object, not from the match
      // that `expand` just received - confirm this is intended.
      if (entities[outentities[i].entity].position > oldposition) {
        prevstring = text.substr(pos, entities[outentities[i].entity].position - oldposition);
        sentence.push({
          string: prevstring
        });
        outtext += prevstring;
        pos += entities[outentities[i].entity].position - oldposition;
      }
    }
    // Plain text between the previous entity and this one.
    prevstring = text.substr(pos, outentities[i].position - pos);
    if (prevstring) {
      sentence.push({
        string: prevstring
      });
      outtext += prevstring;
    }
    sentence.push(outentities[i]);
    outtext += "{" + outentities[i].entity + "}";
    pos = outentities[i].position + outentities[i].string.length;
  }
  // Trailing plain text after the last entity.
  if (prevstring = text.substr(pos)) {
    sentence.push({
      string: prevstring
    });
    outtext += prevstring;
  }
  return {
    found: outentities.length,
    exceedEntities: false,
    missingEntities: outentities.length < intent.entities.length,
    extraEntities: outentities.length > intent.entities.length,
    text: outtext,
    entities: outentities,
    sentence: sentence,
    entitiesIndex: outentitiesindex
  };
}
/**
 * Registers an intent under its full name, built as the intent name followed
 * by "~" and the type of each entity, in order. An intent already stored under
 * the same full name is replaced.
 * @param {string} intentName - The name (root) of the new intent.
 * @param {IntentEntity[]} entities - The entities the intent produces, in order.
 * @returns {boolean} Always true.
 */
this.addIntent = function(intentName, entities) {
  var idsByType = {};
  var fullName = intentName;
  var entityType, k;
  for (k = 0; k < entities.length; k++) {
    entityType = entities[k].entity;
    fullName += "~" + entityType;
    // Group the ids by entity type.
    if (!idsByType[entityType]) idsByType[entityType] = [];
    idsByType[entityType].push(entities[k].id);
  }
  intents[fullName] = {
    name: fullName,
    root: intentName,
    entities: entities,
    index: idsByType
  };
  return true;
};
/**
 * Registers an entity recognizer under the name returned by its getName().
 * A recognizer registered again under an existing name replaces the previous
 * one; the name is listed among the known entities only once.
 * @param {EntityRecognizer} entity - The entity recognizer to be added.
 * @returns {boolean} Always true.
 */
this.addEntity = function(entity) {
  var entityName = entity.getName();
  var isNewName = !entities[entityName];
  entities[entityName] = entity;
  if (isNewName) {
    allEntities.push({
      entity: entityName,
      id: "none"
    });
  }
  return true;
};
/**
 * Tells whether an entity recognizer is registered under the given name.
 * @param {string} entityName - The entity name.
 * @returns {boolean} True when found.
 */
this.hasEntity = function(entityName) {
  return entities[entityName] ? true : false;
};
/**
 * Sets the confidence ratio a classification score must exceed for a match.
 * The intended range is 0.5 to 1 (higher is stricter); the value is stored
 * as given, without validation.
 * @param {number} ratio - The confidence ratio to be set.
 */
this.setConfidence = function(ratio) {
  confidence = ratio;
};
/**
 * Reads the current confidence ratio.
 * @param {*} [c] - Unused.
 * @returns {number} The confidence ratio.
 */
this.getConfidence = function(c) {
  var current = confidence;
  return current;
};
/**
 * Add a new document.
 * Without `guess`, `intent` must be the full name of an already registered intent
 * and the cleaned text is filed under the root of that intent. With `guess`, the
 * entity list is derived from the sentence and the intent is created when missing.
 * @param {string} text - The document content.
 * @param {string} intent - The related intent. If not guessing, must include entity types separated by ~. (i.e. "call~phone_number")
 * @param {Object} [guess] - Guessing options. When given, one of the supported combinations below must be enabled, otherwise nothing is added.
 * @param {boolean} [guess.fromFullSentence=false] - Indicates that the document is a full untagged sentence. (i.e. "Please call the 333-123456") Only handled together with guess.expandIntent.
 * @param {boolean} [guess.fromTaggedSentence=false] - Indicates that the document is tagged with braces. (i.e "Please call the {phone_number}") Only consulted when guess.fromFullSentence is not set.
 * @param {boolean} [guess.expandIntent=false] - Extends or creates a new intent if exceeded entities are found. New entities id will be autogenerated and progressive.
 * @param {string[]} [guess.withNames=[]] - Uses the given names for auto-extending intents. Positions in array are matched with positions of entity into the specified sentence.
 * @returns {boolean} False when the document can't be added; otherwise whatever the document classifier returns (presumably a boolean - verify against Bravey.DocumentClassifier).
 */
this.addDocument = function(text, intent, guess) {
  var found;

  if (!guess) { // Link a marked sentence to a particular intent
    if (!intents[intent]) {
      console.warn("Can't find intent", intent);
      return false;
    }
    return documentClassifier.addDocument(Bravey.Text.clean(text), getIntentRoot(intent));
  }

  if (guess.fromFullSentence) { // From a full sentence...
    text = Bravey.Text.clean(text);
    if (guess.expandIntent) { // Expand intent with found items
      found = guessIntent(text, intent, guess.withNames);
      if (!intents[found.name]) {
        console.warn("Adding intent", found.name);
        this.addIntent(intent, found.entities);
      }
      return documentClassifier.addDocument(found.text, intent);
    }
  } else if (guess.fromTaggedSentence) { // From a {tagged} sentence...
    found = guessIntentFromTagged(text, intent, guess.withNames);
    if (found.error !== false) {
      console.warn("Can't find entity typed", found.error);
      return false;
    }
    if (!intents[found.name]) {
      console.warn("Adding intent", found.name);
      this.addIntent(intent, found.entities);
    }
    return documentClassifier.addDocument(found.text, intent);
  }

  // No supported guessing mode was requested.
  console.warn("Can't guess...");
  return false;
};
/**
 * Check if a given sentence matches an intent and extracts its entities.
 * Every registered intent whose entities align exactly with the sentence is scored
 * by the document classifier. A candidate is retained when its score exceeds the
 * confidence ratio and it either scores higher than the current best or carries
 * more entities than it.
 * @param {string} text - The sentence to be processed.
 * @param {string} [method="default"] - The extraction method. Only the default method exists, so the value is currently ignored.
 * @returns {NlpResult} When an intent is found.
 * @returns {false} When the sentence doesn't match any intent.
 */
this.test = function(text, method) {
  var best = false,
    bestScore = -1,
    bestFound = -1,
    intentKey, candidate, scores, score;

  text = Bravey.Text.clean(text);
  for (intentKey in intents) {
    candidate = getEntities(text, intents[intentKey]);
    // Skip intents whose entity list does not align exactly with the sentence.
    if (candidate.exceedEntities || candidate.extraEntities || candidate.missingEntities) continue;
    // When entities are enough, check classifier.
    scores = documentClassifier.classifyDocument(candidate.text).scores;
    score = scores[intents[intentKey].root];
    if (score > confidence && (score > bestScore || candidate.found > bestFound)) {
      best = candidate;
      bestScore = score;
      best.score = score;
      bestFound = best.found;
      best.intent = intents[intentKey].root;
    }
  }
  return best;
};
}
/**
Describes an entity to be matched in an intent.
@typedef IntentEntity
@type {Object}
@property {string} entity The entity type to be found.
@property {string} id The entity ID to be assigned when found.
*/
/**
Describes a sentence classification and entities.
@typedef NlpResult
@type {Object}
@property {Entity[]} entities The ordered list of found entities.
@property {Object.<string, Entity>} entitiesIndex A map version of entities, with the entity ID as key and the matched entity as value.
@property {string} intent The matched intent.
@property {number} score The score of the matched sentence intent.
*/