/**
* A fuzzy version of the Natural Language Processing core. It doesn't require entities to follow a fixed order, but tries to guess names from the provided samples. Less precise, but matches more easily.
* @constructor
*/
Bravey.Nlp.Fuzzy = function(nlpName, extensions) {
extensions = extensions || {};
var intents = {};
var documentClassifier = new Bravey.DocumentClassifier(extensions);
var entities = {};
var allEntities = [];
var confidence = 0.75;
/**
 * Sorts a list of entity matches in place.
 * Order: lowest position first; at the same position the longest matched string first;
 * at the same length the highest priority first.
 * @param {Object[]} ent - Matches exposing `position`, `string` and `priority`.
 */
function sortEntities(ent) {
  // Three-way comparison built only from < and >, so values that are not
  // comparable (e.g. a missing priority) are treated as equal.
  function ascending(left, right) {
    return left < right ? -1 : (left > right ? 1 : 0);
  }
  ent.sort(function(first, second) {
    return ascending(first.position, second.position) ||
      // Operands swapped: larger length / priority must come first.
      ascending(second.string.length, first.string.length) ||
      ascending(second.priority, first.priority);
  });
}
/**
 * Runs every distinct entity recognizer named in a descriptor list over a sentence.
 * Each recognizer appends its matches to a shared list, which is then sorted with sortEntities.
 * @param {string} text - The sentence to scan.
 * @param {Object[]} ent - Descriptors whose `entity` property names a registered recognizer.
 * @returns {Object[]} The sorted list of matches.
 */
function extractEntities(text, ent) {
  var matches = [];
  var alreadyRun = {};
  for (var k = 0; k < ent.length; k++) {
    var type = ent[k].entity;
    // A type may be listed several times (one descriptor per slot); scan once only.
    if (alreadyRun[type]) continue;
    alreadyRun[type] = 1;
    entities[type].getEntities(text, matches);
  }
  sortEntities(matches);
  return matches;
}
/**
 * Matches the entities found in a sentence against the slots declared by an intent.
 * Candidates are visited in the order produced by extractEntities. A candidate that starts
 * before the end of the previously accepted match is ignored. The others are assigned,
 * per entity type, the next free slot ID listed in `intent.index`.
 * @param {string} text - The sentence to process.
 * @param {Object} intent - The intent; `intent.entities` and `intent.index` are read.
 * @returns {Object} `found` (accepted matches count), `missingEntities` (always false here),
 * `exceedEntities` (a type had more matches than slots), `extraEntities` (a type not declared
 * by the intent was seen), `text` (sentence with accepted matches replaced by `{type}`),
 * `entities`, `sentence` (text chunks and matches in order) and `entitiesIndex` (matches by ID).
 */
function getEntities(text, intent) {
  var candidates = extractEntities(text, intent.entities);
  var accepted = [];
  var acceptedById = {};
  var pieces = [];
  var tagged = "";
  var cursor = -1; // End offset of the last accepted match; -1 until the first one.
  var usedSlots = {};
  var hits = 0;
  var tooMany = false;
  var unexpected = false;
  var gap;

  for (var k = 0; k < candidates.length; k++) {
    var candidate = candidates[k];
    var type = candidate.entity;

    // Ignore candidates that begin inside the previously accepted match.
    if (!(candidate.position >= cursor)) continue;

    var slots = intent.index[type];
    if (!slots) {
      unexpected = true;
      continue;
    }

    if (usedSlots[type] == null) usedSlots[type] = 0;
    var slotId = slots[usedSlots[type]];
    if (!slotId) {
      tooMany = true;
      continue;
    }
    usedSlots[type]++;
    hits++;

    var match = {
      position: candidate.position,
      entity: type,
      value: candidate.value,
      string: candidate.string,
      id: slotId
    };
    accepted.push(match);
    acceptedById[match.id] = match;

    // Plain text between the previous match (or the sentence start) and this one.
    gap = cursor == -1 ?
      text.substr(0, candidate.position) :
      text.substr(cursor, candidate.position - cursor);
    if (gap.length) {
      pieces.push({
        string: gap
      });
    }
    pieces.push(match);
    tagged += gap + "{" + type + "}";
    cursor = candidate.position + candidate.string.length;
  }

  // Whatever follows the last accepted match.
  gap = text.substr(cursor == -1 ? 0 : cursor);
  if (gap.length) {
    pieces.push({
      string: gap
    });
    tagged += gap;
  }

  return {
    found: hits,
    missingEntities: false,
    exceedEntities: tooMany,
    extraEntities: unexpected,
    text: tagged,
    entities: accepted,
    sentence: pieces,
    entitiesIndex: acceptedById
  };
}
/**
 * Tags every recognized entity of a plain sentence and makes sure the intent declares a slot
 * for each accepted match, adding the missing slots to `intent.index` and `intent.entities`.
 * New slot IDs come from `names` (by position among the accepted matches) when provided,
 * otherwise they are the entity type followed by a progressive number from the second
 * occurrence on ("number", "number1", ...).
 * @param {string} text - The untagged sentence.
 * @param {Object} intent - The intent to expand; mutated in place.
 * @param {string[]} [names] - Optional slot IDs, one per accepted match.
 * @returns {{text: string}} The sentence with accepted matches replaced by `{type}`.
 */
function expandIntentFromText(text, intent, names) {
  var outtext = "",
    cur = -1,
    pos = -1, // End offset of the last accepted match; -1 until the first one.
    out = extractEntities(text, allEntities),
    counters = {},
    ent, nextid;
  for (var i = 0; i < out.length; i++) {
    ent = out[i].entity;
    // ">=" and not ">": a match starting exactly where the previous one ended does not
    // overlap it. This is the same rule getEntities applies when a sentence is tested, so
    // the tagged text learned here lines up with the tagged text classified later.
    if (out[i].position >= pos) {
      cur++;
      if (!intent.index[ent]) intent.index[ent] = [];
      if (counters[ent] == undefined) counters[ent] = 0;
      else counters[ent]++;
      nextid = intent.index[ent][counters[ent]];
      if (!nextid) {
        nextid = intent.index[ent][counters[ent]] = names && names[cur] ? names[cur] : ent + (counters[ent] ? counters[ent] : "");
        intent.entities.push({
          entity: ent,
          id: nextid
        });
        console.warn("Adding entity", nextid, "to", intent.name);
      }
      // Copy the plain text preceding the match, then the tag itself.
      outtext += text.slice(pos == -1 ? 0 : pos, out[i].position);
      outtext += "{" + ent + "}";
      pos = out[i].position + out[i].string.length;
    }
  }
  outtext += text.slice(pos == -1 ? 0 : pos);
  return {
    text: outtext
  };
}
/**
 * Scans a sentence already tagged with `{type}` placeholders and makes sure the intent
 * declares a slot for each of them, adding the missing slots to `intent.index` and
 * `intent.entities`. New slot IDs come from `names` (by tag position) when provided,
 * otherwise they are the type followed by a progressive number from the second occurrence on.
 * @param {string} text - The tagged sentence.
 * @param {Object} intent - The intent to expand; mutated in place.
 * @param {string[]} [names] - Optional slot IDs, one per tag.
 * @returns {{text: string}} The sentence, unchanged.
 */
function expandIntentFromTagged(text, intent, names) {
  var tagIndex = -1;
  var seenPerType = {};

  // Invoked once per tag found in the sentence, in order of appearance.
  function registerTag(wholeTag, type) {
    tagIndex++;
    if (!intent.index[type]) intent.index[type] = [];
    if (seenPerType[type] == undefined) seenPerType[type] = 0;
    else seenPerType[type]++;

    var ordinal = seenPerType[type];
    var slots = intent.index[type];
    if (slots[ordinal]) return; // The intent already has a slot for this occurrence.

    var suffix = ordinal ? ordinal : "";
    var slotId = names && names[tagIndex] ? names[tagIndex] : type + suffix;
    slots[ordinal] = slotId;
    intent.entities.push({
      entity: type,
      id: slotId
    });
    console.warn("Adding entity", slotId, "to", intent.name);
  }

  // replace() is used only to walk the tags; its result is discarded.
  text.replace(/\{([.a-z_-]+)\}/g, registerTag);
  return {
    text: text
  };
}
/**
 * Tags every recognized entity of a sentence, regardless of any intent structure.
 * Candidates are visited in the order produced by extractEntities; one that starts before
 * the end of the previously accepted match is ignored. IDs are the entity type followed by
 * a progressive number from the second occurrence on ("number", "number1", ...).
 * @param {string} text - The sentence to process.
 * @returns {Object} `found` (accepted matches count), `text` (sentence with matches replaced
 * by `{type}`), `entities`, `sentence` (text chunks and matches in order) and
 * `entitiesIndex` (matches by ID).
 */
function getAnyEntity(text) {
  var prevstring, outentities = [],
    outentitiesindex = {},
    sentence = [],
    ent, nextid, outtext = "",
    counters = {},
    pos = -1, // End offset of the last accepted match; -1 until the first one.
    found = 0,
    out = extractEntities(text, allEntities);
  for (var i = 0; i < out.length; i++) {
    ent = out[i].entity;
    // ">=" and not ">": a match starting exactly where the previous one ended does not
    // overlap it and must be kept, as getEntities does. With ">" it was dropped, and a
    // lower-ranked candidate overlapping it could be accepted in its place.
    if (out[i].position >= pos) {
      found++;
      if (counters[ent] == undefined) counters[ent] = 0;
      else counters[ent]++;
      nextid = ent + (counters[ent] ? counters[ent] : "");
      var match = {
        position: out[i].position,
        entity: ent,
        value: out[i].value,
        string: out[i].string,
        id: nextid
      };
      outentities.push(match);
      outentitiesindex[match.id] = match;
      // Plain text between the previous match (or the sentence start) and this one.
      prevstring = text.slice(pos == -1 ? 0 : pos, out[i].position);
      if (prevstring.length) sentence.push({
        string: prevstring
      });
      sentence.push(match);
      outtext += prevstring;
      outtext += "{" + ent + "}";
      pos = out[i].position + out[i].string.length;
    }
  }
  // Whatever follows the last accepted match.
  prevstring = text.slice(pos == -1 ? 0 : pos);
  if (prevstring.length) {
    sentence.push({
      string: prevstring
    });
    outtext += prevstring;
  }
  return {
    found: found,
    text: outtext,
    entities: outentities,
    sentence: sentence,
    entitiesIndex: outentitiesindex
  };
}
/**
 * Registers an intent, replacing any intent previously stored under the same name.
 * The given entity list is stored as is (not copied), together with an index that lists,
 * for each entity type, the slot IDs in declaration order.
 * @param {string} intentName - The name of the new intent.
 * @param {IntentEntity[]} entities - The entities produced by the intent.
 * @returns {boolean} Always true.
 */
this.addIntent = function(intentName, entities) {
  var idsByType = {};
  for (var k = 0; k < entities.length; k++) {
    var descriptor = entities[k];
    var ids = idsByType[descriptor.entity] || (idsByType[descriptor.entity] = []);
    ids.push(descriptor.id);
  }
  intents[intentName] = {
    name: intentName,
    entities: entities,
    index: idsByType
  };
  return true;
};
/**
 * Registers an entity recognizer under the name returned by its `getName()` method.
 * A name is listed only once among the recognizers used for untargeted extraction;
 * registering the same name again just replaces the recognizer.
 * @param {EntityRecognizer} entity - The entity recognizer to be added.
 * @returns {boolean} Always true.
 */
this.addEntity = function(entity) {
  var key = entity.getName();
  var alreadyKnown = !!entities[key];
  if (!alreadyKnown) {
    allEntities.push({
      entity: key,
      id: "none"
    });
  }
  entities[key] = entity;
  return true;
};
/**
 * Tells whether an entity recognizer is registered under the given name.
 * @param {string} entityName - The entity name.
 * @returns {boolean} True when found.
 */
this.hasEntity = function(entityName) {
  var recognizer = entities[entityName];
  return Boolean(recognizer);
};
/**
 * Sets the confidence ratio a classification score must exceed for an intent to match.
 * Meant to range from 0.5 to 1 (the higher, the stricter); the value is stored unchecked.
 * @param {number} ratio - The confidence ratio to be set.
 */
this.setConfidence = function(ratio) {
  confidence = ratio;
};
/**
 * Gives back the confidence ratio currently in use.
 * The declared argument is ignored.
 * @returns {number} The confidence ratio.
 */
this.getConfidence = function(c) {
  var current = confidence;
  return current;
};
/**
 * Adds a new sample document for an intent.
 * Without `guess`, the cleaned text is linked to an intent that must already exist.
 * With `guess`, the document is only added when `guess.expandIntent` is set together with
 * `guess.fromFullSentence` or `guess.fromTaggedSentence`; the intent is then created when
 * missing and extended with the entities found in the sentence.
 * @param {string} text - The document content.
 * @param {string} intent - The related intent.
 * @param {Object} [guess] - Guessing options.
 * @param {boolean} [guess.fromFullSentence=false] - Indicates that the document is a full untagged sentence. (i.e. "Please call the 333-123456")
 * @param {boolean} [guess.fromTaggedSentence=false] - Indicates that the document is tagged with braces. (i.e "Please call the {phone_number}") Only considered when `guess.fromFullSentence` is not set.
 * @param {boolean} [guess.expandIntent=false] - Extends or creates the intent with the entities found. New entity IDs are autogenerated and progressive.
 * @param {string[]} [guess.withNames=[]] - Uses the given names for auto-extending intents. Positions in the array are matched with positions of entities in the specified sentence.
 * @returns {boolean} False when the intent is unknown or nothing can be guessed; otherwise the value returned by the document classifier.
 */
this.addDocument = function(text, intent, guess) {
  if (!guess) { // Link a marked sentence to a particular intent
    if (intents[intent]) {
      return documentClassifier.addDocument(Bravey.Text.clean(text), intent);
    }
    console.warn("Can't find intent", intent);
    return false;
  }

  var expander = null;
  if (guess.fromFullSentence) { // From a full sentence...
    text = Bravey.Text.clean(text);
    expander = expandIntentFromText;
  } else if (guess.fromTaggedSentence) { // From a {tagged} sentence...
    expander = expandIntentFromTagged;
  }

  if (expander && guess.expandIntent) { // Expand intent with found items
    if (!intents[intent]) {
      console.warn("Adding intent", intent);
      this.addIntent(intent, []);
    }
    var expanded = expander(text, intents[intent], guess.withNames);
    return documentClassifier.addDocument(expanded.text, intent);
  }

  console.warn("Can't guess...");
  return false;
};
/**
 * Checks whether a sentence matches an intent and extracts its entities.
 * With method "anyEntity" all found entities are extracted and the intent is the classifier
 * winner, regardless of the intents structure. With any other method each registered intent
 * is tried: intents with exceeding, extra or missing entities are discarded, and among the
 * others the one whose classification score is above the confidence ratio and highest wins;
 * on equal score, the one with more entities found wins.
 * @param {string} text - The sentence to be processed.
 * @param {string} [method="default"] - The extraction method.
 * @returns {NlpResult} When an intent is found.
 * @returns {false} When the sentence doesn't match any intent.
 */
this.test = function(text, method) {
  var cleaned = Bravey.Text.clean(text);

  if (method === "anyEntity") {
    var anyResult = getAnyEntity(cleaned);
    var verdict = documentClassifier.classifyDocument(anyResult.text);
    anyResult.score = verdict.winner.score;
    anyResult.intent = verdict.winner.label;
    return anyResult;
  }

  var best = false;
  var bestScore = -1;
  var bestFound = -1;
  for (var intentName in intents) {
    var candidate = getEntities(cleaned, intents[intentName]);
    // Only intents whose entity slots fit the sentence are worth classifying.
    if (candidate.exceedEntities || candidate.extraEntities || candidate.missingEntities) continue;

    var score = documentClassifier.classifyDocument(candidate.text).scores[intentName];
    if (!(score > confidence)) continue;

    var beatsBest = score > bestScore || (score == bestScore && candidate.found > bestFound);
    if (!beatsBest) continue;

    best = candidate;
    best.score = bestScore = score;
    bestFound = best.found;
    best.intent = intentName;
  }
  return best;
};
}
/**
Describes an entity to be matched in an intent.
@typedef IntentEntity
@type {Object}
@property {string} entity The entity type to be found.
@property {string} id The entity ID to be assigned when found.
*/
/**
Describes a sentence classification and entities.
@typedef NlpResult
@type {Object}
@property {Entity[]} entities The ordered list of found entities.
@property {Object.<string, Entity>} entitiesIndex A map version of entities, with the entity ID as key and the matched entity as value.
@property {string} intent The matched intent.
@property {number} score The score of the matched sentence intent.
*/