add conversational retrieval agent

This commit is contained in:
Henry
2023-08-06 19:45:21 +01:00
parent d208eae868
commit 0ae6f53295
18 changed files with 1018 additions and 103 deletions
+42 -4
View File
@@ -4,8 +4,10 @@ import * as fs from 'fs'
import * as path from 'path'
import { JSDOM } from 'jsdom'
import { DataSource } from 'typeorm'
import { ICommonObject, IDatabaseEntity, INodeData } from './Interface'
import { ICommonObject, IDatabaseEntity, IMessage, INodeData } from './Interface'
import { AES, enc } from 'crypto-js'
import { ChatMessageHistory } from 'langchain/memory'
import { AIMessage, HumanMessage } from 'langchain/schema'
export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true if string consists only numbers OR expression {{}}
export const notEmptyRegex = '(.|\\s)*\\S(.|\\s)*' //return true if string is not empty or blank
@@ -204,6 +206,9 @@ export const getAvailableURLs = async (url: string, limit: number) => {
/**
* Search for href through htmlBody string
* @param {string} htmlBody
* @param {string} baseURL
* @returns {string[]}
*/
function getURLsFromHTML(htmlBody: string, baseURL: string): string[] {
const dom = new JSDOM(htmlBody)
@@ -233,6 +238,8 @@ function getURLsFromHTML(htmlBody: string, baseURL: string): string[] {
/**
* Normalize URL to prevent crawling the same page
* @param {string} urlString
* @returns {string}
*/
function normalizeURL(urlString: string): string {
const urlObj = new URL(urlString)
@@ -246,6 +253,11 @@ function normalizeURL(urlString: string): string {
/**
* Recursive crawl using normalizeURL and getURLsFromHTML
* @param {string} baseURL
* @param {string} currentURL
* @param {string[]} pages
* @param {number} limit
* @returns {Promise<string[]>}
*/
async function crawl(baseURL: string, currentURL: string, pages: string[], limit: number): Promise<string[]> {
const baseURLObj = new URL(baseURL)
@@ -290,6 +302,9 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
/**
* Prep URL before passing into recursive carwl function
* @param {string} stringURL
* @param {number} limit
* @returns {Promise<string[]>}
*/
export async function webCrawl(stringURL: string, limit: number): Promise<string[]> {
const URLObj = new URL(stringURL)
@@ -336,10 +351,10 @@ export async function xmlScrape(currentURL: string, limit: number): Promise<stri
return urls
}
/*
/**
* Get env variables
* @param {string} url
* @returns {string[]}
* @param {string} name
* @returns {string | undefined}
*/
export const getEnvironmentVariable = (name: string): string | undefined => {
try {
@@ -470,6 +485,10 @@ export function handleEscapeCharacters(input: any, reverse: Boolean): any {
return input
}
/**
* Get user home dir
* @returns {string}
*/
export const getUserHome = (): string => {
let variableName = 'HOME'
if (process.platform === 'win32') {
@@ -482,3 +501,22 @@ export const getUserHome = (): string => {
}
return process.env[variableName] as string
}
/**
* Map incoming chat history to ChatMessageHistory
* @param {options} ICommonObject
* @returns {ChatMessageHistory}
*/
export const mapChatHistory = (options: ICommonObject): ChatMessageHistory => {
const chatHistory = []
const histories: IMessage[] = options.chatHistory
for (const message of histories) {
if (message.type === 'apiMessage') {
chatHistory.push(new AIMessage(message.message))
} else if (message.type === 'userMessage') {
chatHistory.push(new HumanMessage(message.message))
}
}
return new ChatMessageHistory(chatHistory)
}