Merge pull request #595 from FlowiseAI/feature/CSVAgent

Feature/CSVAgent
This commit is contained in:
Henry Heng
2023-07-22 15:30:24 +01:00
committed by GitHub
6 changed files with 205 additions and 1 deletions
@@ -0,0 +1,154 @@
import { ICommonObject, INode, INodeData, INodeParams, PromptTemplate } from '../../../src/Interface'
import { AgentExecutor } from 'langchain/agents'
import { getBaseClasses } from '../../../src/utils'
import { LoadPyodide, finalSystemPrompt, systemPrompt } from './core'
import { LLMChain } from 'langchain/chains'
import { BaseLanguageModel } from 'langchain/base_language'
import { ConsoleCallbackHandler, CustomChainHandler } from '../../../src/handler'
class CSV_Agents implements INode {
label: string
name: string
description: string
type: string
icon: string
category: string
baseClasses: string[]
inputs: INodeParams[]
constructor() {
this.label = 'CSV Agent'
this.name = 'csvAgent'
this.type = 'AgentExecutor'
this.category = 'Agents'
this.icon = 'csvagent.png'
this.description = 'Agent used to to answer queries on CSV data'
this.baseClasses = [this.type, ...getBaseClasses(AgentExecutor)]
this.inputs = [
{
label: 'Csv File',
name: 'csvFile',
type: 'file',
fileType: '.csv'
},
{
label: 'Language Model',
name: 'model',
type: 'BaseLanguageModel'
}
]
}
async init(): Promise<any> {
// Not used
return undefined
}
async run(nodeData: INodeData, input: string, options: ICommonObject): Promise<string> {
const csvFileBase64 = nodeData.inputs?.csvFile as string
const model = nodeData.inputs?.model as BaseLanguageModel
const loggerHandler = new ConsoleCallbackHandler(options.logger)
const handler = new CustomChainHandler(options.socketIO, options.socketIOClientId)
let files: string[] = []
if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) {
files = JSON.parse(csvFileBase64)
} else {
files = [csvFileBase64]
}
let base64String = ''
for (const file of files) {
const splitDataURI = file.split(',')
splitDataURI.pop()
base64String = splitDataURI.pop() ?? ''
}
const pyodide = await LoadPyodide()
// First load the csv file and get the dataframe dictionary of column types
// For example using titanic.csv: {'PassengerId': 'int64', 'Survived': 'int64', 'Pclass': 'int64', 'Name': 'object', 'Sex': 'object', 'Age': 'float64', 'SibSp': 'int64', 'Parch': 'int64', 'Ticket': 'object', 'Fare': 'float64', 'Cabin': 'object', 'Embarked': 'object'}
let dataframeColDict = ''
try {
const code = `import pandas as pd
import base64
from io import StringIO
import json
base64_string = "${base64String}"
decoded_data = base64.b64decode(base64_string)
csv_data = StringIO(decoded_data.decode('utf-8'))
df = pd.read_csv(csv_data)
my_dict = df.dtypes.astype(str).to_dict()
print(my_dict)
json.dumps(my_dict)`
dataframeColDict = await pyodide.runPythonAsync(code)
} catch (error) {
throw new Error(error)
}
options.logger.debug('[components/CSVAgent] [1] DataframeColDict =>', dataframeColDict)
// Then tell GPT to come out with ONLY python code
// For example: len(df), df[df['SibSp'] > 3]['PassengerId'].count()
let pythonCode = ''
if (dataframeColDict) {
const chain = new LLMChain({
llm: model,
prompt: PromptTemplate.fromTemplate(systemPrompt),
verbose: process.env.DEBUG === 'true' ? true : false
})
const inputs = {
dict: dataframeColDict,
question: input
}
const res = await chain.call(inputs, [loggerHandler])
pythonCode = res?.text
}
options.logger.debug('[components/CSVAgent] [2] Generated Python Code =>', pythonCode)
// Then run the code using Pyodide
let finalResult = ''
if (pythonCode) {
try {
const code = `import pandas as pd\n${pythonCode}`
finalResult = await pyodide.runPythonAsync(code)
} catch (error) {
throw new Error(pythonCode)
}
}
options.logger.debug('[components/CSVAgent] [3] Pyodide Result =>', finalResult)
// Finally, return a complete answer
if (finalResult) {
const chain = new LLMChain({
llm: model,
prompt: PromptTemplate.fromTemplate(finalSystemPrompt),
verbose: process.env.DEBUG === 'true' ? true : false
})
const inputs = {
question: input,
answer: finalResult
}
if (options.socketIO && options.socketIOClientId) {
const result = await chain.call(inputs, [loggerHandler, handler])
options.logger.debug('[components/CSVAgent] [4] Final Result =>', result?.text)
return result?.text
} else {
const result = await chain.call(inputs, [loggerHandler])
options.logger.debug('[components/CSVAgent] [4] Final Result =>', result?.text)
return result?.text
}
}
return pythonCode
}
}
module.exports = { nodeClass: CSV_Agents }
@@ -0,0 +1,35 @@
import type { PyodideInterface } from 'pyodide'
import * as path from 'path'
import { getUserHome } from '../../../src/utils'
let pyodideInstance: PyodideInterface | undefined
export async function LoadPyodide(): Promise<PyodideInterface> {
if (pyodideInstance === undefined) {
const { loadPyodide } = await import('pyodide')
const obj: any = { packageCacheDir: path.join(getUserHome(), '.flowise', 'pyodideCacheDir') }
pyodideInstance = await loadPyodide(obj)
await pyodideInstance.loadPackage('micropip')
const micropip = pyodideInstance.pyimport('micropip')
await micropip.install('pandas')
await micropip.install('numpy')
//let mountDir = "/mnt";
//pyodideInstance.FS.mkdir(mountDir);
//pyodideInstance.FS.mount(pyodideInstance.FS.filesystems.NODEFS, { root: path.join(getUserHome(), '.flowise', 'pyodideFS') }, mountDir);
}
return pyodideInstance
}
export const systemPrompt = `You are working with a pandas dataframe in Python. The name of the dataframe is df.
The columns and data types of a dataframe are given below as a Python dictionary with keys showing column names and values showing the data types.
{dict}
I will ask question, and you will output the Python code using pandas dataframe to answer my question. Do not provide any explanations. Do not respond with anything except the output of the code.
Question: {question}
Output Code:`
export const finalSystemPrompt = `You are given the question: {question}. You have an answer to the question: {answer}. Rephrase the answer into a standalone answer.
Standalone Answer:`
Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

+1
View File
@@ -50,6 +50,7 @@
"pdfjs-dist": "^3.7.107",
"playwright": "^1.35.0",
"puppeteer": "^20.7.1",
"pyodide": ">=0.21.0-alpha.2",
"redis": "^4.6.7",
"replicate": "^0.12.3",
"srt-parser-2": "^1.2.3",
+14
View File
@@ -377,3 +377,17 @@ export const availableDependencies = [
'typeorm',
'weaviate-ts-client'
]
export const getUserHome = (): string => {
let variableName = 'HOME'
if (process.platform === 'win32') {
variableName = 'USERPROFILE'
}
if (process.env[variableName] === undefined) {
// If for some reason the variable does not exist
// fall back to current folder
return process.cwd()
}
return process.env[variableName] as string
}
+1 -1
View File
@@ -728,7 +728,7 @@ export const isFlowValidForStream = (reactFlowNodes: IReactFlowNode[], endingNod
isValidChainOrAgent = !blacklistChains.includes(endingNodeData.name)
} else if (endingNodeData.category === 'Agents') {
// Agent that are available to stream
const whitelistAgents = ['openAIFunctionAgent']
const whitelistAgents = ['openAIFunctionAgent', 'csvAgent']
isValidChainOrAgent = whitelistAgents.includes(endingNodeData.name)
}