ResponsibleAI - Input Moderation - Ability to verify inputs and prevent potentially harmful content generation

2026-06-28 11:00:55 +03:00 · 2023-11-13 10:48:24 +05:30
parent a311e024e1
commit ad8281e553
8 changed files with 346 additions and 1 deletions
@@ -0,0 +1,24 @@
+import { Moderation } from '../ResponsibleAI'
+import { BaseLanguageModel } from 'langchain/base_language'
+
+/**
+ * Deny-list based input moderation.
+ *
+ * Rejects an input when it contains, as a substring, any entry of a
+ * newline-separated deny list. Matching is case-sensitive.
+ */
+export class SimplePromptModerationRunner implements Moderation {
+    private readonly denyListItems: readonly string[]
+    private readonly moderationErrorMessage: string
+
+    /**
+     * @param denyList Newline-separated list of forbidden phrases. Both LF and CRLF
+     * line endings are accepted; surrounding whitespace is stripped from each entry
+     * and blank entries are ignored.
+     * @param moderationErrorMessage Message of the Error thrown when an input is rejected.
+     */
+    constructor(denyList: string, moderationErrorMessage: string) {
+        // Parse once up front. Trimming prevents a stray '\r' (CRLF input) from making an
+        // entry unmatchable, and prevents a whitespace-only line from matching almost any input.
+        // The nullish fallback keeps a missing deny list from crashing construction.
+        this.denyListItems = (denyList ?? '')
+            .split(/\r?\n/)
+            .map((item) => item.trim())
+            .filter((item) => item !== '')
+        this.moderationErrorMessage = moderationErrorMessage
+    }
+
+    /**
+     * Checks the input against the deny list.
+     *
+     * @param llm Not used by this implementation.
+     * @param input Text to check.
+     * @returns The unchanged input when no deny-list entry occurs in it.
+     * @throws Error carrying the configured moderation message when an entry is found
+     * (surfaced to callers as a rejected promise).
+     */
+    async checkForViolations(llm: BaseLanguageModel, input: string): Promise<string> {
+        for (const denyListItem of this.denyListItems) {
+            if (input.includes(denyListItem)) {
+                throw new Error(this.moderationErrorMessage)
+            }
+        }
+        return input
+    }
+}