Train your own document library with OpenAI
foreword
To keep up with the AI wave, try building a question-and-answer document library.
project
Use OpenAI to train a simple document library, using the early public data of King of Glory (Honor of Kings) for testing.
Installation Environment
!pip install gpt_index
!pip install langchain
import package
from gpt_index import SimpleDirectoryReader, GPTListIndex, GPTSimpleVectorIndex, LLMPredictor, PromptHelper
from langchain import OpenAI
import sys
#from google.colab import drive
import os
Set environment variable (key)
os.environ["OPENAI_API_KEY"] = 'Your OpenAI Key 你的OpenAI Key'
Method definitions
Add the files in the given folder to the training data set, save the resulting index to disk, and return it.
def construct_index(directory_path):
# set maximum input size
max_input_size = 4096
# set number of output tokens
num_outputs = 256
# set maximum chunk overlap
max_chunk_overlap = 20
# set chunk size limit
chunk_size_limit = 600
prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
# define LLM text-davinci-003
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-ada-001", max_tokens=num_outputs))
documents = SimpleDirectoryReader(directory_path).load_data()
index = GPTSimpleVectorIndex(documents, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
index.save_to_disk('index.json')
return index
Ask questions against the trained index.
def ask_bot(input_index = 'index.json'):
index = GPTSimpleVectorIndex.load_from_disk(input_index)
while True:
query = input('What do you want to ask the bot? \n')
response = index.query(query, response_mode="compact")
print ("\nBot says: \n\n" + response.response + "\n\n\n")
generate training data
Call the method to add all the .txt files in the /content folder to the index.
index = construct_index("/content/")
Use the trained dataset to ask questions
ask_bot('index.json')