一、前言
1、连接Pinecone数据库,或者创建Pinecone数据库
2、加载或下载CLIP模型
3、加载图片
4、利用transformers提取图片向量特征
5、存储到Pinecone数据库中
二、代码示例
import pinecone
import torch
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
# Configure the Pinecone client. Replace both placeholders with the API key
# and environment of your own Pinecone project; if you do not have an account
# yet, register at https://www.pinecone.io/ (a proxy/VPN may be needed to
# reach the site from some networks).
# NOTE(review): `pinecone.init` is the legacy (pre-3.0) `pinecone-client`
# entry point -- confirm the installed client version still provides it.
pinecone.init(
    api_key="Your-API-KEY",
    environment="Your-environment",
)

# Get a handle to the index that will hold the image vectors. Nothing here
# creates the index, so it has to exist already.
index_name = "img-index"
print(index_name)
index = pinecone.Index(index_name=index_name)
# Load the CLIP model from a local cache file, downloading it on first use.
# `model_path` is where the whole model object is cached with torch.save;
# `model_name` is the Hugging Face checkpoint to fall back to.
model_path = r"D:\Desktop\clip_model.pt"
model_name = 'openai/clip-vit-base-patch16'

# The processor is needed whichever way the model is obtained, so load it
# once up front instead of repeating the call in both branches.
clip_processor = CLIPProcessor.from_pretrained(model_name)

try:
    # The cache file contains a full pickled model object (see torch.save
    # below), not just a state dict, so `weights_only=False` must be explicit:
    # PyTorch >= 2.6 defaults to `weights_only=True`, which refuses to
    # unpickle arbitrary classes.
    # SECURITY: unpickling can execute arbitrary code -- only load a file that
    # this script itself wrote.
    clip_model = torch.load(model_path, weights_only=False)
except FileNotFoundError:
    # No local copy yet: download the checkpoint (a proxy/VPN may be needed)
    # and cache it for subsequent runs.
    clip_model = CLIPModel.from_pretrained(model_name)
    torch.save(clip_model, model_path)
# Path of the image to embed.
image_path = r'D:\Desktop\tp.png'
print(image_path)

# Placeholder text prompt: the forward call below is fed both text and image
# inputs, but only the image embedding is used afterwards.
text = ["img"]

# Preprocess the image and text into model-ready tensors. The context manager
# closes the image file handle as soon as preprocessing is done.
with Image.open(image_path) as image:
    inputs = clip_processor(
        text=text,
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

# Forward pass through the model; gradients are not needed for inference.
with torch.no_grad():
    outputs = clip_model(**inputs)

# `image_embeds` is the image's vector representation in the model's embedding
# space, usable for image similarity and other image tasks (512 values
# according to the original author's note). `logits_per_image`, also present
# on the output, holds image-vs-text scores and is not used here.
# `.tolist()` yields a nested list -- presumably one inner list per input
# image -- so the single image's vector is `image_vectors[0]`.
image_vectors = outputs.image_embeds.numpy().tolist()
print(image_vectors[0])
# Build the upsert payload: one (id, values, metadata) tuple per vector.
# `image_vectors` is a nested list (one inner list per image), while the
# values field must be a flat list of floats, so pass the first (and only)
# row rather than the whole nested list.
vectors = [
    (
        "vec22",               # Vector ID
        image_vectors[0],      # Dense vector values (flat list of floats)
        {"path": image_path},  # Vector metadata
    )
]
print(vectors)

# Store the vector in the index under a dedicated namespace.
upsert_response = index.upsert(
    vectors=vectors,
    namespace="img-namespace",
)