利用transformers提取图片特征,存储到Pinecone数据库

一、前言

1、连接Pinecone数据库(如果还没有,则先创建Pinecone数据库)

2、加载或下载CLIP模型

3、加载图片

4、利用transformers提取图片向量特征

5、存储到Pinecone数据库中

二、代码示例

import pinecone
import torch
import numpy as np
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Pinecone account credentials. If you do not have an account yet, register at
# https://www.pinecone.io/ (the site may require a proxy in some regions).
api_credentials = {
    "api_key": "Your-API-KEY",
    "environment": "Your-environment",
}

# Initialise the Pinecone client with the credentials above.
pinecone.init(**api_credentials)

# Connect to the index that will receive the image vectors.
index_name = "img-index"
print(index_name)
index = pinecone.Index(index_name=index_name)

# Load the CLIP model from a local file, or download and cache it on first use.
# NOTE(review): this raw string contains a doubled backslash before the file
# name; it is kept unchanged here -- confirm the intended path.
model_path = r"D:\Desktop\\clip_model.pt"
model_name = 'openai/clip-vit-base-patch16'

# The processor is required whichever way the model is obtained, so it is
# loaded once up front instead of being repeated in both branches. Keeping it
# out of the ``try`` also means a FileNotFoundError raised while loading the
# processor is not mistaken for a missing local model file.
clip_processor = CLIPProcessor.from_pretrained(model_name)

try:
    # Try to load the whole pickled model from the local path.
    # SECURITY: torch.load unpickles arbitrary objects -- only load files
    # that this script itself created.
    # NOTE(review): newer PyTorch releases default to ``weights_only=True``,
    # which may reject a fully pickled module -- confirm against the
    # installed version.
    clip_model = torch.load(model_path)
except FileNotFoundError:
    # No local copy yet: download the model (may require a proxy in some
    # regions) and save it so the next run can load it from disk.
    clip_model = CLIPModel.from_pretrained(model_name)
    torch.save(clip_model, model_path)


# Path of the image to embed.
image_path = r'D:\Desktop\tp.png'
print(image_path)

# Placeholder caption passed to the processor together with the image.
text = ["img"]

# Preprocess the image and text. The image is opened in a ``with`` block so
# the underlying file handle is closed as soon as preprocessing is done.
with Image.open(image_path) as image:
    inputs = clip_processor(
        text=text,
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

# Forward pass through the model; gradients are not needed for inference.
with torch.no_grad():
    outputs = clip_model(**inputs)

# Get the image embeddings.
# image_embeds is the image's vector representation in the model's embedding
# space, usable for image similarity and other image tasks (the original
# author notes 512 dimensions for this checkpoint).
# logits_per_image (not used here) holds the model's per-image scores.
# ``.numpy().tolist()`` yields a nested list: one inner list per input image.
image_vectors = outputs.image_embeds.numpy().tolist()

print(image_vectors[0])

vectors = [
    (
        "vec22",  # Vector ID
        # Dense vector values: must be a flat list of floats, so take the
        # single image's row rather than the whole nested batch list.
        image_vectors[0],
        {"path": image_path},  # Vector metadata
    )
]
print(vectors)

# Store the record in the index under a dedicated namespace.
upsert_response = index.upsert(
    vectors=vectors,
    namespace="img-namespace"
)

猜你喜欢

转载自blog.csdn.net/xun527/article/details/131808434
今日推荐