-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloadAndStore.js
69 lines (56 loc) · 2.02 KB
/
loadAndStore.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import dotenv from 'dotenv';
import axios from 'axios';
dotenv.config();
import fs from 'fs';
import { YoutubeLoader } from "langchain/document_loaders/web/youtube";
import { FaissStore } from "langchain/vectorstores/faiss";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { Document } from "langchain/document";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { RetrievalQAChain } from "langchain/chains";
import { ChatOpenAI } from "langchain/chat_models/openai";
// Transcript source and the options handed to the YouTube loader.
const videoUrl = "https://youtu.be/bZQun8Y4L2A";
const loaderOptions = { language: "en", addVideoInfo: false };
// Chunking parameters handed to the text splitter.
const splitterOptions = { chunkSize: 500, chunkOverlap: 100 };

// Step 1: fetch the data for the video through the loader.
const loader = YoutubeLoader.createFromUrl(videoUrl, loaderOptions);
const data = await loader.load();

// Step 2: split the loaded documents into overlapping chunks.
const textSplitter = new RecursiveCharacterTextSplitter(splitterOptions);
const splitDocs = await textSplitter.splitDocuments(data);

// Step 3: embed the chunks with OpenAI and index them in a FAISS vector store.
const embeddingModel = new OpenAIEmbeddings();
const vectorStore = await FaissStore.fromDocuments(splitDocs, embeddingModel);
// Pinecone API key. It is read from the PINECONE_API_KEY environment variable
// (dotenv.config() has already run at the top of this file) so the secret does
// not have to be written into source. `||` is used on purpose: an empty value
// is as unusable as a missing one. The placeholder fallback keeps the script
// loadable when no key is configured.
const apiKey = process.env.PINECONE_API_KEY || 'YOUR_API_KEY';
if (apiKey === 'YOUR_API_KEY') {
  console.warn(
    'PINECONE_API_KEY is not set; Pinecone requests will be sent with a placeholder API key.'
  );
}

// Axios instance preconfigured with the Pinecone base URL and the headers
// sent on every request (API key and JSON content type).
const pinecone = axios.create({
  baseURL: 'https://api.pinecone.io',
  headers: {
    'api-key': apiKey,
    'Content-Type': 'application/json',
  },
});
/**
 * Upsert vectors into a Pinecone index through the shared `pinecone` client.
 *
 * `ids[i]` is paired with `vectors[i]`, and the pairs are sent in the record
 * shape Pinecone's upsert API documents: `{ vectors: [{ id, values }] }`.
 *
 * NOTE(review): Pinecone documents upsert as `POST /vectors/upsert` on the
 * index's own host. Confirm that the path used here is routable on the
 * client's base URL before enabling uploads.
 *
 * @param {string} indexName - Name of the target index; URL-encoded into the path.
 * @param {string[]} ids - One id per vector, in the same order as `vectors`.
 * @param {number[][]} vectors - Embedding values, one array per id.
 * @returns {Promise<*>} The parsed response body (`response.data`).
 * @throws {TypeError} If `ids` or `vectors` is not an array.
 * @throws {RangeError} If `ids` and `vectors` differ in length.
 */
async function upsertVectors(indexName, ids, vectors) {
  if (!Array.isArray(ids) || !Array.isArray(vectors)) {
    throw new TypeError('ids and vectors must both be arrays');
  }
  // A length mismatch would silently attach ids to the wrong embeddings.
  if (ids.length !== vectors.length) {
    throw new RangeError(
      `ids (${ids.length}) and vectors (${vectors.length}) must have the same length`
    );
  }

  const records = ids.map((id, position) => ({ id, values: vectors[position] }));
  // Encode the name so characters such as '/' or spaces cannot alter the path.
  const path = `/v1/index/${encodeURIComponent(indexName)}/vectors/upsert`;
  const response = await pinecone.post(path, { vectors: records });
  return response.data;
}
// Example usage
const indexName = 'my-index';
// One id per chunk, in the same order as splitDocs ("doc0", "doc1", ...).
const ids = splitDocs.map((_doc, index) => `doc${index}`);
// FaissStore does not expose a `vectors` property (reading it yields
// undefined), so the chunk texts are embedded directly instead. This issues
// another embeddings request, and the result is index-aligned with `ids`.
const vectors = await new OpenAIEmbeddings().embedDocuments(
  splitDocs.map((doc) => doc.pageContent)
);
// The upload is intentionally left disabled; uncomment to send the vectors:
// upsertVectors(indexName, ids, vectors)
// .then(response => console.log(response))
// .catch(error => console.error(error));
console.log('ids: ', ids[0]);
console.log('vectors: ', vectors);