!pip install boto3 --upgrade

!pip install Pillow --upgrade
!pip install numpy --upgrade
!pip install pandas --upgrade
!pip install scikit-learn --upgrade
!pip install psycopg2-binary --upgrade
!pip install requests --upgrade
!pip install requests_aws4auth --upgrade
!pip install pinecone-client --upgrade
!pip install threadpoolctl==3.1.0
!pip install numexpr==2.8.4
!pip install bottleneck==1.3.6
!pip install seaborn
region = 'us-east-1'

Token: when natural language is split into pieces, each piece is called a token. As a rough rule of thumb, one token corresponds to about three-quarters of an English word (a word is often one to a few tokens); the exact tokenization scheme differs by company and by model.
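
Since exact token counts are model-specific, a quick way to ballpark prompt size is a characters-per-token heuristic. The helper below is a minimal sketch assuming roughly four characters per token for English text; real counts from each model's tokenizer will differ.

def estimate_tokens(text):
    # rough heuristic: ~4 characters per token for English text
    return max(1, len(text) // 4)

print(estimate_tokens('Explain general relativity to an 8 year old.'))  # ~11 by this estimate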

Putting the terms above together, we can summarize as follows.

Using invoke_model

import json
import boto3

def invoke_model(client, prompt, model,
    accept='application/json', content_type='application/json',
    max_tokens=512, temperature=1.0, top_p=1.0, top_k=250, stop_sequences=[],
    count_penalty=0, presence_penalty=0, frequency_penalty=0, return_likelihoods='NONE'):
    # default response
    output = ''
    # identify the model provider
    provider = model.split('.')[0] 
    # InvokeModel
    if (provider == 'anthropic'): 
        input = {
            'prompt': prompt,
            'max_tokens_to_sample': max_tokens, 
            'temperature': temperature,
            'top_k': top_k,
            'top_p': top_p,
            'stop_sequences': stop_sequences
        }
        body = json.dumps(input)
        # use the client passed in rather than the global bedrock
        response = client.invoke_model(body=body, modelId=model, accept=accept, contentType=content_type)
        response_body = json.loads(response.get('body').read())
        output = response_body['completion']
    elif (provider == 'ai21'): 
        input = {
            'prompt': prompt, 
            'maxTokens': max_tokens,
            'temperature': temperature,
            'topP': top_p,
            'stopSequences': stop_sequences,
            'countPenalty': {'scale': count_penalty},
            'presencePenalty': {'scale': presence_penalty},
            'frequencyPenalty': {'scale': frequency_penalty}
        }
        body = json.dumps(input)
        response = client.invoke_model(body=body, modelId=model, accept=accept, contentType=content_type)
        response_body = json.loads(response.get('body').read())
        completions = response_body['completions']
        for part in completions:
            output = output + part['data']['text']
    elif (provider == 'amazon'): 
        input = {
            'inputText': prompt,
            'textGenerationConfig': {
                  'maxTokenCount': max_tokens,
                  'stopSequences': stop_sequences,
                  'temperature': temperature,
                  'topP': top_p
            }
        }
        body = json.dumps(input)
        response = client.invoke_model(body=body, modelId=model, accept=accept, contentType=content_type)
        response_body = json.loads(response.get('body').read())
        results = response_body['results']
        for result in results:
            output = output + result['outputText']
    elif (provider == 'cohere'): 
        input = {
            'prompt': prompt, 
            'max_tokens': max_tokens,
            'temperature': temperature,
            'k': top_k,
            'p': top_p,
            'stop_sequences': stop_sequences,
            'return_likelihoods': return_likelihoods
        }
        body = json.dumps(input)
        response = client.invoke_model(body=body, modelId=model, accept=accept, contentType=content_type)
        response_body = json.loads(response.get('body').read())
        results = response_body['generations']
        for result in results:
            output = output + result['text']
    elif (provider == 'meta'): 
        input = {
            'prompt': prompt,
            'max_gen_len': max_tokens,
            'temperature': temperature,
            'top_p': top_p
        }
        body = json.dumps(input)
        response = client.invoke_model(body=body, modelId=model, accept=accept, contentType=content_type)
        response_body = json.loads(response.get('body').read())
        output = response_body['generation']
    # return
    return output

# Helper function from the previous step
# {...}

# main function
bedrock = boto3.client(
    service_name='bedrock-runtime'
)
model = 'ai21.j2-ultra'

prompt = """
Human: How are you?
AI: I am well, thank you for asking. How are you?
Human: I can't find my cat.
AI: I am sorry to hear that. I can assist you with finding your cat.
Human: Thank you! Where do you think it is hiding?
AI: Cats often hide in small spaces where they feel safe, such as under beds or in closets. I can also help you look in those areas.
Human: Alright, let's just check under the bed first.
AI: I can help you look under the bed as well. Let's move the bed and check thoroughly.
Human: It is definitely not under the bed.
AI: Let's check in the closet then. Cats often feel safe in small, enclosed spaces. I can help you search in the closet.
Human: It is not in the closet either.
"""
output = invoke_model(client=bedrock, prompt=prompt, model=model)
print(output)

Embedding

import json
import boto3
import math

import numpy as np
import pandas as pd
from numpy import dot
from numpy.linalg import norm
from sklearn.cluster import KMeans

bedrock = boto3.client(
    service_name='bedrock-runtime'
)

def get_embedding(bedrock, text):
    modelId = 'amazon.titan-embed-text-v1'
    accept = 'application/json'
    contentType = 'application/json'
    input = {
            'inputText': text
        }
    body=json.dumps(input)
    response = bedrock.invoke_model(
        body=body, modelId=modelId, accept=accept,contentType=contentType)
    response_body = json.loads(response.get('body').read())
    embedding = response_body['embedding']
    return embedding

def calculate_distance(v1, v2):
    distance = math.dist(v1, v2)
    return distance

def calculate_dot_product_similarity(v1, v2):
    similarity = dot(v1, v2)
    return similarity

def calculate_cosine_similarity(v1, v2):
    similarity = dot(v1, v2) / (norm(v1) * norm(v2))
    return similarity


names = ['Albert Einstein', 'Bob Dylan', 'Elvis Presley', 
         'Isaac Newton', 'Michael Jackson', 'Niels Bohr', 
         'Taylor Swift', 'Hank Williams', 'Werner Heisenberg', 
         'Stevie Wonder', 'Marie Curie', 'Ernest Rutherford', 'Byoungsoo Ko']
embeddings = []
for name in names:
    embeddings.append(get_embedding(bedrock, name))
# clustering
df = pd.DataFrame(data={'names': names, 'embeddings': embeddings})
matrix = np.vstack(df.embeddings.values)
n_clusters = 2
kmeans = KMeans(n_clusters = n_clusters, init='k-means++', random_state=42)
kmeans.fit(matrix)
df['cluster'] = kmeans.labels_
# result
print(df[['cluster', 'names']])

Converse API

import json
import boto3

prompt = """
Explain what general relativity is to an 8 year old.
"""

## Model 
# model_id = 'anthropic.claude-3-sonnet-20240229-v1:0'
# model_id = 'meta.llama3-8b-instruct-v1:0'
model_id = 'amazon.titan-text-lite-v1'

bedrock = boto3.client(service_name='bedrock-runtime')
message = {"role": "user", "content": [{"text": prompt}]}
messages = [message]
inference_config = {"temperature": 0.5, "topP": 0.9, "maxTokens": 500}
response = bedrock.converse(
    modelId=model_id,
    messages=messages,
    inferenceConfig=inference_config
)
content = response['output']['message']['content']
for item in content:
    print(item['text'])
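
Beyond the generated text, the converse response also reports why generation stopped and how many tokens were consumed, which is useful for cost tracking:

print(response['stopReason'])   # e.g. 'end_turn' or 'max_tokens'
print(response['usage'])        # inputTokens / outputTokens / totalTokens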

ConverseStream API

import json
import boto3

prompt = """
Explain what general relativity is to an 8 year old.
"""

# model_id = 'anthropic.claude-3-sonnet-20240229-v1:0'
# model_id = 'meta.llama3-8b-instruct-v1:0'
model_id = 'amazon.titan-text-lite-v1'

bedrock = boto3.client(service_name='bedrock-runtime')
message = {"role": "user", "content": [{"text": prompt}]}
messages = [message]
inference_config = {"temperature": 0.5, "topP": 0.9, "maxTokens": 500}
response = bedrock.converse_stream(
    modelId=model_id,
    messages=messages,
    inferenceConfig=inference_config
)
stream = response.get('stream')
if stream:
    for event in stream:
        if 'messageStart' in event:
            print(f"\nRole: {event['messageStart']['role']}")
        if 'contentBlockDelta' in event:
            print(event['contentBlockDelta']['delta']['text'], end="")
        if 'messageStop' in event:
            print(f"\n\nStop reason: {event['messageStop']['stopReason']}")
        if 'metadata' in event:
            metadata = event['metadata']
            if 'usage' in metadata:
                print("\nToken usage")
                print(f"Input tokens: {metadata['usage']['inputTokens']}")
                print(f"Output tokens: {metadata['usage']['outputTokens']}")
                print(f"Total tokens: {metadata['usage']['totalTokens']}")
            if 'metrics' in metadata:
                print(f"Latency: {metadata['metrics']['latencyMs']} milliseconds")

Conversational Chatbot

For a foundation model to remember what has been asked and answered, it needs something similar to a server-side session. Given the size of conversation data, this is expensive for the service to offer, especially in a multi-tenant environment. The workaround is for the customer to keep the chat history on the client side instead: when asking a follow-up question, the client sends the chat history as part of the prompt, so the model knows what has already been discussed.

import json
import boto3

model_id = 'amazon.titan-text-lite-v1'
bedrock = boto3.client(service_name='bedrock-runtime')
messages = [
    {"role": "user",      "content": [{"text": "How are you?"}]},
    {"role": "assistant", "content": [{"text": "I am well, thank you for asking! I hope you are well, too."}]},
    {"role": "user",      "content": [{"text": "I can't find my cat."}]},
    {"role": "assistant", "content": [{"text": "Did the cat go missing recently?"}]},
    {"role": "user",      "content": [{"text": "I saw it in the garden yesterday."}]}
]
inference_config = {"temperature": 1.0, "topP": 1.0, "maxTokens": 200, "stopSequences": []}
response = bedrock.converse(
    modelId=model_id,
    messages=messages,
    inferenceConfig=inference_config
)
content = response['output']['message']['content']
for item in content:
    print(item['text'])
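
To ask a follow-up, append the model's reply and the next user turn to the client-side history and call converse again. A minimal sketch of the next round trip (the follow-up text here is just an example):

# carry the conversation forward on the client side
messages.append(response['output']['message'])
messages.append({"role": "user", "content": [{"text": "Could it have slipped out into the garden again?"}]})
response = bedrock.converse(modelId=model_id, messages=messages, inferenceConfig=inference_config)
print(response['output']['message']['content'][0]['text'])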

[Image: bedrock_chatbot.png]


Embedding

The academic definition of embedding is translating high-dimensional vectors into a relatively low-dimensional space. For this workshop, think of the high-dimensional input as natural language and the low-dimensional output as a vector of a few hundred dimensions. You might know each and every word in a sentence but still have no idea what the whole sentence means; embedding addresses this by converting natural language into a sequence of numbers, with the input being a piece of text and the output being a vector. In other words, the vector is a numerical representation of the text, which makes it easy to perform all kinds of complex computations in AI/ML.

import json
import boto3
import math

from numpy import dot
from numpy.linalg import norm

def get_embedding(bedrock, text):
    modelId = 'amazon.titan-embed-text-v1'
    accept = 'application/json'
    contentType = 'application/json'
    input = {
            'inputText': text
        }
    body=json.dumps(input)
    response = bedrock.invoke_model(
        body=body, modelId=modelId, accept=accept,contentType=contentType)
    response_body = json.loads(response.get('body').read())
    embedding = response_body['embedding']
    return embedding


## Euclidean distance
def calculate_distance(v1, v2):
    distance = math.dist(v1, v2)
    return distance

## Dot Product Similarity
def calculate_dot_product_similarity(v1, v2):
    similarity = dot(v1, v2)
    return similarity

## Cosine Similarity
def calculate_cosine_similarity(v1, v2):
    similarity = dot(v1, v2) / (norm(v1) * norm(v2))
    return similarity
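
## Quick sanity check of the three metrics (a sketch; the sample sentences are arbitrary).
## Related texts should show a smaller Euclidean distance and a higher cosine similarity.
bedrock = boto3.client(service_name='bedrock-runtime')
v1 = get_embedding(bedrock, 'The cat sleeps on the sofa.')
v2 = get_embedding(bedrock, 'A kitten is napping on the couch.')
v3 = get_embedding(bedrock, 'Quarterly tax filings are due next month.')
print(calculate_distance(v1, v2), calculate_distance(v1, v3))
print(calculate_cosine_similarity(v1, v2), calculate_cosine_similarity(v1, v3))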

## Search and recommendation
def search(dataset, v):
    for item in dataset:
        item['distance'] = calculate_distance(item['embedding'], v)
    dataset.sort(key=lambda x: x['distance'])
    return dataset[0]['text']

## Classification
def classify(classes, v):
    for item in classes:
        item['distance'] = calculate_distance(item['embedding'], v)
    classes.sort(key=lambda x: x['distance'])
    return classes[0]['name']

## Search and recommendation
# main function
bedrock = boto3.client(
    service_name='bedrock-runtime'
)
# the data set
t1 = """
The theory of general relativity says that the observed gravitational effect between masses results from their warping of spacetime. 
"""
t2 = """
Quantum mechanics allows the calculation of properties and behaviour of physical systems. It is typically applied to microscopic systems: molecules, atoms and sub-atomic particles. 
"""
t3 = """
Wavelet theory is essentially the continuous-time theory that corresponds to dyadic subband transforms — i.e., those where the L (LL) subband is recursively split over and over.
"""
t4 = """
Every particle attracts every other particle in the universe with a force that is proportional to the product of their masses and inversely proportional to the square of the distance between their centers.
"""
t5 = """
The electromagnetic spectrum is the range of frequencies (the spectrum) of electromagnetic radiation and their respective wavelengths and photon energies. 
"""
dataset = [
    {'text': t1, 'embedding': get_embedding(bedrock, t1)}, 
    {'text': t2, 'embedding': get_embedding(bedrock, t2)}, 
    {'text': t3, 'embedding': get_embedding(bedrock, t3)}, 
    {'text': t4, 'embedding': get_embedding(bedrock, t4)}, 
    {'text': t5, 'embedding': get_embedding(bedrock, t5)}
]
query = 'Isaac Newton'
v = get_embedding(bedrock, query)              
result = search(dataset, v)
print(result)

The query 'Isaac Newton' returns t4, the passage on universal gravitation.

## Classification
# main function1
bedrock = boto3.client(
    service_name='bedrock-runtime'
)
# the data set
classes = [    
    {'name': 'athletics', 'description': 'all students with a talent in sports'}, 
    {'name': 'musician', 'description': 'all students with a talent in music'}, 
    {'name': 'magician', 'description': 'all students with a talent in witchcraft'}
]
for item in classes:
    item['embedding'] = get_embedding(bedrock, item['description'])
# perform a classification
query = 'Ellison sends a spell to prevent Professor Wang from entering the classroom'
v = get_embedding(bedrock, query)              
result = classify(classes, v)
print(result)


# main function2
bedrock = boto3.client(
    service_name='bedrock-runtime'
)
# the data set
classes = [    
    {'name': 'positive', 'description': 'customer demonstrated positive sentiment in the response.'}, 
    {'name': 'negative', 'description': 'customer demonstrated negative sentiment in the response.'}
]
for item in classes:
    item['embedding'] = get_embedding(bedrock, item['description'])
# perform a classification
query = 'Steve helped me solve the problem in just a few minutes. Thank you for the great work!'
#query = 'It took too long to get a response from your support engineer!'
v = get_embedding(bedrock, query)              
result = classify(classes, v)
print(result)

Clustering

import json
import boto3
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

def get_embedding(bedrock, text):
    modelId = 'amazon.titan-embed-text-v1'
    accept = 'application/json'
    contentType = 'application/json'
    input = {
            'inputText': text
        }
    body=json.dumps(input)
    response = bedrock.invoke_model(
        body=body, modelId=modelId, accept=accept,contentType=contentType)
    response_body = json.loads(response.get('body').read())
    embedding = response_body['embedding']
    return embedding

# main function
bedrock = boto3.client(
    service_name='bedrock-runtime'
)
names = ['Albert Einstein', 'Bob Dylan', 'Elvis Presley', 
         'Isaac Newton', 'Michael Jackson', 'Niels Bohr', 
         'Taylor Swift', 'Hank Williams', 'Werner Heisenberg', 
         'Stevie Wonder', 'Marie Curie', 'Ernest Rutherford']
embeddings = []
for name in names:
    embeddings.append(get_embedding(bedrock, name))
# clustering
df = pd.DataFrame(data={'names': names, 'embeddings': embeddings})
matrix = np.vstack(df.embeddings.values)
n_clusters = 2
kmeans = KMeans(n_clusters = n_clusters, init='k-means++', random_state=42)
kmeans.fit(matrix)
df['cluster'] = kmeans.labels_
# result
print(df[['cluster', 'names']])
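
With n_clusters = 2, you should typically see the physicists (Einstein, Newton, Bohr, Heisenberg, Curie, Rutherford) and the musicians (Dylan, Presley, Jackson, Swift, Williams, Wonder) fall into separate clusters, though k-means on embeddings is not guaranteed to find exactly this split.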

Knowledge Base

RAG (Retrieval-Augmented Generation)

With RAG, you do not need to write the context into the prompt yourself: the results retrieved from the knowledge base are inserted into the prompt and sent to the foundation model.

import boto3
import json
import requests
from requests_aws4auth import AWS4Auth

def get_embedding(bedrock, text):
    modelId = 'amazon.titan-embed-text-v1'
    accept = 'application/json'
    contentType = 'application/json'
    input = {
            'inputText': text
        }
    body=json.dumps(input)
    response = bedrock.invoke_model(
        body=body, modelId=modelId, accept=accept,contentType=contentType)
    response_body = json.loads(response.get('body').read())
    embedding = response_body['embedding']
    return embedding
    
def search(embedding, limit=1):
    # prepare for OpenSearch Serverless
    service = 'aoss'
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(
        credentials.access_key, 
        credentials.secret_key, 
        region, 
        service, 
        session_token=credentials.token
    )
    # search (host is the OpenSearch Serverless collection endpoint, set in an earlier step)
    index = 'demo-index'
    datatype = '_search'
    url = host + '/' + index + '/' + datatype
    headers = {'Content-Type': 'application/json'}
    document = {
        'size': limit,
        'query': {
            'knn': {
                'embedding': {
                    'vector': embedding,
                    'k': limit
                }
            }
        }
    }
    # response
    response = requests.get(url, auth=awsauth, json=document, headers=headers)
    response.raise_for_status()
    data = response.json()
    output = ''
    for item in data['hits']['hits']:
        output += item['_source']['content'] + '\n'
    return output

# main function
bedrock = boto3.client(service_name='bedrock-runtime')
# this is the original prompt (query text)
prompt = 'What does Albert Einstein do?'
# convert the query text into an embedding
embedding = get_embedding(bedrock, prompt)
# retrieve 5 most relevant entries from the knowledge base
info = search(embedding, limit=5)
# augment the prompt with the context
prompt = 'Use the context below to answer the question:\n\n=== Context ===\n{0}\n\n=== Question ===\n{1}'.format(info, prompt)
# ask the foundation model
model_id = 'mistral.mistral-large-2402-v1:0'
message = {"role": "user", "content": [{"text": prompt}]}
messages = [message]
inference_config = {"temperature": 0.5, "topP": 0.9, "maxTokens": 500}
response = bedrock.converse(
    modelId=model_id,
    messages=messages,
    inferenceConfig=inference_config
)
content = response['output']['message']['content']
for item in content:
    print(item['text'])
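
The search above assumes demo-index has already been populated. For reference, here is a minimal ingestion sketch under the same assumptions: the index name, the content and embedding field names, and the host variable match the search code above, and the index must already exist as a k-NN vector index in the OpenSearch Serverless collection.

def index_document(bedrock, text):
    # sign the request for OpenSearch Serverless, as in search() above
    service = 'aoss'
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key, credentials.secret_key,
                       region, service, session_token=credentials.token)
    url = host + '/demo-index/_doc'
    headers = {'Content-Type': 'application/json'}
    # store the raw text alongside its embedding
    document = {
        'content': text,
        'embedding': get_embedding(bedrock, text)
    }
    response = requests.post(url, auth=awsauth, json=document, headers=headers)
    response.raise_for_status()
    return response.json()

index_document(bedrock, 'Albert Einstein was a theoretical physicist who developed the theory of relativity.')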