import pickle
import tensorflow as tf
import numpy as np
from fastapi import FastAPI
from pydantic import BaseModel
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 1. Define file paths
tokenizer_file_path = 'tokenizer.pkl'
model_dir_path = 'keras_model_savedmodel'  # SavedModel directory produced by model.export()

# 2. Define max_sequence_length
max_sequence_length = 12

# 3. Load the tokenizer object
# The app assumes tokenizer.pkl is present in the same directory
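# Note (general caveat, not specific to this project): pickle files should only
# be loaded from trusted sources, and unpickling the tokenizer assumes a Keras
# version compatible with the one that saved it.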
with open(tokenizer_file_path, 'rb') as handle:
    tokenizer = pickle.load(handle)

# 4. Load the Keras model as an inference-only layer using TFSMLayer
# The app assumes the keras_model_savedmodel directory is present alongside app.py
# The call_endpoint 'serve' is derived from the model.export() output.
model = tf.keras.layers.TFSMLayer(model_dir_path, call_endpoint='serve')
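# If loading fails with an unknown-endpoint error, the available signatures can
# be listed with TensorFlow's standard inspection CLI:
#   saved_model_cli show --dir keras_model_savedmodel --all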

# 5. Define the generate_first_word function
def generate_first_word(input_text, model, tokenizer, max_sequence_length):
    # Tokenize the input_text
    input_sequence = tokenizer.texts_to_sequences([input_text])

    # Pad this integer sequence to the correct length
    padded_input = pad_sequences(input_sequence, maxlen=max_sequence_length, padding='post')

    # Convert padded_input to a TensorFlow tensor with the expected dtype (float32)
    padded_input_tensor = tf.constant(padded_input, dtype=tf.float32)

    # Use the loaded TFSMLayer to get predictions
    predicted_outputs = model(padded_input_tensor)

    # The TFSMLayer may return a dict keyed by output name, a tuple/list of
    # tensors, or a bare tensor; in each case, take the first output's first batch element.
    if isinstance(predicted_outputs, dict):
        predicted_probabilities = list(predicted_outputs.values())[0][0]
    elif isinstance(predicted_outputs, (tuple, list)):
        predicted_probabilities = predicted_outputs[0][0]
    else:
        predicted_probabilities = predicted_outputs[0]

    # Determine the index of the word with the highest probability
    predicted_token_id = np.argmax(predicted_probabilities)

    # Map the predicted token ID back to a human-readable word; argmax can land
    # on an ID absent from index_word (e.g. the padding index 0), hence the fallback
    predicted_word = tokenizer.index_word.get(int(predicted_token_id), "<UNKNOWN_TOKEN>")
    return predicted_word
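# Illustrative call (the input string and predicted word are hypothetical and
# depend entirely on the trained vocabulary):
#   generate_first_word("once upon a", model, tokenizer, max_sequence_length)
#   -> "time"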

# 6. Define a Pydantic InputText model
class InputText(BaseModel):
    text: str

# 7. Instantiate the FastAPI application.
app = FastAPI()

# 8. Create a root endpoint
@app.get("/")
async def read_root():
    return {"message": "Welcome to the Generative Text Model API!"}

# 9. Create a POST endpoint /predict
@app.post("/predict")
async def predict_first_word(input_data: InputText):
    predicted_word = generate_first_word(input_data.text, model, tokenizer, max_sequence_length)
    return {"predicted_first_word": predicted_word}

print("API code consolidated into app.py.")