# Source code for ssp.spark.udf.tensorflow_serving_api_udf

import requests
import json
from pyspark.sql.types import FloatType, ArrayType
from pyspark.sql.functions import udf

from ssp.logger.pretty_print import print_error, print_warn
from ssp.dl.tf.classifier import NaiveTextClassifier
from ssp.logger.pretty_print import print_info


def predict_text_class(text, url, tokenizer_path, timeout=30):
    """Score a single text against a TensorFlow Serving style predict endpoint.

    The text is converted to token ids with ``NaiveTextClassifier`` and posted
    to ``url`` as a one-instance JSON predict request.

    Args:
        text: Raw text to classify.
        url: Full predict URL of the model server.
        tokenizer_path: Path handed to ``NaiveTextClassifier.load_tokenizer``.
        timeout: Seconds to wait for the server. ``requests`` waits forever
            when no timeout is given, which would stall the caller on an
            unresponsive server.

    Returns:
        float: ``predictions[0][1]`` of the JSON response, i.e. the second
        value of the first (and only) prediction row.
        NOTE(review): presumably the score of class 1 - confirm against the
        served model's output layout.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    classifier = NaiveTextClassifier()
    # TODO is this right way to load the tokenizer? Move this to a flask API as one extra layer
    classifier.load_tokenizer(tokenizer_path=tokenizer_path)

    # Cast to built-in ints so json.dumps can serialise the ids
    # (assumes transform yields numeric, possibly non-builtin, values).
    token_ids = [int(t) for t in classifier.transform([text])[0]]

    payload = json.dumps({"signature_name": "serving_default", "instances": [token_ids]})
    headers = {"content-type": "application/json"}
    response = requests.post(url, data=payload, headers=headers, timeout=timeout)
    # Fail with the HTTP error instead of a KeyError on a missing 'predictions' key.
    response.raise_for_status()

    predictions = response.json()['predictions']
    return float(predictions[0][1])

# Spark return type used for the UDFs built in get_text_classifier_udf;
# predict_text_class returns a single Python float.
schema = FloatType()


def get_text_classifier_udf(is_docker, tokenizer_path):
    """Build a Spark UDF that scores a text column via the model server.

    Args:
        is_docker: Truthy when the job is triggered inside the Docker
            environment; selects the ``host.docker.internal`` endpoint,
            otherwise the ``localhost`` one is used.
        tokenizer_path: Tokenizer location forwarded to ``predict_text_class``.

    Returns:
        A PySpark UDF with ``FloatType`` return type that calls
        ``predict_text_class`` on each input value.
    """
    url = (
        "http://host.docker.internal:30125/v1/models/naive_text_clf:predict"
        if is_docker
        else "http://localhost:8501/v1/models/naive_text_clf:predict"
    )
    # Kept as a lambda: Spark derives the default output column name from the
    # wrapped callable's name.
    return udf(lambda x: predict_text_class(text=x, tokenizer_path=tokenizer_path, url=url), schema)

def predict(text):
    """Print the prediction for ``text`` from the first endpoint that answers.

    The candidate endpoints are tried in order; the first one that returns a
    prediction wins and the remaining ones are skipped. Failures are reported
    as warnings instead of being silently discarded.

    Args:
        text: Raw text to classify.

    Returns:
        The prediction returned by ``predict_text_class`` for the first
        endpoint that succeeds, or ``None`` when every endpoint fails.
    """
    print("\n")
    print_info(f"Text : {text} ")

    tokenizer_path = "~/ssp/model/raw_tweet_dataset_0/naive_text_classifier/1/"
    candidate_urls = (
        "http://host.docker.internal:30125/v1/models/naive_text_clf:predict",
        "http://localhost:8501/v1/models/naive_text_clf:predict",
        "http://127.0.0.1:30125/v1/models/naive_text_clf:predict",
    )

    for candidate in candidate_urls:
        try:
            data = predict_text_class(text=text,
                                      url=candidate,
                                      tokenizer_path=tokenizer_path)
        except Exception as err:
            # Best effort: an unreachable endpoint is expected in some
            # environments, so report it and move on to the next candidate.
            print_warn(f"{candidate} failed: {err}")
            continue
        # Return instead of exit(0): the previous bare `except:` swallowed the
        # SystemExit, so the function never actually stopped after a success.
        print_warn(candidate)
        print(data)
        return data

    print_warn("No prediction endpoint could be reached")
    return None


if __name__ == "__main__":
    # Manual smoke test: run two sample texts through predict().
    sample_texts = (
        "📰Machine learning as a tool to explore cognitive profiles of epileptic patients. Neuropsychological data science are meaningful artificial intelligence 📈🔍| Home https://t.co/cAQ2vZYxk2",
        "This is a random text to check whats the prediction...home so it gets classified as 0",
    )
    for sample in sample_texts:
        predict(sample)

# Shell setup before running this module directly:
#   export PYTHONPATH=$(pwd)/src/:$PYTHONPATH