ssp.ml.transformer


class ssp.ml.transformer.ssp_labeller.SSPTextLabeler(input_col=None, output_col='label')[source]

Bases: sklearn.base.BaseEstimator, sklearn.base.TransformerMixin

fit(X, y=None)[source]
transform(X, y=None)[source]
ssp.ml.transformer.ssp_labeller.labelme(text, keywords=['#AI', 'Artificial Intelligence', 'robotics', 'machinelearningengineer', 'Machine Learning', 'scikit', '#ML', 'mathematics', 'DeepLearning', 'Deep Learning', '#DL', 'Tensorflow', 'Pytorch', 'Neural Network', 'NeuralNetwork', 'computervision', 'computer vision', 'machine vision', 'machinevision', 'convolutional network', 'convnet', 'image processing', 'NLP', 'naturallanguageprocessing', 'natural language processing', 'text processing', 'text analytics', 'nltk', 'spacy', 'iot', 'datasets', 'dataengineer', 'analytics', 'bigdata', 'big data', 'data science', 'data analytics', 'data insights', 'data mining', 'distributed computing', 'parallel processing', 'apache spark', 'hadoop', 'apache hive', 'airflow', 'mlflow', 'apache kafka', 'hdfs', 'apache', 'kafka', 'dataanalysis', 'AugmentedIntelligence', 'datascience', 'machinelearning', 'rnd', 'businessintelligence', 'DigitalTransformation', 'datamanagement', 'ArtificialIntelligence'])[source]
ssp.ml.transformer.ssp_labeller.labelme_udf(text, keywords=['#AI', 'Artificial Intelligence', 'robotics', 'machinelearningengineer', 'Machine Learning', 'scikit', '#ML', 'mathematics', 'DeepLearning', 'Deep Learning', '#DL', 'Tensorflow', 'Pytorch', 'Neural Network', 'NeuralNetwork', 'computervision', 'computer vision', 'machine vision', 'machinevision', 'convolutional network', 'convnet', 'image processing', 'NLP', 'naturallanguageprocessing', 'natural language processing', 'text processing', 'text analytics', 'nltk', 'spacy', 'iot', 'datasets', 'dataengineer', 'analytics', 'bigdata', 'big data', 'data science', 'data analytics', 'data insights', 'data mining', 'distributed computing', 'parallel processing', 'apache spark', 'hadoop', 'apache hive', 'airflow', 'mlflow', 'apache kafka', 'hdfs', 'apache', 'kafka', 'dataanalysis', 'AugmentedIntelligence', 'datascience', 'machinelearning', 'rnd', 'businessintelligence', 'DigitalTransformation', 'datamanagement', 'ArtificialIntelligence'])
ssp.ml.transformer.test_text_preprocessor.test_preprocess()[source]
class ssp.ml.transformer.text_preprocessor.TextPreProcessor(input_col=None, output_col=None)[source]

Bases: sklearn.base.BaseEstimator, sklearn.base.TransformerMixin

fit(X, y=None)[source]
transform(X, y=None)[source]
ssp.ml.transformer.text_preprocessor.preprocess(text)[source]
ssp.ml.transformer.text_preprocessor.preprocess_udf(text)
ssp.ml.transformer.text_preprocessor.remove_stop_words(text)[source]