Developer Documentation
Developer documentation for using UBIAI's APIs
Getting started
Welcome to the UBIAI Developer Documentation! This guide is designed to help developers integrate and utilize UBIAI's APIs effectively. UBIAI provides a cutting-edge text labeling platform for natural language processing. By leveraging our APIs, developers can enhance their applications with advanced AI capabilities.
Train model
Create a model training job on AWS SageMaker
Model primary database key
4488791blank20layoutlm
POST /api_v1/train_model/{token}/{id} HTTP/1.1
Host: api.ubiai.tools:8443/
Content-Type: application/json
Accept: */*
Content-Length: 219
{
"drop": 4,
"max_batch": 4,
"nb_iter": 8,
"project": 8791,
"selected_model": "blank",
"selected_validation": 20,
"model_type": "layoutlm",
"with_annotate": false,
"allowed_labels": [],
"allowed_relations": [],
"allowed_classifications": []
}{
"status": 200,
"model": {
"job_id": 123,
"owner": 456,
"model": 789,
"name": "Training Job 1",
"status": "training",
"model_type": "layoutlm",
"created_at": "2024-03-19T12:00:00Z",
"nb_iter": 8,
"drop": 4,
"max_batch": 4,
"selected_model": "blank",
"selected_validation": 20,
"with_annotate": false,
"allowed_labels": [],
"allowed_relations": [],
"allowed_classifications": []
}
}
import requests
import json
# Endpoint template for the documented route: POST /api_v1/train_model/{token}/{id}
url = "https://api.ubiai.tools:8443/api_v1/train_model/{token}/{id}"
my_token = "put_your_access_token"
# Primary database key of the model to train (the {id} path segment)
model_id = "put_your_model_id"
# Training parameters. Values use the same types as the documented JSON
# request body (numbers are sent as numbers, not strings).
data = {
    "drop": 4,
    "max_batch": 4,
    "nb_iter": 8,
    "project": 8791,
    "selected_model": "blank",
    "selected_validation": 20,
    "model_type": "layoutlm",
    "with_annotate": False,
    "allowed_labels": [],
    "allowed_relations": [],
    "allowed_classifications": [],
}
# The endpoint is documented with Content-Type: application/json, so the
# payload is sent with json= (data= would form-encode it instead).
response = requests.post(url.format(token=my_token, id=model_id), json=data)
if response.status_code == 200:
# Access the response content
data = json.loads(response.content.decode("utf-8"))
print("Response Data:", data)
else:
# Handle the error
print("Error:", response.status_code, response.text)
Model types for training
Spacy
en_core_web_en
Bert
distilbert-base-cased
dslim/bert-base-NER
roberta-base
allenai/scibert_scivocab_uncased
alvaroalon2/biobert_chemical_ner
LayoutLM
LayoutLM-base
Template Form Recognizer
blank
Add project
User access token
Bearer token for authentication
Project NameEnglishDescription of the projectText AnnotationBinary
POST /api_v1/project HTTP/1.1
Host: api.ubiai.tools:8443/
Authorization: text
Content-Type: application/json
Accept: */*
Content-Length: 423
{
"name": "Project Name",
"language": "English",
"description": "Description of the project",
"type": "Text Annotation",
"entities_labels": [
{
"text": "Entity 1",
"shortcut": "1"
},
{
"text": "Entity 2",
"shortcut": "2"
}
],
"relations_labels": [
{
"text": "Relation 1",
"shortcut": "A"
},
{
"text": "Relation 2",
"shortcut": "B"
}
],
"classification_type": "Binary",
"classifications_labels": [
{
"text": "Label 1",
"shortcut": "X"
},
{
"text": "Label 2",
"shortcut": "Y"
}
]
}{
"application/json": {
"details": "Project created successfully."
}
}
import requests
import json
url = "https://api.ubiai.tools:8443/api_v1/project"
# NOTE(review): the parameter description calls this a "Bearer token" while
# this example sends the "Token" scheme - confirm which one the API expects.
headers = {"Authorization": "Token put_your_access_token"}
project_name = ""
# language must be one of these values:
# 'Chinese', 'Danish', 'Dutch', 'English', 'French',
# 'German', 'Greek', 'Italian', 'Lithuanian', 'Multi-language',
# 'Norwegian Bokmål', 'Polish', 'Romanian', 'Spanish',
# 'Afrikaans', 'Albanian', 'Arabic', 'Armenian', 'Basque',
# 'Bengali', 'Bulgarian', 'Catalan', 'Croatian', 'Czech',
# 'Estonian', 'Finnish', 'Gujarati', 'Hebrew', 'Hindi', 'Hungarian', 'Tamil'
# NOTE(review): the original list repeated 'Polish'; the second entry was
# presumably a different language (possibly 'Portuguese') - confirm.
language = "English"
description = ""
# project type must be one of:
#   'Text Annotation'            for span based annotation
#   'Character Based Annotation' for character based annotation
#   'Native PDF Annotation'      for OCR
#   'Image Classification'
project_type = "Text Annotation"
# each label object must be in this format: {'text': 'example', 'shortcut': '1'}
entities_labels = []
# each label object must be in this format: {'text': 'example', 'shortcut': '1'}
relations_labels = []
# classification_type selects the classification mode:
#   binary - positive or negative
#   single - single classification
#   multi  - multiple classifications
# NOTE(review): the sample request uses the capitalised value "Binary";
# confirm the exact spelling/casing accepted for the other two modes.
classification_type = "Binary"
# fill this only when the classification type is different than binary
# each label object must be in this format: {'text': 'example', 'shortcut': '1'}
classifications_labels = []
# JSON body of the request, assembled from the values above
project = {
    "name": project_name,
    "language": language,
    "description": description,
    "type": project_type,
    "entities_labels": entities_labels,
    "relations_labels": relations_labels,
    "classification_type": classification_type,
    "classifications_labels": classifications_labels,
}
response = requests.post(url , json=project, headers=headers)
if response.status_code == 200:
# Access the response content
data = json.loads(response.content.decode("utf-8"))
print("Response Data:", data)
else:
# Handle the error
print("Error:", response.status_code, response.text)
Annotate project
Primary key of the project
User access token
Annotation successful
POST /api_v1/annotate_project/{token}/{id} HTTP/1.1
Host: api.ubiai.tools:8443/
token: text
Accept: */*
{
"result": "text"
}
import requests
import json  # needed below to decode the response body

# Define the API endpoint with the token and project id in the URL
# NOTE(review): the sample HTTP request shows host api.ubiai.tools while this
# URL uses app.ubiai.tools - confirm which host is correct.
api_url = "https://app.ubiai.tools:8443/api_v1/annotate_project/{token}/{project_id}"
# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", project_id="your_project_id")
# Make a POST request to the API
response = requests.post(api_url)
# Check the response
if response.status_code == 200:
# Access the response content
data = json.loads(response.content.decode("utf-8"))
print("Response Data:", data)
else:
# Handle the error
print("Error:", response.status_code, response.text)
Annotate snippets
Annotation successful
POST /api_v1/annotate/{token} HTTP/1.1
Host: api.ubiai.tools:8443/
Content-Type: application/json
Accept: */*
Content-Length: 63
{
"inputs": [],
"entities": [
[
{
"start": 1,
"end": 1,
"label": "text"
}
]
]
}[
{
"tokens": [
{
"text": "text",
"id": 1
}
],
"relations": []
}
]
import requests
import json
url = "https://api.ubiai.tools:8443/api_v1/annotate"
# The token is appended to the URL as a path segment, so keep the leading "/"
my_token = "/put_your_access_token"
data = {
    # inputs is a list of texts
    "inputs": [
        "John works at Google.",
        "John works at Google.",
    ],
    # entities is a list of lists: one list of entity dicts per input text.
    # Each dict must have this format:
    #   start : offset of the entity's first character
    #   end   : offset of the entity's last character + 1
    #   label : label of the entity
    "entities": [
        [
            {'start': 0, 'end': 4, 'label': 'PER'},
            {'start': 14, 'end': 20, 'label': 'COMPANY'},
        ],
        [
            {'start': 0, 'end': 4, 'label': 'PER'},
            {'start': 14, 'end': 20, 'label': 'COMPANY'},
        ],
    ],
}
response = requests.post(url+ my_token,json= data)
print(response.status_code)
res = json.loads(response.content.decode("utf-8"))
print(res)
Export data
export success
https://example.com/download/1234
GET /api_v1/download/{token}/{type} HTTP/1.1
Host: api.ubiai.tools:8443/
Accept: */*
https://example.com/download/1234
import requests
import json  # needed below to decode the response body

# Define the API endpoint with the token and export type in the URL
api_url = "https://app.ubiai.tools:8443/api_v1/download/{token}/{type}"
# Replace placeholders with actual values
api_url = api_url.format(token="your_model_token", type="aws/Lists")
# Split ratio forwarded as the "splitRatio" query parameter (empty here)
split_ratio = ""
params = {'splitRatio': split_ratio}
# Make a GET request to the API with the query parameters
response = requests.get(api_url, params=params)
# Check the response
if response.status_code == 200:
# Access the response content
data = json.loads(response.content.decode("utf-8"))
print("Response Data:", data)
else:
# Handle the error
print("Error:", response.status_code, response.text)
Download options
aws/Lists
spacy/Json
DocBin_NER/Json
spacy_training/Json
classification/Json
ocr1,ocr2,ocr1
stanford
iob
iob_pos
iob_chatbot
Download model
GET /api_v1/download_model/{token}/{model_name} HTTP/1.1
Host: api.ubiai.tools:8443/
Accept: */*
{
"url": "https://example.com/download_model/1234"
}
import requests
import json  # needed below to decode the response body

# Define the API endpoint with the token and model name in the URL
api_url = "https://app.ubiai.tools:8443/api_v1/download_model/{token}/{model_name}"
# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", model_name="your_name")
# Make a GET request to the API
response = requests.get(api_url)
# Check the response
if response.status_code == 200:
# Access the response content
data = json.loads(response.content.decode("utf-8"))
print("Response Data:", data)
else:
# Handle the error
print("Error:", response.status_code, response.text)
Perform OCR & layoutLM inference with API
OCR Engine (DEFAULT, ENGINE1, ENGINE2, ENGINE3)
Uploaded file
URL of the file
List of URLs for files
POST /api_v1/ocr_layoutlm_inference/{token}/{file_type} HTTP/1.1
Host: api.ubiai.tools:8443/
Content-Type: application/x-www-form-urlencoded
Accept: */*
Content-Length: 67
"ocr_engine='text'&file='text'&fileUrl='text'&filesUrls=[]"[
{
"application/json": [
{
"documentName": "invoice.pdf",
"document": "Invoice\nBusiness Name\nStreet",
"annotation": [
{
"color": "",
"labels": "AMOUNT_HT_ID",
"commentsList": [],
"propertiesList": [],
"span": [
{
"id": 68,
"start": 437,
"end": 444,
"length": 8,
"selected": "true",
"pageNum": 1,
"left": 3304.4795048236847,
"top": 4900.814838409424,
"width": 411.1468829140067,
"height": 83.03435039520264,
"conf": 99.93638610839844,
"text": "Subtotal",
"pageSize": {
"width": 5227,
"height": 7392
}
}
]
}
],
"tokens": [
{
"id": 0,
"start": 0,
"end": 6,
"length": 7,
"selected": "false",
"pageNum": 1,
"left": 520.5288305431604,
"top": 438.63357067108154,
"width": 1482.1837384998798,
"height": 348.4454308748245,
"conf": 99.88578796386719,
"text": "Invoice",
"pageSize": {
"width": 5227,
"height": 7392
}
}
]
}
]
}
]
import requests
import json  # needed below to decode the response body
import mimetypes
import os
from contextlib import ExitStack

# Define the API endpoint with the token and file type in the URL
api_url = "https://app.ubiai.tools:8443/api_v1/ocr_layoutlm_inference/{token}/{file_type}"
# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", file_type="your_file_type")
# Define the files and other parameters to be sent with the request
file_paths = []  # add local file paths here
file_urls = []  # add urls of files online (must be public/accessible)
data = {
    "ocr_engine": "DEFAULT",  # ocr engine to use
    "filesUrls": file_urls,  # urls of files online
}
# Open every local file for the upload; ExitStack closes all of them once the
# request has been sent, even if the request raises.
with ExitStack() as stack:
    files = [
        (
            "file",
            (
                os.path.basename(path),
                stack.enter_context(open(path, "rb")),
                mimetypes.guess_type(path)[0],
            ),
        )
        for path in file_paths
    ]
    # Make a POST request to the API with the files and form fields
    response = requests.post(api_url, files=files, data=data)
# Check the response
if response.status_code == 200:
# Access the response content
data = json.loads(response.content.decode("utf-8"))
print("Response Data:", data)
else:
# Handle the error
print("Error:", response.status_code, response.text)
DEFAULT
ENGINE1
ENGINE2
ENGINE3
pdf
image
Upload files
POST /api_v1/upload/{token}/{file_type} HTTP/1.1
Host: api.ubiai.tools:8443/
Content-Type: application/json
Accept: */*
Content-Length: 137
{
"autoAssignToCollab": true,
"taskType": "text",
"nbUsersPerDoc": "text",
"selectedUsers": "text",
"filesUrls": [
"text"
],
"file": [
{
"file": "text"
}
]
}{
"message": "Upload successfull",
"status": 200
}
import requests
import json
import mimetypes
import os
from contextlib import ExitStack

# Define the API endpoint with the token and file type in the URL; this
# matches the documented route POST /api_v1/upload/{token}/{file_type}
api_url = "https://app.ubiai.tools:8443/api_v1/upload/{token}/{file_type}"
# Type of the uploaded files, substituted into the URL path
file_type = "zip"
# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", file_type=file_type)
# Define the files and other parameters to be sent with the request
list_of_file_path = []  # add local file paths here
urls = []  # add urls of files online here
data = {
    'autoAssignToCollab': False,
    'taskType': 'TASK',
    'nbUsersPerDoc': '',
    'selectedUsers': '',
    'filesUrls': urls,
}
# Open every local file for the upload; ExitStack closes all of them once the
# request has been sent, even if the request raises.
with ExitStack() as stack:
    files = [
        (
            'file',
            (
                os.path.basename(file_path),
                stack.enter_context(open(file_path, 'rb')),
                mimetypes.guess_type(file_path)[0],
            ),
        )
        for file_path in list_of_file_path
    ]
    # Make a POST request to the API with the files and form fields
    response = requests.post(api_url, files=files, data=data)
# Check the response
if response.status_code == 200:
# Access the response content
data = json.loads(response.content.decode("utf-8"))
print("Response Data:", data)
else:
# Handle the error
print("Error:", response.status_code, response.text)
Last updated