Developer Documentation

Developer documentation for using UBIAI's APIs

Getting started

Welcome to the UBIAI Developer Documentation! This guide is designed to help developers integrate and utilize UBIAI's APIs effectively. UBIAI provides a cutting-edge text labeling platform for natural language processing. By leveraging our APIs, developers can enhance their applications with advanced AI capabilities.

Train model

post

Create a model training job on AWS SageMaker

Path parameters

token (any, required)

id (any, required)

pk (integer, required)

Model primary database key

Body

drop (number, required). Example: 4

max_batch (integer, required). Example: 4

nb_iter (integer, required). Example: 8

project (integer, required). Example: 8791

selected_model (string, required). Example: blank

selected_validation (integer, required). Example: 20

model_type (string, required). Example: layoutlm

with_annotate (boolean, required)

allowed_labels (array, optional)

allowed_relations (array, optional)

allowed_classifications (array, optional)

Responses

200Success

application/json

400Error

application/json

403Error

application/json

post

POST //api_v1/train_model/{token}/{id} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/json
Accept: */*
Content-Length: 219

{
  "drop": 4,
  "max_batch": 4,
  "nb_iter": 8,
  "project": 8791,
  "selected_model": "blank",
  "selected_validation": 20,
  "model_type": "layoutlm",
  "with_annotate": false,
  "allowed_labels": [],
  "allowed_relations": [],
  "allowed_classifications": []
}

{
  "status": 200,
  "model": {
    "job_id": 123,
    "owner": 456,
    "model": 789,
    "name": "Training Job 1",
    "status": "training",
    "model_type": "layoutlm",
    "created_at": "2024-03-19T12:00:00Z",
    "nb_iter": 8,
    "drop": 4,
    "max_batch": 4,
    "selected_model": "blank",
    "selected_validation": 20,
    "with_annotate": false,
    "allowed_labels": [],
    "allowed_relations": [],
    "allowed_classifications": []
  }
}

import json

import requests

# Base endpoint; the access token is appended as a path segment below.
url = "https://api.ubiai.tools:8443/api_v1/train_model"
my_token = "put_your_access_token"

# Training parameters. Numeric fields are sent as numbers (not strings) to
# match the integer/number types listed in the request body schema.
payload = {
    "drop": 4,
    "max_batch": 4,
    "nb_iter": 8,
    "project": 8791,
    "selected_model": "blank",
    "selected_validation": 20,
    "model_type": "layoutlm",
    "with_annotate": False,
    "allowed_labels": [],
}

# Join the token with an explicit "/" so it forms its own path segment.
# NOTE(review): the documented path is /api_v1/train_model/{token}/{id};
# append the id segment here if the API requires it -- confirm.
endpoint = url + "/" + my_token

# Send the body as JSON: the endpoint is documented with
# Content-Type: application/json, and form encoding would drop empty lists.
response = requests.post(endpoint, json=payload)
if response.status_code == 200:
    # Access the response content
    result = json.loads(response.content.decode("utf-8"))
    print("Response Data:", result)
else:
    # Handle the error
    print("Error:", response.status_code, response.text)

Model types for training

model types

models

Spacy

en_core_web_en

Bert

distilbert-base-cased

dslim/bert-base-NER

roberta-base

allenai/scibert_scivocab_uncased

alvaroalon2/biobert_chemical_ner

LayoutLM

LayoutLM-base

Template Form Recognizer

blank

Add project

post

Path parameters

token (string, required)

User access token

Header parameters

Authorization (string, required)

Bearer token for authentication

Body

name (string, required). Example: Project Name

language (string, required). Example: English

description (string, required). Example: Description of the project

type (string, required). Example: Text Annotation

classification_type (string, required). Example: Binary

Responses

200Success

application/json

400Error

application/json

403Error

application/json

500Error

application/json

post

POST //api_v1/project HTTP/1.1
Host: api.ubiai.tools:8443
Authorization: text
Content-Type: application/json
Accept: */*
Content-Length: 423

{
  "name": "Project Name",
  "language": "English",
  "description": "Description of the project",
  "type": "Text Annotation",
  "entities_labels": [
    {
      "text": "Entity 1",
      "shortcut": "1"
    },
    {
      "text": "Entity 2",
      "shortcut": "2"
    }
  ],
  "relations_labels": [
    {
      "text": "Relation 1",
      "shortcut": "A"
    },
    {
      "text": "Relation 2",
      "shortcut": "B"
    }
  ],
  "classification_type": "Binary",
  "classifications_labels": [
    {
      "text": "Label 1",
      "shortcut": "X"
    },
    {
      "text": "Label 2",
      "shortcut": "Y"
    }
  ]
}

{
  "application/json": {
    "details": "Project created successfully."
  }
}

import requests
import json

# Project-creation endpoint and the authentication header sent with it.
url = "https://api.ubiai.tools:8443/api_v1/project"
headers = {"Authorization": "Token put_your_acess_token"}

# Request body describing the project to create.
project = {
    # Name of the project.
    "name": "",
    # Language of the project. Must be one of:
    # 'Chinese', 'Danish', 'Dutch', 'English', 'French',
    # 'German', 'Greek', 'Italian', 'Lithuanian', 'Multi-language',
    # 'Norwegian Bokmål', 'Polish', 'Romanian', 'Spanish',
    # 'Afrikaans', 'Albanian', 'Arabic', 'Armenian', 'Basque',
    # 'Bengali', 'Bulgarian', 'Catalan', 'Croatian', 'Czech',
    # 'Estonian', 'Finnish', 'Gujarati', 'Hebrew', 'Hindi', 'Hungarian', 'Tamil'
    "language": "English",
    # Free-text description of the project.
    "description": "",
    # Project type:
    #   'Text Annotation'            - span based
    #   'Character Based Annotation' - character based
    #   'Native PDF Annotation'      - OCR
    #   'Image Classification'
    "type": "Text Annotation",
    # Each label object has the form {'text': 'example', 'shortcut': '1'}.
    "entities_labels": [],
    # Each label object has the form {'text': 'example', 'shortcut': '1'}.
    "relations_labels": [],
    # Classification type: 'binary' for positive or negative,
    # 'single' for single classification, 'multi' for multi classifications.
    "classification_type": "Binary",
    # Fill this only when the classification type is not binary.
    # Each label object has the form {'text': 'example', 'shortcut': '1'}.
    "classifications_labels": [],
}

response = requests.post(url, headers=headers, json=project)
if response.status_code != 200:
    # The request failed: report the status code and the raw body.
    print("Error:", response.status_code, response.text)
else:
    # The request succeeded: decode and parse the JSON body.
    body_text = response.content.decode("utf-8")
    data = json.loads(body_text)
    print("Response Data:", data)

Annotate project

post

Path parameters

token (any, required)

id (any, required)

pk (integer, required)

Primary key of the project

Header parameters

token (string, required)

User access token

Responses

200

Annotation successful

application/json

400Error

application/json

403Error

application/json

500Error

application/json

post

POST //api_v1/annotate_project/{token}/{id} HTTP/1.1
Host: api.ubiai.tools:8443
token: text
Accept: */*

{
  "result": "text"
}

import json

import requests

# Endpoint template: the access token and the project id are path segments.
# NOTE(review): the HTTP example for this endpoint lists the host as
# api.ubiai.tools -- confirm which host is correct.
api_url = "https://app.ubiai.tools:8443/api_v1/annotate_project/{token}/{project_id}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", project_id="your_project_id")

# Make a POST request to the API (no body is sent)
response = requests.post(api_url)

# Check the response
if response.status_code == 200:
    # Access the response content
    data = json.loads(response.content.decode("utf-8"))
    print("Response Data:", data)
else:
    # Handle the error
    print("Error:", response.status_code, response.text)

Annotate snippets

post

Path parameters

token (any, required)

Body

inputs (array, required)

Responses

200

Annotation successful

application/json

400Error

application/json

403Error

application/json

500Error

application/json

post

POST //api_v1/annotate/{token} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/json
Accept: */*
Content-Length: 63

{
  "inputs": [],
  "entities": [
    [
      {
        "start": 1,
        "end": 1,
        "label": "text"
      }
    ]
  ]
}

[
  {
    "tokens": [
      {
        "text": "text",
        "id": 1
      }
    ],
    "relations": []
  }
]

import requests
import json


# Annotation endpoint; the token string carries its own leading "/" so that
# simple concatenation yields .../annotate/<token>.
url = "https://api.ubiai.tools:8443/api_v1/annotate"
my_token = "/put-your_acess_token"

# "inputs" is a list of texts to annotate.
texts = [
    "John works at Google.",
    "John works at Google.",
]

# "entities" is a list of lists: one inner list per input text.
# Each inner list holds dicts of the form:
#   start : offset of the first character of the entity
#   end   : offset of the last character of the entity + 1
#   label : label of the entity
entities_per_text = [
    [
        {"start": 0, "end": 4, "label": "PER"},
        {"start": 14, "end": 20, "label": "COMPANY"},
    ]
    for _ in texts
]

data = {"inputs": texts, "entities": entities_per_text}

endpoint = url + my_token
response = requests.post(endpoint, json=data)
print(response.status_code)

# Decode the raw body and parse it as JSON.
raw_body = response.content.decode("utf-8")
res = json.loads(raw_body)
print(res)

Export data

get

Path parameters

token (any, required)

type (any, required)

Responses

200Success

application/json

Responsestring

export success

Example: https://example.com/download/1234

400Error

application/json

403Error

application/json

get

GET //api_v1/download/{token}/{type} HTTP/1.1
Host: api.ubiai.tools:8443
Accept: */*

https://example.com/download/1234

import json

import requests

# Endpoint template: the token and the export type are path segments.
# NOTE(review): the HTTP example for this endpoint lists the host as
# api.ubiai.tools -- confirm which host is correct.
api_url = "https://app.ubiai.tools:8443/api_v1/download/{token}/{type}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_model_token", type="aws/Lists")

# Optional split ratio, sent as the "splitRatio" query parameter
split_ratio = ""
params = {'splitRatio': split_ratio}

# Make a GET request to the API with the query parameters
response = requests.get(api_url, params=params)

# Check the response
if response.status_code == 200:
    # Access the response content
    data = json.loads(response.content.decode("utf-8"))
    print("Response Data:", data)
else:
    # Handle the error
    print("Error:", response.status_code, response.text)

Download options

Type options

aws/Lists

spacy/Json

DocBin_NER/Json

spacy_training/Json

classification/Json

ocr1,ocr2,ocr1

stanford

iob

iob_pos

iob_chatbot

Download model

get

Path parameters

token (any, required)

model_name (any, required)

Responses

200Success

application/json

400Error

application/json

403Error

application/json

get

GET //api_v1/download_model/{token}/{model_name} HTTP/1.1
Host: api.ubiai.tools:8443
Accept: */*

{
  "url": "https://example.com/download_model/1234"
}

import json

import requests

# Endpoint template: the access token and the model name are path segments.
# NOTE(review): the HTTP example for this endpoint lists the host as
# api.ubiai.tools -- confirm which host is correct.
api_url = "https://app.ubiai.tools:8443/api_v1/download_model/{token}/{model_name}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", model_name="your_name")

# Make a GET request to the API
response = requests.get(api_url)

# Check the response
if response.status_code == 200:
    # Access the response content
    data = json.loads(response.content.decode("utf-8"))
    print("Response Data:", data)
else:
    # Handle the error
    print("Error:", response.status_code, response.text)

Perform OCR & LayoutLM inference with the API

post

Path parameters

token (any, required)

file_type (any, required)

Body

ocr_engine (string, required)

OCR Engine (DEFAULT, ENGINE1, ENGINE2, ENGINE3)

file (string, required)

Uploaded file

fileUrl (string, optional)

URL of the file

filesUrls (array, optional)

List of URLs for files

Responses

200Success

application/json

400Error

application/json

403Error

application/json

500Error

application/json

post

POST //api_v1/ocr_layoutlm_inference/{token}/{file_type} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/x-www-form-urlencoded
Accept: */*
Content-Length: 67

"ocr_engine='text'&file='text'&fileUrl='text'&filesUrls=[]"

[
  {
    "application/json": [
      {
        "documentName": "invoice.pdf",
        "document": "Invoice\nBusiness Name\nStreet",
        "annotation": [
          {
            "color": "",
            "labels": "AMOUNT_HT_ID",
            "commentsList": [],
            "propertiesList": [],
            "span": [
              {
                "id": 68,
                "start": 437,
                "end": 444,
                "length": 8,
                "selected": "true",
                "pageNum": 1,
                "left": 3304.4795048236847,
                "top": 4900.814838409424,
                "width": 411.1468829140067,
                "height": 83.03435039520264,
                "conf": 99.93638610839844,
                "text": "Subtotal",
                "pageSize": {
                  "width": 5227,
                  "height": 7392
                }
              }
            ]
          }
        ],
        "tokens": [
          {
            "id": 0,
            "start": 0,
            "end": 6,
            "length": 7,
            "selected": "false",
            "pageNum": 1,
            "left": 520.5288305431604,
            "top": 438.63357067108154,
            "width": 1482.1837384998798,
            "height": 348.4454308748245,
            "conf": 99.88578796386719,
            "text": "Invoice",
            "pageSize": {
              "width": 5227,
              "height": 7392
            }
          }
        ]
      }
    ]
  }
]

import json
import mimetypes
import os
from contextlib import ExitStack

import requests

# Endpoint template: the access token and the file type are path segments.
# NOTE(review): the HTTP example for this endpoint lists the host as
# api.ubiai.tools -- confirm which host is correct.
api_url = "https://app.ubiai.tools:8443/api_v1/ocr_layoutlm_inference/{token}/{file_type}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", file_type="your_file_type")

# Define the files and other parameters to be sent with the request
file_paths = []    # add local file paths here
file_urls = []     # add urls of files online (must be public/accessible)

data = {
    "ocr_engine": "DEFAULT",    # ocr engine to use
    "filesUrls": file_urls,     # urls of files online
}

# Open every local file inside an ExitStack so that all handles are closed
# once the upload has finished, even if the request raises.
with ExitStack() as stack:
    files = [
        (
            "file",
            (
                os.path.basename(path),
                stack.enter_context(open(path, "rb")),
                mimetypes.guess_type(path)[0],
            ),
        )
        for path in file_paths
    ]

    # Make a POST request to the API with the form fields and the files
    response = requests.post(api_url, files=files, data=data)

# Check the response
if response.status_code == 200:
    # Access the response content
    result = json.loads(response.content.decode("utf-8"))
    print("Response Data:", result)
else:
    # Handle the error
    print("Error:", response.status_code, response.text)

OCR_ENGINES

DEFAULT

ENGINE1

ENGINE2

ENGINE3

type

pdf

image

Upload files

post

Path parameters

token (any, required)

file_type (any, required)

Body

autoAssignToCollab (boolean, required)

taskType (string, required)

nbUsersPerDoc (string, optional)

selectedUsers (string, optional)

filesUrls (string[], optional)

Responses

200Success

application/json

400Error

application/json

403Error

application/json

500Error

application/json

post

POST //api_v1/upload/{token}/{file_type} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/json
Accept: */*
Content-Length: 137

{
  "autoAssignToCollab": true,
  "taskType": "text",
  "nbUsersPerDoc": "text",
  "selectedUsers": "text",
  "filesUrls": [
    "text"
  ],
  "file": [
    {
      "file": "text"
    }
  ]
}

{
  "message": "Upload successfull",
  "status": 200
}

import json
import mimetypes
import os
from contextlib import ExitStack

import requests

# Endpoint template: the access token and the file type are path segments.
# NOTE(review): the HTTP example for this endpoint shows
# api.ubiai.tools and the path /api_v1/upload/{token}/{file_type} --
# confirm which host and path are correct.
api_url = "https://app.ubiai.tools:8443/api/upload/{token}/{file_type}/"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", file_type="zip")

# Define the files and other parameters to be sent with the request
list_of_file_path = []    # add local file paths here
urls = []                 # add urls of files online here

data = {
    'autoAssignToCollab': False,
    'taskType': 'TASK',
    'nbUsersPerDoc': '',
    'selectedUsers': '',
    'filesUrls': urls,
}

# Open every local file inside an ExitStack so that all handles are closed
# once the upload has finished, even if the request raises.
with ExitStack() as stack:
    files = [
        (
            'file',
            (
                os.path.basename(file_path),
                stack.enter_context(open(file_path, 'rb')),
                mimetypes.guess_type(file_path)[0],
            ),
        )
        for file_path in list_of_file_path
    ]

    # Make a POST request to the API with the form fields and the files
    response = requests.post(api_url, files=files, data=data)

# Check the response
if response.status_code == 200:
    # Access the response content
    result = json.loads(response.content.decode("utf-8"))
    print("Response Data:", result)
else:
    # Handle the error
    print("Error:", response.status_code, response.text)

Previous: Union Merge Annotations

Last updated 1 year ago