UBIAI Documentation
  • Getting Started
  • Project Settings
  • Upload Documents
  • Annotation Settings
  • Project Metrics
  • Project Models
  • Project Comparison
  • Annotation Export
  • Manual Annotation
  • Zero-shot and Few-shot Labeling
  • Hugging Face Model Auto-Labeling
  • Model-Assisted Labeling
  • Real Time Analysis
  • API
  • Collaboration
  • Inter-annotator Agreement (IAA)
  • Union Merge Annotations
  • Developer Documentation
Powered by GitBook
On this page

Developer Documentation

Developer documentation for using UBIAI's APIs

Previous: Union Merge Annotations

Last updated 1 year ago

Getting started

Welcome to the UBIAI Developer Documentation! This guide is designed to help developers integrate and utilize UBIAI's APIs effectively. UBIAI provides a cutting-edge text labeling platform for natural language processing. By leveraging our APIs, developers can enhance their applications with advanced AI capabilities.

Train model

import requests
import json


# Base endpoint for starting a model training job. The access token is
# appended to it as a path segment when the request is sent.
# NOTE(review): the endpoint reference on this page lists the path as
# /api_v1/train_model/{token}/{id} - confirm whether an id segment is
# also required after the token.
url = "https://api.ubiai.tools:8443/api_v1/train_model"
my_token = "put_your_access_token"

# Training parameters sent as the request body.
data = {
    "drop": 4,
    "max_batch": 4,
    "nb_iter": "8",
    "project": "8791",
    "selected_model": "blank",
    "selected_validation": "20",
    "model_type": "layoutlm",
    "with_annotate": False,
    "allowed_labels": [],
}

# Join the token with an explicit "/" so the token does not get glued
# onto the last path component of the endpoint.
response = requests.post(url + "/" + my_token, data=data)
if response.status_code == 200:
    # Decode the JSON body; a separate name keeps the request payload intact
    result = json.loads(response.content.decode("utf-8"))
    print("Response Data:", result)
else:
    # Report the HTTP status and the raw body returned by the server
    print("Error:", response.status_code, response.text)

Model types for training

model types
models

Spacy

en_core_web_en

Bert

distilbert-base-cased

dslim/bert-base-NER

roberta-base

allenai/scibert_scivocab_uncased

alvaroalon2/biobert_chemical_ner

LayoutLM

LayoutLM-base

Template Form Recognizer

blank

Add project

import requests
import json

url = "https://api.ubiai.tools:8443/api_v1/project"
headers = {"Authorization": "Token put_your_acess_token"}

# ---- Project definition --------------------------------------------------

project_name = ""
description = ""

# Language of the project. Accepted values:
#   'Chinese', 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek',
#   'Italian', 'Lithuanian', 'Multi-language', 'Norwegian Bokmål', 'Polish',
#   'Romanian', 'Spanish', 'Afrikaans', 'Albanian', 'Arabic', 'Armenian',
#   'Basque', 'Bengali', 'Bulgarian', 'Catalan', 'Croatian', 'Czech',
#   'Estonian', 'Finnish', 'Gujarati', 'Hebrew', 'Hindi', 'Hungarian', 'Tamil'
language = "English"

# Project type. Accepted values:
#   'Text Annotation'            - span based annotation
#   'Character Based Annotation' - character based annotation
#   'Native PDF Annotation'      - OCR
#   'Image Classification'
project_type = "Text Annotation"

# Every label (entity, relation or classification) is a dict shaped like
#   {'text': 'example', 'shortcut': '1'}
entities_labels = []
relations_labels = []

# Classification type: binary (positive or negative), single (single
# classification) or multi (multiple classifications).
# NOTE(review): the value list was originally written in lowercase
# ('binary', 'single', 'multi') while this example sends "Binary" -
# confirm which casing the API expects.
classification_type = "Binary"

# Only fill this in when the classification type is not binary.
classifications_labels = []

# ---- Request --------------------------------------------------------------

# Assemble the JSON payload from the settings above.
project = dict(
    name=project_name,
    language=language,
    description=description,
    type=project_type,
    entities_labels=entities_labels,
    relations_labels=relations_labels,
    classification_type=classification_type,
    classifications_labels=classifications_labels,
)

response = requests.post(url, headers=headers, json=project)
if response.status_code != 200:
    # Anything other than 200 is reported with its status and raw body
    print("Error:", response.status_code, response.text)
else:
    # Decode and show the JSON body returned by the API
    data = json.loads(response.content.decode("utf-8"))
    print("Response Data:", data)

Annotate project

import json

import requests

# API endpoint template: the access token and the project id are both
# passed as path segments of the URL.
api_url = "https://app.ubiai.tools:8443/api_v1/annotate_project/{token}/{project_id}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", project_id="your_project_id")


# Make a POST request to the API; no request body is sent
response = requests.post(api_url)

# Check the response
if response.status_code == 200:
    # Decode the JSON body (json is imported above; without that import
    # this branch fails with a NameError)
    data = json.loads(response.content.decode("utf-8"))
    print("Response Data:", data)
else:
    # Report the HTTP status and the raw body returned by the server
    print("Error:", response.status_code, response.text)

Annotate snippets

import requests
import json


# Base endpoint for annotating raw text snippets. The access token is
# appended to it as a path segment when the request is sent.
url = "https://api.ubiai.tools:8443/api_v1/annotate"
my_token = "put_your_access_token"


data = {
    # inputs is a list of texts, one string per snippet
    "inputs": [
        "John works at Google.",
        "John works at Google.",
    ],

    # entities is a list of lists, one inner list per input text.
    # Each inner list holds one dict per entity, in this format:
    #   start : offset of the entity's first character
    #   end   : offset of the entity's last character + 1
    #   label : label of the entity
    "entities": [
        [{'start': 0, 'end': 4, 'label': 'PER'},
         {'start': 14, 'end': 20, 'label': 'COMPANY'}],
        [{'start': 0, 'end': 4, 'label': 'PER'},
         {'start': 14, 'end': 20, 'label': 'COMPANY'}],
    ],
}

# Join the token with an explicit "/" so the separator is not lost when
# the placeholder is replaced by a real token.
response = requests.post(url + "/" + my_token, json=data)
if response.status_code == 200:
    # Decode the JSON body only on success; an error response is not
    # guaranteed to contain JSON
    res = json.loads(response.content.decode("utf-8"))
    print("Response Data:", res)
else:
    # Report the HTTP status and the raw body returned by the server
    print("Error:", response.status_code, response.text)

Export data

import json

import requests

# API endpoint template: the token and the export type are both passed
# as path segments of the URL.
api_url = "https://app.ubiai.tools:8443/api_v1/download/{token}/{type}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_model_token", type="aws/Lists")

# Sent as the "splitRatio" query-string parameter
split_ratio = ""
params = {'splitRatio': split_ratio}

# Make a GET request to the API with the query parameters
response = requests.get(api_url, params=params)

# Check the response
if response.status_code == 200:
    # Decode the JSON body (json is imported above; without that import
    # this branch fails with a NameError)
    data = json.loads(response.content.decode("utf-8"))
    print("Response Data:", data)
else:
    # Report the HTTP status and the raw body returned by the server
    print("Error:", response.status_code, response.text)

Download options

Type options

Download model

import json

import requests

# API endpoint template: the access token and the model name are both
# passed as path segments of the URL.
api_url = "https://app.ubiai.tools:8443/api_v1/download_model/{token}/{model_name}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", model_name="your_name")

# Make a GET request to the API
response = requests.get(api_url)

# Check the response
if response.status_code == 200:
    # Decode the JSON body (json is imported above; without that import
    # this branch fails with a NameError)
    data = json.loads(response.content.decode("utf-8"))
    print("Response Data:", data)
else:
    # Report the HTTP status and the raw body returned by the server
    print("Error:", response.status_code, response.text)

Perform OCR & layoutLM inference with API

import contextlib
import json
import mimetypes
import os

import requests

# API endpoint template: the access token and the file type are both
# passed as path segments of the URL.
api_url = "https://app.ubiai.tools:8443/api_v1/ocr_layoutlm_inference/{token}/{file_type}"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", file_type="your_file_type")

# Define the files and other parameters to be sent with the request
file_paths = []    # add paths of local files here
file_urls = []     # add urls of files online (must be public/accessible)

data = {
    "ocr_engine": "DEFAULT",    # ocr engine to use
    "filesUrls": file_urls,     # urls of files online
}

# ExitStack closes every opened file once the request has been sent,
# even if opening a later file or the request itself raises.
with contextlib.ExitStack() as stack:
    # One multipart "file" part per local path:
    # (field name, (file name, file object, guessed MIME type))
    files = [
        (
            "file",
            (
                os.path.basename(path),
                stack.enter_context(open(path, "rb")),
                mimetypes.guess_type(path)[0],
            ),
        )
        for path in file_paths
    ]

    # Make a POST request to the API with the files and the form fields
    response = requests.post(api_url, files=files, data=data)

# Check the response
if response.status_code == 200:
    # Decode the JSON body; a separate name keeps the request payload intact
    result = json.loads(response.content.decode("utf-8"))
    print("Response Data:", result)
else:
    # Report the HTTP status and the raw body returned by the server
    print("Error:", response.status_code, response.text)
OCR_ENGINES
type

Upload files

import contextlib
import requests
import json
import mimetypes
import os

# API endpoint template: the access token and the file type are both
# passed as path segments of the URL.
# NOTE(review): the endpoint reference on this page lists this route as
# /api_v1/upload/{token}/{file_type} - confirm which prefix is correct.
api_url = "https://app.ubiai.tools:8443/api/upload/{token}/{file_type}/"

# Type of the files being uploaded; substituted into the URL below
file_type = "zip"

# Replace placeholders with actual values
api_url = api_url.format(token="your_access_token", file_type=file_type)

# Define the files and other parameters to be sent with the request
list_of_file_path = []    # add paths of local files here
urls = []                 # add urls of files online here

data = {
    'autoAssignToCollab': False,
    'taskType': 'TASK',
    'nbUsersPerDoc': '',
    'selectedUsers': '',
    'filesUrls': urls,
}

# ExitStack closes every opened file once the request has been sent,
# even if opening a later file or the request itself raises.
with contextlib.ExitStack() as stack:
    # One multipart "file" part per local path:
    # (field name, (file name, file object, guessed MIME type))
    files = [
        (
            'file',
            (
                os.path.basename(file_path),
                stack.enter_context(open(file_path, 'rb')),
                mimetypes.guess_type(file_path)[0],
            ),
        )
        for file_path in list_of_file_path
    ]

    # Make a POST request to the API with the files and the form fields
    response = requests.post(api_url, files=files, data=data)

# Check the response
if response.status_code == 200:
    # Decode the JSON body; a separate name keeps the request payload intact
    result = json.loads(response.content.decode("utf-8"))
    print("Response Data:", result)
else:
    # Report the HTTP status and the raw body returned by the server
    print("Error:", response.status_code, response.text)
aws/Lists
spacy/Json
DocBin_NER/Json
spacy_training/Json
classification/Json
ocr1,ocr2,ocr1
stanford
iob
iob_pos
iob_chatbot
DEFAULT
ENGINE1
ENGINE2
ENGINE3
pdf
image
post
Path parameters
tokenanyRequired
idanyRequired
pkintegerRequired

Primary key of the project

Header parameters
tokenstringRequired

User access token

Responses
200
Annotation successful
application/json
400Error
application/json
403Error
application/json
500Error
application/json
post
POST //api_v1/annotate_project/{token}/{id} HTTP/1.1
Host: api.ubiai.tools:8443
token: text
Accept: */*
{
  "result": "text"
}
get
Path parameters
tokenanyRequired
typeanyRequired
tokenstringRequired

User access token

typestringRequired

Type of the download (e.g., "type" parameter)

Responses
200Success
application/json
Responsestring

export success

Example: https://example.com/download/1234
400Error
application/json
403Error
application/json
get
GET //api_v1/download/{token}/{type} HTTP/1.1
Host: api.ubiai.tools:8443
Accept: */*
https://example.com/download/1234
get
Path parameters
tokenanyRequired
model_nameanyRequired
tokenstringRequired

User access token

model_namestringRequired

Name of the model to download

Responses
200Success
application/json
400Error
application/json
403Error
application/json
get
GET //api_v1/download_model/{token}/{model_name} HTTP/1.1
Host: api.ubiai.tools:8443
Accept: */*
{
  "url": "https://example.com/download_model/1234"
}
  • Getting started
  • Train model
  • POST/api_v1/train_model/{token}/{id}
  • Model types for training
  • Add project
  • POST/api_v1/project
  • Annotate project
  • POST/api_v1/annotate_project/{token}/{id}
  • Annotate snippets
  • POST/api_v1/annotate/{token}
  • Export data
  • GET/api_v1/download/{token}/{type}
  • Download options
  • Download model
  • GET/api_v1/download_model/{token}/{model_name}
  • Perform OCR & layoutLM inference with API
  • POST/api_v1/ocr_layoutlm_inference/{token}/{file_type}
  • Upload files
  • POST/api_v1/upload/{token}/{file_type}
post

Create a model training on AWS sagemaker

Path parameters
tokenanyRequired
idanyRequired
tokenstringRequired

User access token

pkintegerRequired

Model primary database key

Body
drop · number · Required · Example: 4
max_batch · integer · Required · Example: 4
nb_iter · integer · Required · Example: 8
project · integer · Required · Example: 8791
selected_model · string · Required · Example: blank
selected_validation · integer · Required · Example: 20
model_type · string · Required · Example: layoutlm
with_annotate · boolean · Required
allowed_labels · array · Optional
allowed_relations · array · Optional
allowed_classifications · array · Optional
Responses
200Success
application/json
400Error
application/json
403Error
application/json
post
POST //api_v1/train_model/{token}/{id} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/json
Accept: */*
Content-Length: 219

{
  "drop": 4,
  "max_batch": 4,
  "nb_iter": 8,
  "project": 8791,
  "selected_model": "blank",
  "selected_validation": 20,
  "model_type": "layoutlm",
  "with_annotate": false,
  "allowed_labels": [],
  "allowed_relations": [],
  "allowed_classifications": []
}
{
  "status": 200,
  "model": {
    "job_id": 123,
    "owner": 456,
    "model": 789,
    "name": "Training Job 1",
    "status": "training",
    "model_type": "layoutlm",
    "created_at": "2024-03-19T12:00:00Z",
    "nb_iter": 8,
    "drop": 4,
    "max_batch": 4,
    "selected_model": "blank",
    "selected_validation": 20,
    "with_annotate": false,
    "allowed_labels": [],
    "allowed_relations": [],
    "allowed_classifications": []
  }
}
post
Path parameters
tokenstringRequired

User access token

Header parameters
AuthorizationstringRequired

Bearer token for authentication

Body
namestringRequiredExample: Project Name
languagestringRequiredExample: English
descriptionstringRequiredExample: Description of the project
typestringRequiredExample: Text Annotation
classification_typestringRequiredExample: Binary
Responses
200Success
application/json
400Error
application/json
403Error
application/json
500Error
application/json
post
POST //api_v1/project HTTP/1.1
Host: api.ubiai.tools:8443
Authorization: text
Content-Type: application/json
Accept: */*
Content-Length: 423

{
  "name": "Project Name",
  "language": "English",
  "description": "Description of the project",
  "type": "Text Annotation",
  "entities_labels": [
    {
      "text": "Entity 1",
      "shortcut": "1"
    },
    {
      "text": "Entity 2",
      "shortcut": "2"
    }
  ],
  "relations_labels": [
    {
      "text": "Relation 1",
      "shortcut": "A"
    },
    {
      "text": "Relation 2",
      "shortcut": "B"
    }
  ],
  "classification_type": "Binary",
  "classifications_labels": [
    {
      "text": "Label 1",
      "shortcut": "X"
    },
    {
      "text": "Label 2",
      "shortcut": "Y"
    }
  ]
}
{
  "application/json": {
    "details": "Project created successfully."
  }
}
post
Path parameters
tokenanyRequired
tokenstringRequired

User access token

Body
inputsarrayRequired
Responses
200
Annotation successful
application/json
400Error
application/json
403Error
application/json
500Error
application/json
post
POST //api_v1/annotate/{token} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/json
Accept: */*
Content-Length: 63

{
  "inputs": [],
  "entities": [
    [
      {
        "start": 1,
        "end": 1,
        "label": "text"
      }
    ]
  ]
}
[
  {
    "tokens": [
      {
        "text": "text",
        "id": 1
      }
    ],
    "relations": []
  }
]
post
Path parameters
tokenanyRequired
file_typeanyRequired
tokenstringRequired

Verification token

file_typestringRequired

Type of the file

Body
ocr_enginestringRequired

OCR Engine (DEFAULT, ENGINE1, ENGINE2, ENGINE3)

filestringRequired

Uploaded file

fileUrlstringOptional

URL of the file

filesUrlsarrayOptional

List of URLs for files

Responses
200Success
application/json
400Error
application/json
403Error
application/json
500Error
application/json
post
POST //api_v1/ocr_layoutlm_inference/{token}/{file_type} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/x-www-form-urlencoded
Accept: */*
Content-Length: 67

"ocr_engine='text'&file='text'&fileUrl='text'&filesUrls=[]"
[
  {
    "application/json": [
      {
        "documentName": "invoice.pdf",
        "document": "Invoice\nBusiness Name\nStreet",
        "annotation": [
          {
            "color": "",
            "labels": "AMOUNT_HT_ID",
            "commentsList": [],
            "propertiesList": [],
            "span": [
              {
                "id": 68,
                "start": 437,
                "end": 444,
                "length": 8,
                "selected": "true",
                "pageNum": 1,
                "left": 3304.4795048236847,
                "top": 4900.814838409424,
                "width": 411.1468829140067,
                "height": 83.03435039520264,
                "conf": 99.93638610839844,
                "text": "Subtotal",
                "pageSize": {
                  "width": 5227,
                  "height": 7392
                }
              }
            ]
          }
        ],
        "tokens": [
          {
            "id": 0,
            "start": 0,
            "end": 6,
            "length": 7,
            "selected": "false",
            "pageNum": 1,
            "left": 520.5288305431604,
            "top": 438.63357067108154,
            "width": 1482.1837384998798,
            "height": 348.4454308748245,
            "conf": 99.88578796386719,
            "text": "Invoice",
            "pageSize": {
              "width": 5227,
              "height": 7392
            }
          }
        ]
      }
    ]
  }
]
post
Path parameters
tokenanyRequired
file_typeanyRequired
tokenstringRequired

User access token

file_typestringRequired

Type of the file being uploaded (json, tsv, csv, zip, text_docs, image, native_pdf)

Body
autoAssignToCollabbooleanRequired
taskTypestringRequired
nbUsersPerDocstringOptional
selectedUsersstringOptional
filesUrlsstring[]Optional
Responses
200Success
application/json
400Error
application/json
403Error
application/json
500Error
application/json
post
POST //api_v1/upload/{token}/{file_type} HTTP/1.1
Host: api.ubiai.tools:8443
Content-Type: application/json
Accept: */*
Content-Length: 137

{
  "autoAssignToCollab": true,
  "taskType": "text",
  "nbUsersPerDoc": "text",
  "selectedUsers": "text",
  "filesUrls": [
    "text"
  ],
  "file": [
    {
      "file": "text"
    }
  ]
}
{
  "message": "Upload successfull",
  "status": 200
}