> ## Documentation Index
> Fetch the complete documentation index at: https://docs.aryn.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Partition Document

This is the Aryn DocParse API for partitioning (and optionally chunking) a document synchronously.


## OpenAPI

````yaml post /v1/document/partition
# OpenAPI 3.1 description of the DocParse partition API.
openapi: 3.1.0
info:
  title: OpenAPI Aryn DocParse
  description: DocParse API
  version: 0.1.0
# Four base URLs are listed; every one is described as the US region.
servers:
  - url: https://api.aryn.cloud
    description: US Region
  - url: https://api.aryn.ai
    description: US Region
  - url: https://api.us.aryn.ai
    description: US Region
  - url: https://api.us.aryn.cloud
    description: US Region
# No document-wide security requirement; the partition operation declares its
# own requirement (HTTPBearer).
security: []
paths:
  # Synchronous partition operation: accepts a multipart/form-data body and
  # returns a PartitionerResponse as JSON on success.
  /v1/document/partition:
    post:
      tags:
        - Partition
      summary: Partition Document
      operationId: _sync_partition_document_v1
      parameters:
        # Optional request header; the value may be a string or null.
        - name: User-Agent
          in: header
          required: false
          schema:
            anyOf:
              - type: string
              - type: 'null'
            title: User-Agent
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              $ref: '#/components/schemas/Body__sync_partition_document_v1'
      responses:
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PartitionerResponse'
        # 400 and 403 share one shape: an object with a string `detail` field.
        '400':
          description: No Input File Provided
          content:
            application/json:
              schema:
                type: object
                properties:
                  detail:
                    type: string
                    examples:
                      - No input file provided.
        '403':
          description: Forbidden
          content:
            application/json:
              schema:
                type: object
                properties:
                  detail:
                    type: string
                    examples:
                      - Not authenticated.
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
        # Unlike the other error responses, 500 is text/plain rather than JSON.
        '500':
          description: Internal Server Error
          content:
            text/plain:
              schema:
                type: string
                examples:
                  - Internal Server Error
      # Requires the HTTPBearer scheme from components.securitySchemes.
      security:
        - HTTPBearer: []
components:
  schemas:
    Body__sync_partition_document_v1:
      properties:
        file:
          type: string
          format: binary
          title: file
        file_url:
          type: string
          format: url
          title: file_url
        options:
          type: object
          properties:
            selected_pages:
              type: array
              items:
                oneOf:
                  - type: integer
                  - type: array
                    items:
                      type: integer
                    minItems: 2
                    maxItems: 2
              title: Selected Pages
              description: >
                An array containing single integers (e.g., 1) and/or arrays with
                exactly two integers representing a range (e.g., [1, 10]).
            extract_images:
              type: boolean
              title: Extract Images
              default: false
              description: >
                A boolean value indicating whether to crop images detected in
                the document and return them, in the format given by
                `extract_image_format` and encoded as base64, in the
                `binary_representation` of the returned image elements.
            image_extraction_options:
              type: object
              properties:
                associate_captions:
                  type: boolean
                  title: Associate Captions
                  default: false
                  description: >
                    A boolean value indicating whether to associate captions
                    with the images.
                extract_image_format:
                  type: string
                  enum:
                    - ppm
                    - png
                    - jpeg
                  title: Extract Image Format
                  default: ppm
                  description: |
                    The format to use for extracted images. Defaults to ppm.
            property_extraction_options:
              type: object
              title: Property Extraction Options
              description: >
                (PAYG Only) Options for extracting properties (key-value pairs)
                from documents such as invoices, purchase orders, contracts,
                etc.
              properties:
                schema:
                  type: array
                  title: Schema
                  description: >
                    A list of properties each of which describes a specific
                    occurrence of information appearing in the document.
                  items:
                    type: object
                    properties:
                      name:
                        type: string
                        title: Name
                        description: |
                          The name of a property, e.g. 'address', 'first_name'.
                      type:
                        type: object
                        title: Type
                        description: |
                          A dictionary describing the property.
                        properties:
                          type:
                            type: string
                            title: Type
                            enum:
                              - int
                              - float
                              - date
                              - string
                              - bool
                              - object
                              - choice
                              - array
                            description: >
                              The type of the property. Simple types can be
                              'int', 'float', 'date', 'string', 'bool',
                              'choice'. A nested type of 'array' can consist of
                              properties of simple types.
                          description:
                            type: string
                            title: Description
                            description: |
                              A description of the property.
                          examples:
                            type: array
                            title: Examples
                            description: |
                              An array of examples.
                          choices:
                            type: array
                            title: Choices
                            description: |
                              Valid choices for a 'choice' type property.
                            items:
                              type: string
                          item_type:
                            type: object
                            title: Item Type
                            description: |
                              The type of items in an 'array' type property.
                        required:
                          - type
                    required:
                      - name
                      - type
                voting:
                  type: boolean
                  title: Enable Voting
                  default: false
                  description: >
                    Vote across three different LLMs from different providers to
                    improve the accuracy of property extraction. Only applicable
                    when schema is provided.
                suggest_properties:
                  type: boolean
                  title: Suggest Properties
                  default: false
                  description: Infer the schema of the submitted document.
                suggest_properties_instructions:
                  type: string
                  title: Suggest Properties Instructions
                  description: >-
                    Additional instructions to DocParse for recognizing
                    important properties for the schema.
            table_extraction_options:
              type: object
              title: Table Extraction Options
              description: |
                Options for table extraction
              properties:
                include_additional_text:
                  type: boolean
                  title: Include Additional Text
                  default: true
                  description: >
                    Attempts to merge in text that lies within the table
                    bounding box but was missed by table extraction due to
                    misalignment issues.
                model_selection:
                  type: string
                  title: Model Selection
                  default: pixels > 500 -> deformable_detr; table_transformer
                  description: >
                    An expression to instruct DocParse how to select the table
                    model to use for extraction. Default is `"pixels > 500 ->
                    deformable_detr; table_transformer"`, which means "if the
                    largest dimension of the table is more than 500 pixels, use
                    deformable_detr; otherwise use table_transformer." To use
                    only deformable_detr or table_transformer, set
                    `model_selection="deformable_detr"` or
                    `model_selection="table_transformer"`. Refer to the [full
                    documentation](processing_options) for more details.
            summarize_images:
              type: boolean
              title: Summarize Images
              default: false
              description: >
                (PAYG Only) A boolean value indicating whether to summarize
                images detected in the document and return them as the text
                representation of the image elements.
            pipeline:
              type: string
              enum:
                - standard
                - vision
              default: standard
              title: Pipeline
              description: >
                The parsing pipeline to use in DocParse. Defaults to `standard`,
                which keeps existing behavior. When set to `vision`, DocParse
                will use a VLM to parse the provided document and will ignore
                the values of the `threshold`, `text_mode`, `table_mode`,
                `text_extraction_options`, `table_extraction_options`, and
                `extract_images` parameters.
            text_mode:
              oneOf:
                - type: string
                  enum:
                    - inline_fallback_to_ocr
                    - ocr_standard
                    - ocr_vision
                    - auto
                  default: auto
                - type: string
                  enum:
                    - vision_ocr
                    - standard_ocr
                    - inline
                    - fine_grained
                  deprecated: true
              title: Text Mode
              description: >
                The mode to use for text extraction. Defaults to `auto`, which
                intelligently uses the best combination of OCR and inline text.
                Note that the `ocr_vision` mode, like the deprecated
                `vision_ocr` value, is only available for PAYG users.
            table_mode:
              type: string
              enum:
                - none
                - standard
                - vision
                - custom
              default: standard
              title: Table Mode
              description: >
                The mode to use for table structure extraction. Defaults to
                `standard`. Use `none` to skip table structure extraction. Note
                that the `vision` mode is only available for PAYG users.
            text_extraction_options:
              type: object
              title: Text Extraction Options
              description: |
                Options for text extraction
              properties:
                ocr_text_mode:
                  type: string
                  enum:
                    - vision
                    - standard
                  title: OCR Text Mode
                  deprecated: true
                  description: >
                    The mode to use for OCR text extraction on non-table
                    elements. Defaults to `standard`. Note that the `vision`
                    mode is only available for PAYG users.
                remove_line_breaks:
                  type: boolean
                  default: true
                  title: Remove Line Breaks
                  description: >
                    A boolean value indicating whether to remove line breaks
                    from the extracted text.
            ocr_language:
              type: string
              enum:
                - abaza
                - adyghe
                - afrikaans
                - albanian
                - angika
                - arabic
                - avar
                - azerbaijani
                - belarusian
                - bhojpuri
                - bihari
                - bosnian
                - bulgarian
                - chinese
                - chinese_traditional
                - croatian
                - czech
                - danish
                - dargwa
                - dutch
                - english
                - estonian
                - french
                - german
                - hindi
                - hungarian
                - icelandic
                - indonesian
                - ingush
                - irish
                - italian
                - japanese
                - kabardian
                - korean
                - konkani
                - kurdish
                - lak
                - latvian
                - lezghian
                - lithuanian
                - magahi
                - maithili
                - malay
                - maltese
                - maori
                - marathi
                - mongolian
                - nagpuri
                - nepali
                - newari
                - norwegian
                - occitan
                - persian
                - polish
                - portuguese
                - romanian
                - russian
                - serbian_cyrillic
                - serbian_latin
                - slovak
                - slovenian
                - spanish
                - swahili
                - swedish
                - tabassaran
                - tagalog
                - tamil
                - telugu
                - turkish
                - ukrainian
                - urdu
                - uyghur
                - uzbek
                - vietnamese
                - welsh
              title: OCR Language
              default: english
              description: |
                The language to use for OCR. Defaults to `english`.
            threshold:
              anyOf:
                - type: string
                  enum:
                    - auto
                - type: number
                  minimum: 0
                  maximum: 1
              title: Threshold
              default: auto
              description: >
                Either the string `auto` or a number between 0 and 1 indicating
                the threshold for document segmentation. Defaults to `auto`,
                which uses an automatic threshold.
            chunking_options:
              type: object
              properties:
                strategy:
                  type: string
                  enum:
                    - context_rich
                    - mixed_multi_column
                    - maximize_within_limit
                  title: Strategy
                  description: >
                    The strategy to use for merging chunks. Defaults to
                    context_rich.
                tokenizer:
                  type: string
                  enum:
                    - openai_tokenizer
                    - character_tokenizer
                    - huggingface_tokenizer
                  title: Tokenizer
                  description: >
                    The tokenizer to use for chunking. Defaults to
                    openai_tokenizer.
                tokenizer_options:
                  type: object
                  description: The options for the tokenizer.
                  properties:
                    model_name:
                      type: string
                      default: text-embedding-3-small
                      title: Model Name
                      description: >
                        The model to use for the tokenizer. Supports any
                        tokenizer available through tiktoken or Hugging Face
                        transformers, so long as it does not run remote code.
                max_tokens:
                  type: integer
                  title: Max Tokens
                  description: |
                    The maximum number of tokens per chunk. Defaults to 512.
                merge_across_pages:
                  type: boolean
                  title: Merge Across Pages
                  description: >
                    A boolean value indicating whether to merge chunks across
                    pages. Defaults to true. Not supported for the
                    'mixed_multi_column' strategy.
              title: Chunking Options
              description: >
                The options for chunking the document. If not specified, then
                chunking will not be performed.
            output_format:
              type: string
              enum:
                - json
                - markdown
                - html
              title: Output Format
              default: json
              description: |
                The format of the output. Defaults to json.
            output_label_options:
              type: object
              properties:
                title_candidate_elements:
                  type: array
                  items:
                    type: string
                  title: title_candidate_elements
                  description: >
                    An array of strings representing the elements that should be
                    considered as title candidates. Defaults to
                    ["Section-header", "Caption"]
                promote_title:
                  type: boolean
                  title: promote_title
                  default: false
                  description: >
                    A boolean that specifies whether to promote an element to
                    title. Defaults to false.
                orientation_correction:
                  type: boolean
                  title: orientation_correction
                  default: false
                  description: >
                    A boolean value indicating whether to correct the
                    orientation of the pages. Defaults to false.
              title: Output Label Options
              description: >
                A dictionary of options to specify which heuristic to apply to
                enforce certain label outputs.
            markdown_options:
              type: object
              properties:
                include_pagenum:
                  type: boolean
                  title: Include Page Numbers
                  default: false
                  description: >
                    A boolean value indicating whether to include page numbers
                    in the markdown output. Defaults to false.
                include_headers:
                  type: boolean
                  title: Include Headers
                  default: false
                  description: >
                    A boolean value indicating whether to include page headers
                    in the markdown output. Defaults to false.
                include_footers:
                  type: boolean
                  title: Include Footers
                  default: false
                  description: >
                    A boolean value indicating whether to include page footers
                    in the markdown output. Defaults to false.
              title: Markdown Options
              description: >
                A dictionary of options to specify what to include in the
                markdown output.
            extract_table_structure:
              type: boolean
              title: Extract Table Structure
              default: true
              description: >
                Use `table_mode` instead. A boolean value indicating whether to
                extract table structure from the document. This means detecting
                cells of a table broken into rows and columns.
              deprecated: true
            use_ocr:
              type: boolean
              title: Use OCR
              deprecated: true
              description: >
                Use `text_mode` instead. A boolean value indicating whether to
                use OCR or not on the document.
            extract_image_format:
              deprecated: true
              type: string
              enum:
                - ppm
                - png
                - jpeg
              title: Extract Image Format
              default: ppm
              description: |
                The format to use for extracted images. Defaults to ppm.
      type: object
      title: Body__partition_document_sync_v1
    # Envelope of the 200 response returned by the partition operation.
    PartitionerResponse:
      title: PartitionerResponse
      type: object
      required: [status, status_code, elements, markdown]
      properties:
        status:
          title: Status
          type: array
          items:
            type: string
        status_code:
          title: Status Code
          type: integer
        # Optional: not listed under `required`.
        error:
          title: Error
          description: |
            The error message if the partitioning is not successful.
          anyOf:
            - type: string
            - type: 'null'
        # Required, but the value may be null.
        elements:
          title: Elements
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/Element'
            - type: 'null'
        # Required, but the value may be null.
        markdown:
          title: Markdown
          anyOf:
            - type: string
            - type: 'null'
    # Body of the 422 "Validation Error" response: a list of ValidationError
    # entries under `detail`.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # One entry of PartitionerResponse.elements. All five fields are required;
    # only binary_representation may be null.
    Element:
      properties:
        type:
          type: string
          title: Type
          description: |
            The type of the element.
        bbox:
          items:
            type: number
          type: array
          title: Bbox
          description: |
            The bounding box of the element.
        properties:
          type: object
          title: Properties
          description: |
            The properties of the element.
        text_representation:
          type: string
          title: Text Representation
          description: |
            The text representation of the element.
        binary_representation:
          anyOf:
            # OpenAPI 3.1 / JSON Schema 2020-12: base64 text carried inside a
            # JSON body is declared with `contentEncoding`. `format: binary`
            # denotes raw octets, which a JSON string cannot hold.
            - type: string
              contentEncoding: base64
            - type: 'null'
          title: Binary Representation
          description: |
            The binary representation of the element, or null. For image
            elements returned when `extract_images` is enabled, this is the
            cropped image encoded as base64.
      type: object
      required:
        - type
        - bbox
        - properties
        - text_representation
        - binary_representation
      title: Element
    # One entry of HTTPValidationError.detail.
    ValidationError:
      title: ValidationError
      type: object
      required: [loc, msg, type]
      properties:
        # Location: a list whose entries are strings or integers.
        loc:
          title: Location
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
  # HTTP bearer authentication; referenced by the partition operation's
  # `security` requirement.
  securitySchemes:
    HTTPBearer: { type: http, scheme: bearer }

````