> ## Documentation Index
> Fetch the complete documentation index at: https://docs.wandb.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# RLトレーニング ジョブを作成

> 新しい RL（強化学習）トレーニングジョブを作成します。



## OpenAPI

````yaml /ja/serverless-training/api-reference/openapi.json post /v1/preview/training-jobs
# OpenAPI 3.1 document header for the Serverless Training API.
openapi: 3.1.0
info:
  title: Serverless Training
  version: 1.0.0
# No server URLs are declared in this document.
servers: []
# No document-wide security requirement; the operation declares its own.
security: []
# Single operation: POST /v1/preview/training-jobs creates an RL training job.
paths:
  /v1/preview/training-jobs:
    post:
      tags:
        - training-jobs
      summary: RLトレーニング ジョブを作成
      description: 新しい RL（強化学習）トレーニングジョブを作成します。
      operationId: Create_RL_Training_Job_v1_preview_training_jobs_post
      # A JSON request body matching CreateTrainingJob is mandatory.
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateTrainingJob'
        required: true
      responses:
        # Success: the body is a TrainingJobResponse.
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TrainingJobResponse'
          description: 正常なレスポンス
        # Validation failure: the body is an HTTPValidationError.
        '422':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
          description: 検証エラー
      # Requires the HTTPBearer scheme from components.securitySchemes.
      security:
        - HTTPBearer: []
components:
  schemas:
    # Request body for creating a training job. `model_id` (UUID string) and
    # `trajectory_groups` are required; `experimental_config` is optional and
    # may be null.
    CreateTrainingJob:
      description: 新しい TrainingJob を作成するためのスキーマ。
      properties:
        experimental_config:
          anyOf:
            - $ref: '#/components/schemas/ExperimentalTrainingConfig'
            - type: 'null'
        model_id:
          format: uuid
          title: Model Id
          type: string
        trajectory_groups:
          items:
            $ref: '#/components/schemas/TrajectoryGroup'
          title: Trajectory Groups
          type: array
      required:
        - model_id
        - trajectory_groups
      title: CreateTrainingJob
      type: object
    # Response body of the create operation: a single required `id`
    # (UUID string).
    TrainingJobResponse:
      description: TrainingJobレスポンスのスキーマ。
      properties:
        id:
          format: uuid
          title: Id
          type: string
      required:
        - id
      title: TrainingJobResponse
      type: object
    # Body of the 422 response: `detail` is a list of ValidationError items.
    # No property is marked required.
    HTTPValidationError:
      properties:
        detail:
          items:
            $ref: '#/components/schemas/ValidationError'
          title: Detail
          type: array
      title: HTTPValidationError
      type: object
    # Experimental training settings. Every property is optional and nullable
    # (anyOf <type> | null); this schema declares no defaults and no required
    # fields.
    ExperimentalTrainingConfig:
      description: 実験的なトレーニング設定のスキーマ。
      properties:
        advantage_balance:
          anyOf:
            - type: number
            - type: 'null'
          title: Advantage Balance
        allow_training_without_logprobs:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Allow Training Without Logprobs
        epsilon:
          anyOf:
            - type: number
            - type: 'null'
          title: Epsilon
        epsilon_high:
          anyOf:
            - type: number
            - type: 'null'
          title: Epsilon High
        importance_sampling_level:
          anyOf:
            - enum:
                - token
                - sequence
                - average
                - geometric_average
              type: string
            - type: 'null'
          title: Importance Sampling Level
        kimi_k2_tau:
          anyOf:
            - type: number
            - type: 'null'
          title: Kimi K2 Tau
        kl_penalty_coef:
          anyOf:
            - type: number
            - type: 'null'
          title: Kl Penalty Coef
        kl_penalty_reference_step:
          anyOf:
            - type: integer
            - type: 'null'
          title: Kl Penalty Reference Step
        kl_penalty_source:
          anyOf:
            - enum:
                - current_learner
                - sample
              type: string
            - type: 'null'
          title: Kl Penalty Source
        kl_penalty_step_lag:
          anyOf:
            - type: integer
            - type: 'null'
          title: Kl Penalty Step Lag
        kl_ref_adapter_path:
          anyOf:
            - type: string
            - type: 'null'
          title: Kl Ref Adapter Path
        learning_rate:
          anyOf:
            - type: number
            - type: 'null'
          title: Learning Rate
        logprob_calculation_chunk_size:
          anyOf:
            - type: integer
            - type: 'null'
          title: Logprob Calculation Chunk Size
        loss_fn:
          anyOf:
            - enum:
                - cispo
                - ppo
              type: string
            - type: 'null'
          title: Loss Fn
        mask_prob_ratio:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Mask Prob Ratio
        max_negative_advantage_importance_sampling_weight:
          anyOf:
            - type: number
            - type: 'null'
          title: Max Negative Advantage Importance Sampling Weight
        normalize_advantages:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Normalize Advantages
        num_trajectories_learning_rate_multiplier_power:
          anyOf:
            - type: number
            - type: 'null'
          title: Num Trajectories Learning Rate Multiplier Power
        packed_sequence_length:
          anyOf:
            - type: integer
            - type: 'null'
          title: Packed Sequence Length
        plot_tensors:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Plot Tensors
        # NOTE(review): boolean flag, separate from the `ppo` value of
        # `loss_fn`; how the two interact is not specified in this schema.
        ppo:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Ppo
        precalculate_logprobs:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Precalculate Logprobs
        scale_learning_rate_by_reward_std_dev:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Scale Learning Rate By Reward Std Dev
        scale_rewards:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Scale Rewards
        truncated_importance_sampling:
          anyOf:
            - type: number
            - type: 'null'
          title: Truncated Importance Sampling
      title: ExperimentalTrainingConfig
      type: object
    # A group of Trajectory items; `trajectories` is required.
    TrajectoryGroup:
      properties:
        trajectories:
          items:
            $ref: '#/components/schemas/Trajectory'
          title: Trajectories
          type: array
      required:
        - trajectories
      title: TrajectoryGroup
      type: object
    # One validation problem. `loc` is a list of string/integer segments,
    # `msg` a message string and `type` an error-type string; all three are
    # required.
    ValidationError:
      properties:
        loc:
          items:
            anyOf:
              - type: string
              - type: integer
          title: Location
          type: array
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
      required:
        - loc
        - msg
        - type
      title: ValidationError
      type: object
    # A single trajectory. No property is required: list and map fields
    # default to empty, `reward` defaults to 0, and the remaining fields have
    # no declared default.
    Trajectory:
      properties:
        additional_histories:
          default: []
          items:
            $ref: '#/components/schemas/History'
          title: Additional Histories
          type: array
        final_policy_version:
          anyOf:
            - type: integer
            - type: 'null'
          title: Final Policy Version
        initial_policy_version:
          anyOf:
            - type: integer
            - type: 'null'
          title: Initial Policy Version
        logs:
          default: []
          items:
            type: string
          title: Logs
          type: array
        # Each item is either one of the chat message params or a Choice.
        messages_and_choices:
          default: []
          items:
            anyOf:
              - $ref: '#/components/schemas/ChatCompletionDeveloperMessageParam'
              - $ref: '#/components/schemas/ChatCompletionSystemMessageParam'
              - $ref: '#/components/schemas/ChatCompletionUserMessageParam'
              - $ref: '#/components/schemas/ChatCompletionAssistantMessageParam'
              - $ref: '#/components/schemas/ChatCompletionToolMessageParam'
              - $ref: '#/components/schemas/ChatCompletionFunctionMessageParam'
              - $ref: '#/components/schemas/Choice'
          title: Messages And Choices
          type: array
        # Free-form map; values may be number, integer, string, boolean or null.
        metadata:
          additionalProperties:
            anyOf:
              - type: number
              - type: integer
              - type: string
              - type: boolean
              - type: 'null'
          default: {}
          title: Metadata
          type: object
        # Free-form map; values may be number, integer or boolean only.
        metrics:
          additionalProperties:
            anyOf:
              - type: number
              - type: integer
              - type: boolean
          default: {}
          title: Metrics
          type: object
        reward:
          default: 0
          title: Reward
          type: number
        start_time:
          format: date-time
          title: Start Time
          type: string
        tools:
          anyOf:
            - items:
                $ref: '#/components/schemas/ChatCompletionFunctionToolParam'
              type: array
            - type: 'null'
          title: Tools
      title: Trajectory
      type: object
    # A message history. Uses the same `messages_and_choices` item union and
    # `tools` shape as Trajectory, but `messages_and_choices` is required here
    # and has no default.
    History:
      properties:
        messages_and_choices:
          items:
            anyOf:
              - $ref: '#/components/schemas/ChatCompletionDeveloperMessageParam'
              - $ref: '#/components/schemas/ChatCompletionSystemMessageParam'
              - $ref: '#/components/schemas/ChatCompletionUserMessageParam'
              - $ref: '#/components/schemas/ChatCompletionAssistantMessageParam'
              - $ref: '#/components/schemas/ChatCompletionToolMessageParam'
              - $ref: '#/components/schemas/ChatCompletionFunctionMessageParam'
              - $ref: '#/components/schemas/Choice'
          title: Messages And Choices
          type: array
        tools:
          anyOf:
            - items:
                $ref: '#/components/schemas/ChatCompletionFunctionToolParam'
              type: array
            - type: 'null'
          title: Tools
      required:
        - messages_and_choices
      title: History
      type: object
    # Chat message with role `developer`. `content` (a string or a list of
    # text parts) and `role` are required; `name` is optional.
    ChatCompletionDeveloperMessageParam:
      description: >-
        ユーザーが送信したメッセージにかかわらず、モデルが従うべき開発者指定の指示です。o1 モデル以降では、`developer` メッセージが従来の
        `system` メッセージに置き換わります。
      properties:
        content:
          anyOf:
            - type: string
            - items:
                $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
              type: array
          title: Content
        name:
          title: Name
          type: string
        role:
          const: developer
          title: Role
          type: string
      required:
        - content
        - role
      title: ChatCompletionDeveloperMessageParam
      type: object
    # Chat message with role `system`. `content` (a string or a list of text
    # parts) and `role` are required; `name` is optional.
    ChatCompletionSystemMessageParam:
      description: >-
        ユーザーが送信するメッセージに関係なく、モデルが従うべき開発者指定の指示です。o1 モデル以降では、この用途には代わりに
        `developer` メッセージを使用してください。
      properties:
        content:
          anyOf:
            - type: string
            - items:
                $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
              type: array
          title: Content
        name:
          title: Name
          type: string
        role:
          const: system
          title: Role
          type: string
      required:
        - content
        - role
      title: ChatCompletionSystemMessageParam
      type: object
    # Chat message with role `user`. `content` is a string or a list of
    # text / image / input-audio / file parts; `content` and `role` are
    # required, `name` is optional.
    ChatCompletionUserMessageParam:
      description: プロンプトまたは追加のコンテキスト情報を含む、エンドユーザーが送信するメッセージ。
      properties:
        content:
          anyOf:
            - type: string
            - items:
                anyOf:
                  - $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
                  - $ref: '#/components/schemas/ChatCompletionContentPartImageParam'
                  - $ref: >-
                      #/components/schemas/ChatCompletionContentPartInputAudioParam
                  - $ref: '#/components/schemas/File'
              type: array
          title: Content
        name:
          title: Name
          type: string
        role:
          const: user
          title: Role
          type: string
      required:
        - content
        - role
      title: ChatCompletionUserMessageParam
      type: object
    # Chat message with role `assistant`. Only `role` is required. `content`
    # may be a string, a list of text/refusal parts, or null; `tool_calls`
    # items are function or custom tool-call params.
    ChatCompletionAssistantMessageParam:
      description: ユーザーメッセージへの応答としてモデルが送信したメッセージ。
      properties:
        audio:
          anyOf:
            - $ref: '#/components/schemas/Audio'
            - type: 'null'
        content:
          anyOf:
            - type: string
            - items:
                anyOf:
                  - $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
                  - $ref: '#/components/schemas/ChatCompletionContentPartRefusalParam'
              type: array
            - type: 'null'
          title: Content
        function_call:
          anyOf:
            - $ref: >-
                #/components/schemas/openai__types__chat__chat_completion_assistant_message_param__FunctionCall
            - type: 'null'
        name:
          title: Name
          type: string
        refusal:
          anyOf:
            - type: string
            - type: 'null'
          title: Refusal
        role:
          const: assistant
          title: Role
          type: string
        tool_calls:
          items:
            anyOf:
              - $ref: >-
                  #/components/schemas/ChatCompletionMessageFunctionToolCallParam
              - $ref: '#/components/schemas/ChatCompletionMessageCustomToolCallParam'
          title: Tool Calls
          type: array
      required:
        - role
      title: ChatCompletionAssistantMessageParam
      type: object
    # Chat message with role `tool`. `content`, `role` and `tool_call_id` are
    # all required.
    ChatCompletionToolMessageParam:
      properties:
        content:
          anyOf:
            - type: string
            - items:
                $ref: '#/components/schemas/ChatCompletionContentPartTextParam'
              type: array
          title: Content
        role:
          const: tool
          title: Role
          type: string
        tool_call_id:
          title: Tool Call Id
          type: string
      required:
        - content
        - role
        - tool_call_id
      title: ChatCompletionToolMessageParam
      type: object
    # Chat message with role `function`. `content` must be present but may be
    # null; `name` and `role` are also required.
    ChatCompletionFunctionMessageParam:
      properties:
        content:
          anyOf:
            - type: string
            - type: 'null'
          title: Content
        name:
          title: Name
          type: string
        role:
          const: function
          title: Role
          type: string
      required:
        - content
        - name
        - role
      title: ChatCompletionFunctionMessageParam
      type: object
    # A completion choice. `finish_reason`, `index` and `message` are
    # required; `logprobs` is optional and may be null. Extra properties are
    # allowed.
    Choice:
      additionalProperties: true
      properties:
        finish_reason:
          enum:
            - stop
            - length
            - tool_calls
            - content_filter
            - function_call
          title: Finish Reason
          type: string
        index:
          title: Index
          type: integer
        logprobs:
          anyOf:
            - $ref: '#/components/schemas/ChoiceLogprobs'
            - type: 'null'
        message:
          $ref: '#/components/schemas/ChatCompletionMessage'
      required:
        - finish_reason
        - index
        - message
      title: Choice
      type: object
    # Tool entry of type `function` wrapping a FunctionDefinition; both
    # fields are required.
    ChatCompletionFunctionToolParam:
      description: 応答の生成に使用できる function tool です。
      properties:
        function:
          $ref: '#/components/schemas/FunctionDefinition'
        type:
          const: function
          title: Type
          type: string
      required:
        - function
        - type
      title: ChatCompletionFunctionToolParam
      type: object
    # Content part of type `text`; `text` and `type` are required.
    ChatCompletionContentPartTextParam:
      description: >-
        [テキスト入力](https://platform.openai.com/docs/guides/text-generation)についてはこちらをご覧ください。
      properties:
        text:
          title: Text
          type: string
        type:
          const: text
          title: Type
          type: string
      required:
        - text
        - type
      title: ChatCompletionContentPartTextParam
      type: object
    # Content part of type `image_url`; the image details are an ImageURL.
    ChatCompletionContentPartImageParam:
      description: '[画像入力](https://platform.openai.com/docs/guides/vision)について詳しくはこちら。'
      properties:
        image_url:
          $ref: '#/components/schemas/ImageURL'
        type:
          const: image_url
          title: Type
          type: string
      required:
        - image_url
        - type
      title: ChatCompletionContentPartImageParam
      type: object
    # Content part of type `input_audio`; the audio payload is an InputAudio.
    ChatCompletionContentPartInputAudioParam:
      description: '[オーディオ入力](https://platform.openai.com/docs/guides/audio)についてはこちらをご覧ください。'
      properties:
        input_audio:
          $ref: '#/components/schemas/InputAudio'
        type:
          const: input_audio
          title: Type
          type: string
      required:
        - input_audio
        - type
      title: ChatCompletionContentPartInputAudioParam
      type: object
    # Content part of type `file`; the file reference is a FileFile.
    File:
      description: >-
        テキスト生成の [file inputs](https://platform.openai.com/docs/guides/text)
        について確認してください。
      properties:
        file:
          $ref: '#/components/schemas/FileFile'
        type:
          const: file
          title: Type
          type: string
      required:
        - file
        - type
      title: File
      type: object
    # Reference to a previous audio response from the model, identified by a
    # required `id`.
    Audio:
      description: |-
        モデルからの過去のオーディオレスポンスに関するデータ。
        [詳細はこちら](https://platform.openai.com/docs/guides/audio)。
      properties:
        id:
          title: Id
          type: string
      required:
        - id
      title: Audio
      type: object
    # Content part of type `refusal`; `refusal` and `type` are required.
    ChatCompletionContentPartRefusalParam:
      properties:
        refusal:
          title: Refusal
          type: string
        type:
          const: refusal
          title: Type
          type: string
      required:
        - refusal
        - type
      title: ChatCompletionContentPartRefusalParam
      type: object
    # Function call used by ChatCompletionAssistantMessageParam.function_call:
    # `arguments` and `name` strings, both required. The description marks it
    # as deprecated in favour of `tool_calls`. It shares the title
    # `FunctionCall` with the separate `FunctionCall` schema.
    openai__types__chat__chat_completion_assistant_message_param__FunctionCall:
      description: |-
        非推奨であり、`tool_calls` に置き換えられました。

        モデルによって生成された、呼び出す対象の関数の名前と引数です。
      properties:
        arguments:
          title: Arguments
          type: string
        name:
          title: Name
          type: string
      required:
        - arguments
        - name
      title: FunctionCall
      type: object
    # Tool call of type `function` on an assistant message param; `id`,
    # `function` and `type` are all required.
    ChatCompletionMessageFunctionToolCallParam:
      description: モデルによって作成された function tool の呼び出しです。
      properties:
        function:
          $ref: >-
            #/components/schemas/openai__types__chat__chat_completion_message_function_tool_call_param__Function
        id:
          title: Id
          type: string
        type:
          const: function
          title: Type
          type: string
      required:
        - id
        - function
        - type
      title: ChatCompletionMessageFunctionToolCallParam
      type: object
    # Tool call of type `custom` on an assistant message param; `id`,
    # `custom` and `type` are all required.
    ChatCompletionMessageCustomToolCallParam:
      description: モデルによって作成されたカスタムツールの呼び出しです。
      properties:
        custom:
          $ref: >-
            #/components/schemas/openai__types__chat__chat_completion_message_custom_tool_call_param__Custom
        id:
          title: Id
          type: string
        type:
          const: custom
          title: Type
          type: string
      required:
        - id
        - custom
        - type
      title: ChatCompletionMessageCustomToolCallParam
      type: object
    # Log-probability information for a choice: `content` and `refusal` are
    # each an optional, nullable list of ChatCompletionTokenLogprob. Extra
    # properties are allowed.
    ChoiceLogprobs:
      additionalProperties: true
      description: この choice の対数確率情報。
      properties:
        content:
          anyOf:
            - items:
                $ref: '#/components/schemas/ChatCompletionTokenLogprob'
              type: array
            - type: 'null'
          title: Content
        refusal:
          anyOf:
            - items:
                $ref: '#/components/schemas/ChatCompletionTokenLogprob'
              type: array
            - type: 'null'
          title: Refusal
      title: ChoiceLogprobs
      type: object
    # Model-generated chat completion message. Only `role` (const
    # `assistant`) is required; every other property is nullable. Extra
    # properties are allowed.
    ChatCompletionMessage:
      additionalProperties: true
      description: モデルによって生成された chat completion メッセージです。
      properties:
        annotations:
          anyOf:
            - items:
                $ref: '#/components/schemas/Annotation'
              type: array
            - type: 'null'
          title: Annotations
        audio:
          anyOf:
            - $ref: '#/components/schemas/ChatCompletionAudio'
            - type: 'null'
        content:
          anyOf:
            - type: string
            - type: 'null'
          title: Content
        function_call:
          anyOf:
            - $ref: '#/components/schemas/FunctionCall'
            - type: 'null'
        refusal:
          anyOf:
            - type: string
            - type: 'null'
          title: Refusal
        role:
          const: assistant
          title: Role
          type: string
        tool_calls:
          anyOf:
            - items:
                anyOf:
                  - $ref: '#/components/schemas/ChatCompletionMessageFunctionToolCall'
                  - $ref: '#/components/schemas/ChatCompletionMessageCustomToolCall'
              type: array
            - type: 'null'
          title: Tool Calls
      required:
        - role
      title: ChatCompletionMessage
      type: object
    # Function definition used by ChatCompletionFunctionToolParam. Only
    # `name` is required; `parameters` is a free-form object and `strict` is
    # a nullable boolean.
    FunctionDefinition:
      properties:
        description:
          title: Description
          type: string
        name:
          title: Name
          type: string
        parameters:
          additionalProperties: true
          title: Parameters
          type: object
        strict:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Strict
      required:
        - name
      title: FunctionDefinition
      type: object
    # Image reference: `url` is required; optional `detail` is one of
    # auto, low or high.
    ImageURL:
      properties:
        detail:
          enum:
            - auto
            - low
            - high
          title: Detail
          type: string
        url:
          title: Url
          type: string
      required:
        - url
      title: ImageURL
      type: object
    # Audio payload: a `data` string plus a `format` of wav or mp3; both are
    # required.
    InputAudio:
      properties:
        data:
          title: Data
          type: string
        format:
          enum:
            - wav
            - mp3
          title: Format
          type: string
      required:
        - data
        - format
      title: InputAudio
      type: object
    # File reference fields (`file_data`, `file_id`, `filename`); none of
    # them is required by this schema.
    FileFile:
      properties:
        file_data:
          title: File Data
          type: string
        file_id:
          title: File Id
          type: string
        filename:
          title: Filename
          type: string
      title: FileFile
      type: object
    # Function referenced by ChatCompletionMessageFunctionToolCallParam:
    # `arguments` and `name` strings, both required. It shares the title
    # `Function` with the separate `Function` schema.
    openai__types__chat__chat_completion_message_function_tool_call_param__Function:
      description: モデルが呼び出した関数です。
      properties:
        arguments:
          title: Arguments
          type: string
        name:
          title: Name
          type: string
      required:
        - arguments
        - name
      title: Function
      type: object
    # Custom tool referenced by ChatCompletionMessageCustomToolCallParam:
    # `input` and `name` strings, both required. It shares the title `Custom`
    # with the separate `Custom` schema.
    openai__types__chat__chat_completion_message_custom_tool_call_param__Custom:
      description: モデルが呼び出したカスタムツールです。
      properties:
        input:
          title: Input
          type: string
        name:
          title: Name
          type: string
      required:
        - input
        - name
      title: Custom
      type: object
    # Log-probability record for one token. `token`, `logprob` and
    # `top_logprobs` are required; `bytes` is optional and may be null. Extra
    # properties are allowed.
    ChatCompletionTokenLogprob:
      additionalProperties: true
      properties:
        bytes:
          anyOf:
            - items:
                type: integer
              type: array
            - type: 'null'
          title: Bytes
        logprob:
          title: Logprob
          type: number
        token:
          title: Token
          type: string
        top_logprobs:
          items:
            $ref: '#/components/schemas/TopLogprob'
          title: Top Logprobs
          type: array
      required:
        - token
        - logprob
        - top_logprobs
      title: ChatCompletionTokenLogprob
      type: object
    # Annotation of type `url_citation`; `type` and `url_citation` are both
    # required. Extra properties are allowed.
    Annotation:
      additionalProperties: true
      description: web search を使用する際の URL 引用。
      properties:
        type:
          const: url_citation
          title: Type
          type: string
        url_citation:
          $ref: '#/components/schemas/AnnotationURLCitation'
      required:
        - type
        - url_citation
      title: Annotation
      type: object
    # Audio response data on a ChatCompletionMessage. `id`, `data`,
    # `expires_at` and `transcript` are all required. Extra properties are
    # allowed.
    ChatCompletionAudio:
      additionalProperties: true
      # Kept on one line: a blank line inside a folded scalar becomes a hard
      # newline and would split the first sentence.
      description: >-
        オーディオ出力モダリティが要求された場合、このオブジェクトにはモデルからのオーディオレスポンスに関するデータが含まれます。[詳細はこちら](https://platform.openai.com/docs/guides/audio)。
      properties:
        data:
          title: Data
          type: string
        expires_at:
          title: Expires At
          type: integer
        id:
          title: Id
          type: string
        transcript:
          title: Transcript
          type: string
      required:
        - id
        - data
        - expires_at
        - transcript
      title: ChatCompletionAudio
      type: object
    # Function call used by ChatCompletionMessage.function_call: `arguments`
    # and `name` strings, both required. The description marks it as
    # deprecated in favour of `tool_calls`. Extra properties are allowed.
    FunctionCall:
      additionalProperties: true
      description: |-
        非推奨であり、`tool_calls` に置き換えられました。

        モデルによって生成された、呼び出す対象の関数の名前と引数です。
      properties:
        arguments:
          title: Arguments
          type: string
        name:
          title: Name
          type: string
      required:
        - arguments
        - name
      title: FunctionCall
      type: object
    # Tool call of type `function` on a ChatCompletionMessage; `id`,
    # `function` and `type` are all required. Extra properties are allowed.
    ChatCompletionMessageFunctionToolCall:
      additionalProperties: true
      description: モデルによって作成された function tool の呼び出しです。
      properties:
        function:
          $ref: '#/components/schemas/Function'
        id:
          title: Id
          type: string
        type:
          const: function
          title: Type
          type: string
      required:
        - id
        - function
        - type
      title: ChatCompletionMessageFunctionToolCall
      type: object
    # Tool call of type `custom` on a ChatCompletionMessage; `id`, `custom`
    # and `type` are all required. Extra properties are allowed.
    ChatCompletionMessageCustomToolCall:
      additionalProperties: true
      description: モデルによって作成されたカスタムツールの呼び出しです。
      properties:
        custom:
          $ref: '#/components/schemas/Custom'
        id:
          title: Id
          type: string
        type:
          const: custom
          title: Type
          type: string
      required:
        - id
        - custom
        - type
      title: ChatCompletionMessageCustomToolCall
      type: object
    # Item type of ChatCompletionTokenLogprob.top_logprobs. `token` and
    # `logprob` are required; `bytes` is optional and may be null. Extra
    # properties are allowed.
    TopLogprob:
      additionalProperties: true
      properties:
        bytes:
          anyOf:
            - items:
                type: integer
              type: array
            - type: 'null'
          title: Bytes
        logprob:
          title: Logprob
          type: number
        token:
          title: Token
          type: string
      required:
        - token
        - logprob
      title: TopLogprob
      type: object
    # URL citation details used by Annotation.url_citation: integer
    # `start_index` / `end_index` plus `title` and `url` strings, all
    # required. Extra properties are allowed.
    AnnotationURLCitation:
      additionalProperties: true
      description: web search を使用する際の URL 引用。
      properties:
        end_index:
          title: End Index
          type: integer
        start_index:
          title: Start Index
          type: integer
        title:
          title: Title
          type: string
        url:
          title: Url
          type: string
      required:
        - end_index
        - start_index
        - title
        - url
      title: AnnotationURLCitation
      type: object
    # Function referenced by ChatCompletionMessageFunctionToolCall:
    # `arguments` and `name` strings, both required. Extra properties are
    # allowed.
    Function:
      additionalProperties: true
      description: モデルが呼び出した関数。
      properties:
        arguments:
          title: Arguments
          type: string
        name:
          title: Name
          type: string
      required:
        - arguments
        - name
      title: Function
      type: object
    # Custom tool referenced by ChatCompletionMessageCustomToolCall: `input`
    # and `name` strings, both required. Extra properties are allowed.
    Custom:
      additionalProperties: true
      description: モデルが呼び出したカスタムツール。
      properties:
        input:
          title: Input
          type: string
        name:
          title: Name
          type: string
      required:
        - input
        - name
      title: Custom
      type: object
  # HTTP bearer authentication; referenced by the operation's `security`
  # entry under the name HTTPBearer.
  securitySchemes:
    HTTPBearer:
      scheme: bearer
      type: http

````