Compare commits


No commits in common. "main" and "ref-gmk" have entirely different histories.

13 changed files with 0 additions and 52331 deletions

.gitattributes

@@ -1,38 +0,0 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text
model.safetensors filter=lfs diff=lfs merge=lfs -text
tokenizer.model filter=lfs diff=lfs merge=lfs -text

@@ -1,243 +0,0 @@
import triton_python_backend_utils as pb_utils
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import numpy as np
import json
import os


class TritonPythonModel:
    def initialize(self, args):
        """
        Called only once when the model is loaded.
        Implementing `initialize` is optional. It lets the model initialize
        any state associated with the model.
        """
        self.logger = pb_utils.Logger
        current_file_path = os.path.abspath(__file__)
        self.logger.log_info(f"current_file_path: {current_file_path}")
        self.model_name = args["model_name"]
        model_repository = args["model_repository"]
        model_path = f"{model_repository}/{self.model_name}"
        # model_path = "/cheetah/input/model/gemma-3-1b-it/gemma-3-1b-it"
        input_model_path = model_path
        if os.path.exists(input_model_path):
            file_list = os.listdir(input_model_path)
            self.logger.log_info(f"Files in directory '{input_model_path}':")
            for file_name in file_list:
                self.logger.log_info(file_name)
        else:
            self.logger.log_info(f"Directory '{input_model_path}' does not exist.")
        self.logger.log_info(f"model_repository: {model_repository}")
        self.logger.log_info(f"model_path: {model_path}")
        self.model_config = json.loads(args["model_config"])
        # Load the pretrained tokenizer from the Hugging Face Transformers library.
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        self.supports_chat_template = self._check_chat_template_support()
        # Load the pretrained language model from the Hugging Face Transformers library.
        self.model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name_or_path=model_path,
            local_files_only=True,
            device_map="auto"
        )
        self.enable_inference_trace = self._get_inference_trace_setting()
        self.logger.log_info(f"Finished initializing model '{self.model_name}'")

    def execute(self, requests):
        """
        The execute function that Triton calls for each inference request.
        """
        responses = []
        # Iterate over the inference requests and process each one.
        for request in requests:
            # Parse the Triton input.
            input_text = self._get_input_value(request, "text_input")
            text = ""
            conversation = ""
            input_token_length = 0  # Holds the length of the input in tokens.
            # Check whether the input text is a JSON-formatted conversation history.
            try:
                conversation = json.loads(input_text)
                is_chat = True
                self.logger.log_info(f"Input conversation:\n{conversation}")
            except Exception:
                # If JSON parsing fails, treat the input as plain text.
                text = input_text
                is_chat = False
                self.logger.log_info(f"Input text:\n{text}")
            # Tokenize the input text.
            if self.supports_chat_template and is_chat:
                self.logger.log_info("Tokenizing with the chat template applied.")
                inputs = self.tokenizer.apply_chat_template(
                    conversation,
                    tokenize=True,
                    add_generation_prompt=True,
                    return_tensors="pt",
                    return_dict=True
                ).to(device=self.model.device)
            else:
                self.logger.log_info("Tokenizing the input text.")
                inputs = self.tokenizer(
                    text,
                    return_tensors="pt").to(device=self.model.device)
            input_ids = inputs["input_ids"]
            attention_mask = inputs["attention_mask"]
            input_token_length = inputs["input_ids"].shape[-1]
            # Generate text with the language model.
            gened = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                generation_config=self._process_generation_config(request),
                pad_token_id=self.tokenizer.pad_token_id,
            )
            # Decode the generated token sequence to text, excluding the input text.
            generated_tokens = gened[0][input_token_length:]  # Slice from just after the input tokens.
            gened_text = self.tokenizer.decode(generated_tokens, skip_special_tokens=True)
            self.logger.log_info(f"Token sequence generated by the model (input text excluded):\n{gened_text}")
            output = gened_text.strip()
            # Convert the generated text into a Triton output tensor.
            output_tensor = pb_utils.Tensor("text_output", np.array(output.encode('utf-8'), dtype=np.bytes_))
            # Build the response object and attach the output tensor.
            responses.append(pb_utils.InferenceResponse(output_tensors=[output_tensor]))
        return responses

    def _process_generation_config(self, request):
        """
        Extracts the generation parameters from the inference request and builds a GenerationConfig object.
        Args:
            request (pb_utils.InferenceRequest): Triton inference request object.
        Returns:
            transformers.GenerationConfig: The resulting GenerationConfig object.
        """
        max_length = self._get_input_value(request, "max_length", default=20)
        max_new_tokens = self._get_input_value(request, "max_new_tokens")
        temperature = self._get_input_value(request, "temperature")
        do_sample = self._get_input_value(request, "do_sample")
        top_k = self._get_input_value(request, "top_k")
        top_p = self._get_input_value(request, "top_p")
        repetition_penalty = self._get_input_value(request, "repetition_penalty")
        stream = self._get_input_value(request, "stream")
        generation_config = GenerationConfig(
            max_length=max_length,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=do_sample,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stream=stream,
        )
        self.logger.log_info(f"GenerationConfig for this inference request:\n{generation_config}")
        return generation_config

    def _get_inference_trace_setting(self):
        """
        Reads the 'enable_inference_trace' value from the model configuration (config.pbtxt).
        Returns False by default when the setting is missing or not in the expected format.
        Returns:
            bool: Whether inference tracing is enabled (True or False).
        """
        parameters = self.model_config.get('parameters', {})
        trace_config = parameters.get('enable_inference_trace')
        if isinstance(trace_config, dict) and 'string_value' in trace_config:
            return trace_config['string_value'].lower() == 'true'  # Convert the string value to a bool.
        return False

    def _check_chat_template_support(self):
        """
        Checks whether the tokenizer of the given Hugging Face Transformers model supports a chat template and logs the result.
        Returns:
            bool: Whether a chat template is supported (True or False).
        """
        try:
            if hasattr(self.tokenizer, "chat_template") and self.tokenizer.chat_template is not None:
                self.logger.log_info(f"The tokenizer for model '{self.model_name}' supports a chat template.")
                self.logger.log_info("Chat template contents:")
                self.logger.log_info(self.tokenizer.chat_template)
                return True
            else:
                self.logger.log_info(f"The tokenizer for model '{self.model_name}' does not directly support a chat template, or no chat template information is available.")
                return False
        except Exception as e:
            self.logger.log_info(f"An error occurred while inspecting the tokenizer for model '{self.model_name}': {e}")
            return False

    def _get_input_value(self, request, input_name: str, default=None):
        """
        Fetches the value of the named input tensor from the Triton inference request.
        Args:
            request (pb_utils.InferenceRequest): Triton inference request object.
            input_name (str): Name of the input tensor to fetch.
            default (any, optional): Value to return when the input tensor is absent. Defaults to None.
        Returns:
            any: The decoded value of the input tensor, or the default if the tensor is absent.
        """
        tensor_value = pb_utils.get_input_tensor_by_name(request, input_name)
        if tensor_value is None:
            return default
        return self._np_decoder(tensor_value.as_numpy()[0])

    def _np_decoder(self, obj):
        """
        Checks the data type of a NumPy object and converts it to a native Python type.
        Args:
            obj (numpy.ndarray element): Element of a NumPy array to convert.
        Returns:
            any: The corresponding native Python type (str, int, float, bool).
                 bytes values are decoded as UTF-8.
        """
        if isinstance(obj, bytes):
            return obj.decode('utf-8')
        if np.issubdtype(obj, np.integer):
            return int(obj)
        if np.issubdtype(obj, np.floating):
            return round(float(obj), 3)
        if isinstance(obj, np.bool_):
            return bool(obj)
        return obj  # Fallback: return any other type unchanged.

    def finalize(self):
        """
        Called when the model is unloaded as the Triton server shuts down.
        Implementing `finalize` is optional. It lets the model perform any
        cleanup needed before shutdown.
        """
        pass
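
For reference, `execute` above treats `text_input` as a JSON-encoded conversation when it parses as JSON, and as plain text otherwise. The snippet below is a minimal sketch of both payload shapes a client might send; the roles and message text are illustrative, not taken from this repository.

```python
import json

# Chat-style payload: a list of {role, content} turns serialized to JSON.
# execute() calls json.loads() on text_input; on success the result is fed
# to tokenizer.apply_chat_template(), otherwise plain-text tokenization is used.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain what Git LFS does in one sentence."},
]
chat_payload = json.dumps(conversation)

# Plain-text payload: anything that is not valid JSON takes the fallback path.
plain_payload = "Explain what Git LFS does in one sentence."
```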

@@ -1,89 +0,0 @@
# Triton backend to use
name: "gemma-3-1b-it"
backend: "python"
max_batch_size: 0
# Triton should expect as input a single string
# input of variable length named 'text_input'
input [
  {
    name: "text_input"
    data_type: TYPE_STRING
    dims: [ -1 ]
  },
  {
    name: "max_length"
    data_type: TYPE_INT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "max_new_tokens"
    data_type: TYPE_INT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "do_sample"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    optional: true
  },
  {
    name: "top_k"
    data_type: TYPE_INT32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "top_p"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "temperature"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "repetition_penalty"
    data_type: TYPE_FP32
    dims: [ 1 ]
    optional: true
  },
  {
    name: "stream"
    data_type: TYPE_BOOL
    dims: [ 1 ]
    optional: true
  }
]
# Triton should expect to respond with a single string
# output of variable length named 'text_output'
output [
  {
    name: "text_output"
    data_type: TYPE_STRING
    dims: [ -1 ]
  }
]
parameters: [
  {
    key: "enable_inference_trace",
    value: {string_value: "False"}
  }
]
instance_group [
  {
    kind: KIND_AUTO,
    count: 1
  }
]
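
Given the inputs and outputs declared in this config, a client can call the model over Triton's HTTP API. Below is a minimal sketch using the `tritonclient` package; the server address `localhost:8000`, the prompt, and the parameter values are assumptions for illustration.

```python
import numpy as np
import tritonclient.http as httpclient

# Assumes a Triton server exposing this model at localhost:8000.
client = httpclient.InferenceServerClient(url="localhost:8000")

# "text_input" is TYPE_STRING (BYTES on the wire).
text = np.array(["Write a short haiku about autumn."], dtype=np.object_)
text_in = httpclient.InferInput("text_input", text.shape, "BYTES")
text_in.set_data_from_numpy(text)

# Optional generation parameters map to the optional inputs declared above.
max_new = np.array([64], dtype=np.int32)
max_new_in = httpclient.InferInput("max_new_tokens", max_new.shape, "INT32")
max_new_in.set_data_from_numpy(max_new)

result = client.infer(
    model_name="gemma-3-1b-it",
    inputs=[text_in, max_new_in],
    outputs=[httpclient.InferRequestedOutput("text_output")],
)
print(result.as_numpy("text_output"))
```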

@@ -1,514 +0,0 @@
---
license: gemma
library_name: transformers
pipeline_tag: text-generation
extra_gated_heading: Access Gemma on Hugging Face
extra_gated_prompt: To access Gemma on Hugging Face, you're required to review and
  agree to Google's usage license. To do this, please ensure you're logged in to Hugging
  Face and click below. Requests are processed immediately.
extra_gated_button_content: Acknowledge license
base_model: google/gemma-3-1b-pt
---
# Gemma 3 model card
**Model Page**: [Gemma](https://ai.google.dev/gemma/docs/core)
**Resources and Technical Documentation**:
* [Gemma 3 Technical Report][g3-tech-report]
* [Responsible Generative AI Toolkit][rai-toolkit]
* [Gemma on Kaggle][kaggle-gemma]
* [Gemma on Vertex Model Garden][vertex-mg-gemma3]
**Terms of Use**: [Terms][terms]
**Authors**: Google DeepMind
## Model Information
Summary description and brief definition of inputs and outputs.
### Description
Gemma is a family of lightweight, state-of-the-art open models from Google,
built from the same research and technology used to create the Gemini models.
Gemma 3 models are multimodal, handling text and image input and generating text
output, with open weights for both pre-trained variants and instruction-tuned
variants. Gemma 3 has a large, 128K context window, multilingual support in over
140 languages, and is available in more sizes than previous versions. Gemma 3
models are well-suited for a variety of text generation and image understanding
tasks, including question answering, summarization, and reasoning. Their
relatively small size makes it possible to deploy them in environments with
limited resources such as laptops, desktops or your own cloud infrastructure,
democratizing access to state of the art AI models and helping foster innovation
for everyone.
### Inputs and outputs
- **Input:**
    - Text string, such as a question, a prompt, or a document to be summarized
    - Images, normalized to 896 x 896 resolution and encoded to 256 tokens
      each
    - Total input context of 128K tokens for the 4B, 12B, and 27B sizes, and
      32K tokens for the 1B size
- **Output:**
    - Generated text in response to the input, such as an answer to a
      question, analysis of image content, or a summary of a document
    - Total output context of 8192 tokens
### Usage
Below are some code snippets to help you quickly get started running the model. First, install the Transformers library; Gemma 3 is supported starting from transformers 4.50.0.
```sh
$ pip install -U transformers
```
Then, copy the snippet from the section that is relevant for your use case.
#### Running with the `pipeline` API
With instruction-tuned models, you need to use chat templates to process your inputs first. Then, you can pass them to the pipeline.
```python
from transformers import pipeline
import torch
pipe = pipeline("text-generation", model="google/gemma-3-1b-it", device="cuda", torch_dtype=torch.bfloat16)
messages = [
[
{
"role": "system",
"content": [{"type": "text", "text": "You are a helpful assistant."},]
},
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"},]
},
],
]
output = pipe(messages, max_new_tokens=50)
```
#### Running the model on a single / multi GPU
```python
from transformers import AutoTokenizer, BitsAndBytesConfig, Gemma3ForCausalLM
import torch
model_id = "google/gemma-3-1b-it"
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = Gemma3ForCausalLM.from_pretrained(
model_id, quantization_config=quantization_config
).eval()
tokenizer = AutoTokenizer.from_pretrained(model_id)
messages = [
[
{
"role": "system",
"content": [{"type": "text", "text": "You are a helpful assistant."},]
},
{
"role": "user",
"content": [{"type": "text", "text": "Write a poem on Hugging Face, the company"},]
},
],
]
inputs = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
tokenize=True,
return_dict=True,
return_tensors="pt",
).to(model.device).to(torch.bfloat16)
with torch.inference_mode():
outputs = model.generate(**inputs, max_new_tokens=64)
outputs = tokenizer.batch_decode(outputs)
```
### Citation
```none
@article{gemma_2025,
title={Gemma 3},
url={https://goo.gle/Gemma3Report},
publisher={Kaggle},
author={Gemma Team},
year={2025}
}
```
## Model Data
Data used for model training and how the data was processed.
### Training Dataset
These models were trained on a dataset of text data that includes a wide variety
of sources. The 27B model was trained with 14 trillion tokens, the 12B model with
12 trillion tokens, the 4B model with 4 trillion tokens, and the 1B model with
2 trillion tokens. Here are the key components:
- Web Documents: A diverse collection of web text ensures the model is
exposed to a broad range of linguistic styles, topics, and vocabulary. The
training dataset includes content in over 140 languages.
- Code: Exposing the model to code helps it to learn the syntax and
patterns of programming languages, which improves its ability to generate
code and understand code-related questions.
- Mathematics: Training on mathematical text helps the model learn logical
reasoning, symbolic representation, and to address mathematical queries.
- Images: A wide range of images enables the model to perform image
analysis and visual data extraction tasks.
The combination of these diverse data sources is crucial for training a powerful
multimodal model that can handle a wide variety of different tasks and data
formats.
### Data Preprocessing
Here are the key data cleaning and filtering methods applied to the training
data:
- CSAM Filtering: Rigorous CSAM (Child Sexual Abuse Material) filtering
was applied at multiple stages in the data preparation process to ensure
the exclusion of harmful and illegal content.
- Sensitive Data Filtering: As part of making Gemma pre-trained models
safe and reliable, automated techniques were used to filter out certain
personal information and other sensitive data from training sets.
- Additional methods: Filtering based on content quality and safety in
line with [our policies][safety-policies].
## Implementation Information
Details about the model internals.
### Hardware
Gemma was trained using [Tensor Processing Unit (TPU)][tpu] hardware (TPUv4p,
TPUv5p and TPUv5e). Training vision-language models (VLMs) requires significant
computational power. TPUs, designed specifically for matrix operations common in
machine learning, offer several advantages in this domain:
- Performance: TPUs are specifically designed to handle the massive
computations involved in training VLMs. They can speed up training
considerably compared to CPUs.
- Memory: TPUs often come with large amounts of high-bandwidth memory,
allowing for the handling of large models and batch sizes during training.
This can lead to better model quality.
- Scalability: TPU Pods (large clusters of TPUs) provide a scalable
solution for handling the growing complexity of large foundation models.
You can distribute training across multiple TPU devices for faster and more
efficient processing.
- Cost-effectiveness: In many scenarios, TPUs can provide a more
cost-effective solution for training large models compared to CPU-based
infrastructure, especially when considering the time and resources saved
due to faster training.
- These advantages are aligned with
[Google's commitments to operate sustainably][sustainability].
### Software
Training was done using [JAX][jax] and [ML Pathways][ml-pathways].
JAX allows researchers to take advantage of the latest generation of hardware,
including TPUs, for faster and more efficient training of large models. ML
Pathways is Google's latest effort to build artificially intelligent systems
capable of generalizing across multiple tasks. This is especially suitable for
foundation models, including large language models like these ones.
Together, JAX and ML Pathways are used as described in the
[paper about the Gemini family of models][gemini-2-paper]; *"the 'single
controller' programming model of Jax and Pathways allows a single Python
process to orchestrate the entire training run, dramatically simplifying the
development workflow."*
## Evaluation
Model evaluation metrics and results.
### Benchmark Results
These models were evaluated against a large collection of different datasets and
metrics to cover different aspects of text generation:
#### Reasoning and factuality
| Benchmark | Metric | Gemma 3 PT 1B | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
| ------------------------------ |----------------|:--------------:|:-------------:|:--------------:|:--------------:|
| [HellaSwag][hellaswag] | 10-shot | 62.3 | 77.2 | 84.2 | 85.6 |
| [BoolQ][boolq] | 0-shot | 63.2 | 72.3 | 78.8 | 82.4 |
| [PIQA][piqa] | 0-shot | 73.8 | 79.6 | 81.8 | 83.3 |
| [SocialIQA][socialiqa] | 0-shot | 48.9 | 51.9 | 53.4 | 54.9 |
| [TriviaQA][triviaqa] | 5-shot | 39.8 | 65.8 | 78.2 | 85.5 |
| [Natural Questions][naturalq] | 5-shot | 9.48 | 20.0 | 31.4 | 36.1 |
| [ARC-c][arc] | 25-shot | 38.4 | 56.2 | 68.9 | 70.6 |
| [ARC-e][arc] | 0-shot | 73.0 | 82.4 | 88.3 | 89.0 |
| [WinoGrande][winogrande] | 5-shot | 58.2 | 64.7 | 74.3 | 78.8 |
| [BIG-Bench Hard][bbh] | few-shot | 28.4 | 50.9 | 72.6 | 77.7 |
| [DROP][drop] | 1-shot | 42.4 | 60.1 | 72.2 | 77.2 |
[hellaswag]: https://arxiv.org/abs/1905.07830
[boolq]: https://arxiv.org/abs/1905.10044
[piqa]: https://arxiv.org/abs/1911.11641
[socialiqa]: https://arxiv.org/abs/1904.09728
[triviaqa]: https://arxiv.org/abs/1705.03551
[naturalq]: https://github.com/google-research-datasets/natural-questions
[arc]: https://arxiv.org/abs/1911.01547
[winogrande]: https://arxiv.org/abs/1907.10641
[bbh]: https://paperswithcode.com/dataset/bbh
[drop]: https://arxiv.org/abs/1903.00161
#### STEM and code
| Benchmark | Metric | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
| ------------------------------ |----------------|:-------------:|:--------------:|:--------------:|
| [MMLU][mmlu] | 5-shot | 59.6 | 74.5 | 78.6 |
| [MMLU][mmlu] (Pro COT) | 5-shot | 29.2 | 45.3 | 52.2 |
| [AGIEval][agieval] | 3-5-shot | 42.1 | 57.4 | 66.2 |
| [MATH][math] | 4-shot | 24.2 | 43.3 | 50.0 |
| [GSM8K][gsm8k] | 8-shot | 38.4 | 71.0 | 82.6 |
| [GPQA][gpqa] | 5-shot | 15.0 | 25.4 | 24.3 |
| [MBPP][mbpp] | 3-shot | 46.0 | 60.4 | 65.6 |
| [HumanEval][humaneval] | 0-shot | 36.0 | 45.7 | 48.8 |
[mmlu]: https://arxiv.org/abs/2009.03300
[agieval]: https://arxiv.org/abs/2304.06364
[math]: https://arxiv.org/abs/2103.03874
[gsm8k]: https://arxiv.org/abs/2110.14168
[gpqa]: https://arxiv.org/abs/2311.12022
[mbpp]: https://arxiv.org/abs/2108.07732
[humaneval]: https://arxiv.org/abs/2107.03374
#### Multilingual
| Benchmark | Gemma 3 PT 1B | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
| ------------------------------------ |:-------------:|:-------------:|:--------------:|:--------------:|
| [MGSM][mgsm] | 2.04 | 34.7 | 64.3 | 74.3 |
| [Global-MMLU-Lite][global-mmlu-lite] | 24.9 | 57.0 | 69.4 | 75.7 |
| [WMT24++][wmt24pp] (ChrF) | 36.7 | 48.4 | 53.9 | 55.7 |
| [FloRes][flores] | 29.5 | 39.2 | 46.0 | 48.8 |
| [XQuAD][xquad] (all) | 43.9 | 68.0 | 74.5 | 76.8 |
| [ECLeKTic][eclektic] | 4.69 | 11.0 | 17.2 | 24.4 |
| [IndicGenBench][indicgenbench] | 41.4 | 57.2 | 61.7 | 63.4 |
[mgsm]: https://arxiv.org/abs/2210.03057
[flores]: https://arxiv.org/abs/2106.03193
[xquad]: https://arxiv.org/abs/1910.11856v3
[global-mmlu-lite]: https://huggingface.co/datasets/CohereForAI/Global-MMLU-Lite
[wmt24pp]: https://arxiv.org/abs/2502.12404v1
[eclektic]: https://arxiv.org/abs/2502.21228
[indicgenbench]: https://arxiv.org/abs/2404.16816
#### Multimodal
| Benchmark | Gemma 3 PT 4B | Gemma 3 PT 12B | Gemma 3 PT 27B |
| ------------------------------ |:-------------:|:--------------:|:--------------:|
| [COCOcap][coco-cap] | 102 | 111 | 116 |
| [DocVQA][docvqa] (val) | 72.8 | 82.3 | 85.6 |
| [InfoVQA][info-vqa] (val) | 44.1 | 54.8 | 59.4 |
| [MMMU][mmmu] (pt) | 39.2 | 50.3 | 56.1 |
| [TextVQA][textvqa] (val) | 58.9 | 66.5 | 68.6 |
| [RealWorldQA][realworldqa] | 45.5 | 52.2 | 53.9 |
| [ReMI][remi] | 27.3 | 38.5 | 44.8 |
| [AI2D][ai2d] | 63.2 | 75.2 | 79.0 |
| [ChartQA][chartqa] | 63.6 | 74.7 | 76.3 |
| [VQAv2][vqav2] | 63.9 | 71.2 | 72.9 |
| [BLINK][blinkvqa] | 38.0 | 35.9 | 39.6 |
| [OKVQA][okvqa] | 51.0 | 58.7 | 60.2 |
| [TallyQA][tallyqa] | 42.5 | 51.8 | 54.3 |
| [SpatialSense VQA][ss-vqa] | 50.9 | 60.0 | 59.4 |
| [CountBenchQA][countbenchqa] | 26.1 | 17.8 | 68.0 |
[coco-cap]: https://cocodataset.org/#home
[docvqa]: https://www.docvqa.org/
[info-vqa]: https://arxiv.org/abs/2104.12756
[mmmu]: https://arxiv.org/abs/2311.16502
[textvqa]: https://textvqa.org/
[realworldqa]: https://paperswithcode.com/dataset/realworldqa
[remi]: https://arxiv.org/html/2406.09175v1
[ai2d]: https://allenai.org/data/diagrams
[chartqa]: https://arxiv.org/abs/2203.10244
[vqav2]: https://visualqa.org/index.html
[blinkvqa]: https://arxiv.org/abs/2404.12390
[okvqa]: https://okvqa.allenai.org/
[tallyqa]: https://arxiv.org/abs/1810.12440
[ss-vqa]: https://arxiv.org/abs/1908.02660
[countbenchqa]: https://github.com/google-research/big_vision/blob/main/big_vision/datasets/countbenchqa/
## Ethics and Safety
Ethics and safety evaluation approach and results.
### Evaluation Approach
Our evaluation methods include structured evaluations and internal red-teaming
testing of relevant content policies. Red-teaming was conducted by a number of
different teams, each with different goals and human evaluation metrics. These
models were evaluated against a number of different categories relevant to
ethics and safety, including:
- **Child Safety**: Evaluation of text-to-text and image to text prompts
covering child safety policies, including child sexual abuse and
exploitation.
- **Content Safety:** Evaluation of text-to-text and image to text prompts
covering safety policies, including harassment, violence and gore, and hate
speech.
- **Representational Harms**: Evaluation of text-to-text and image to text
prompts covering safety policies including bias, stereotyping, and harmful
associations or inaccuracies.
In addition to development level evaluations, we conduct "assurance
evaluations" which are our 'arms-length' internal evaluations for responsibility
governance decision making. They are conducted separately from the model
development team, to inform decision making about release. High level findings
are fed back to the model team, but prompt sets are held-out to prevent
overfitting and preserve the results' ability to inform decision making.
Assurance evaluation results are reported to our Responsibility & Safety Council
as part of release review.
### Evaluation Results
For all areas of safety testing, we saw major improvements in the categories of
child safety, content safety, and representational harms relative to previous
Gemma models. All testing was conducted without safety filters to evaluate the
model capabilities and behaviors. For both text-to-text and image-to-text, and
across all model sizes, the model produced minimal policy violations, and showed
significant improvements over previous Gemma models' performance with respect
to ungrounded inferences. A limitation of our evaluations was that they included only
English-language prompts.
## Usage and Limitations
These models have certain limitations that users should be aware of.
### Intended Usage
Open vision-language models (VLMs) have a wide range of applications
across various industries and domains. The following list of potential uses is
not comprehensive. The purpose of this list is to provide contextual information
about the possible use-cases that the model creators considered as part of model
training and development.
- Content Creation and Communication
    - Text Generation: These models can be used to generate creative text
      formats such as poems, scripts, code, marketing copy, and email drafts.
    - Chatbots and Conversational AI: Power conversational interfaces
      for customer service, virtual assistants, or interactive applications.
    - Text Summarization: Generate concise summaries of a text corpus,
      research papers, or reports.
    - Image Data Extraction: These models can be used to extract,
      interpret, and summarize visual data for text communications.
- Research and Education
    - Natural Language Processing (NLP) and VLM Research: These
      models can serve as a foundation for researchers to experiment with VLM
      and NLP techniques, develop algorithms, and contribute to the
      advancement of the field.
    - Language Learning Tools: Support interactive language learning
      experiences, aiding in grammar correction or providing writing practice.
    - Knowledge Exploration: Assist researchers in exploring large
      bodies of text by generating summaries or answering questions about
      specific topics.
### Limitations
- Training Data
    - The quality and diversity of the training data significantly
      influence the model's capabilities. Biases or gaps in the training data
      can lead to limitations in the model's responses.
    - The scope of the training dataset determines the subject areas
      the model can handle effectively.
- Context and Task Complexity
    - Models are better at tasks that can be framed with clear
      prompts and instructions. Open-ended or highly complex tasks might be
      challenging.
    - A model's performance can be influenced by the amount of context
      provided (longer context generally leads to better outputs, up to a
      certain point).
- Language Ambiguity and Nuance
    - Natural language is inherently complex. Models might struggle
      to grasp subtle nuances, sarcasm, or figurative language.
- Factual Accuracy
    - Models generate responses based on information they learned
      from their training datasets, but they are not knowledge bases. They
      may generate incorrect or outdated factual statements.
- Common Sense
    - Models rely on statistical patterns in language. They might
      lack the ability to apply common sense reasoning in certain situations.
### Ethical Considerations and Risks
The development of vision-language models (VLMs) raises several ethical
concerns. In creating an open model, we have carefully considered the following:
- Bias and Fairness
    - VLMs trained on large-scale, real-world text and image data can
      reflect socio-cultural biases embedded in the training material. These
      models underwent careful scrutiny; input data pre-processing is
      described and posterior evaluations are reported in this card.
- Misinformation and Misuse
    - VLMs can be misused to generate text that is false, misleading,
      or harmful.
    - Guidelines are provided for responsible use with the model; see the
      [Responsible Generative AI Toolkit][rai-toolkit].
- Transparency and Accountability:
    - This model card summarizes details on the models' architecture,
      capabilities, limitations, and evaluation processes.
    - A responsibly developed open model offers the opportunity to
      share innovation by making VLM technology accessible to developers and
      researchers across the AI ecosystem.
Risks identified and mitigations:
- **Perpetuation of biases**: It's encouraged to perform continuous
monitoring (using evaluation metrics, human review) and the exploration of
de-biasing techniques during model training, fine-tuning, and other use
cases.
- **Generation of harmful content**: Mechanisms and guidelines for content
safety are essential. Developers are encouraged to exercise caution and
implement appropriate content safety safeguards based on their specific
product policies and application use cases.
- **Misuse for malicious purposes**: Technical limitations and developer
and end-user education can help mitigate against malicious applications of
VLMs. Educational resources and reporting mechanisms for users to flag
misuse are provided. Prohibited uses of Gemma models are outlined in the
[Gemma Prohibited Use Policy][prohibited-use].
- **Privacy violations**: Models were trained on data filtered for removal
of certain personal information and other sensitive data. Developers are
encouraged to adhere to privacy regulations with privacy-preserving
techniques.
### Benefits
At the time of release, this family of models provides high-performance open
vision-language model implementations designed from the ground up for
responsible AI development compared to similarly sized models.
Using the benchmark evaluation metrics described in this document, these models
have been shown to provide superior performance to other, comparably sized open model
alternatives.
[g3-tech-report]: https://goo.gle/Gemma3Report
[rai-toolkit]: https://ai.google.dev/responsible
[kaggle-gemma]: https://www.kaggle.com/models/google/gemma-3
[vertex-mg-gemma3]: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/gemma3
[terms]: https://ai.google.dev/gemma/terms
[safety-policies]: https://ai.google/static/documents/ai-responsibility-update-published-february-2025.pdf
[prohibited-use]: https://ai.google.dev/gemma/prohibited_use_policy
[tpu]: https://cloud.google.com/tpu/docs/intro-to-tpu
[sustainability]: https://sustainability.google/operating-sustainably/
[jax]: https://github.com/jax-ml/jax
[ml-pathways]: https://blog.google/technology/ai/introducing-pathways-next-generation-ai-architecture/
[gemini-2-paper]: https://arxiv.org/abs/2312.11805

@@ -1,3 +0,0 @@
{
"<image_soft_token>": 262144
}

@@ -1,37 +0,0 @@
{
"architectures": [
"Gemma3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"attn_logit_softcapping": null,
"bos_token_id": 2,
"cache_implementation": "hybrid",
"eos_token_id": [
1,
106
],
"final_logit_softcapping": null,
"head_dim": 256,
"hidden_activation": "gelu_pytorch_tanh",
"hidden_size": 1152,
"initializer_range": 0.02,
"intermediate_size": 6912,
"max_position_embeddings": 32768,
"model_type": "gemma3_text",
"num_attention_heads": 4,
"num_hidden_layers": 26,
"num_key_value_heads": 1,
"pad_token_id": 0,
"query_pre_attn_scalar": 256,
"rms_norm_eps": 1e-06,
"rope_local_base_freq": 10000,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": 512,
"sliding_window_pattern": 6,
"torch_dtype": "bfloat16",
"transformers_version": "4.50.0.dev0",
"use_cache": true,
"vocab_size": 262144
}
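
To inspect these architecture settings programmatically, a minimal sketch using `transformers.AutoConfig` (assuming the library is installed and the gated checkpoint is accessible, locally or via the Hub):

```python
from transformers import AutoConfig

# Loads the same config.json shown above.
cfg = AutoConfig.from_pretrained("google/gemma-3-1b-it")
print(cfg.model_type)                                   # gemma3_text
print(cfg.hidden_size, cfg.num_hidden_layers)           # 1152, 26
print(cfg.max_position_embeddings, cfg.sliding_window)  # 32768, 512
```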

@@ -1,13 +0,0 @@
{
"bos_token_id": 2,
"cache_implementation": "hybrid",
"do_sample": true,
"eos_token_id": [
1,
106
],
"pad_token_id": 0,
"top_k": 64,
"top_p": 0.95,
"transformers_version": "4.50.0.dev0"
}
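
These are the sampling defaults shipped with the checkpoint. As a minimal sketch (same access assumptions as above), they can be loaded and selectively overridden, in the same spirit as `_process_generation_config` in the Triton backend:

```python
from transformers import GenerationConfig

# Reads the generation_config.json shown above.
gen_cfg = GenerationConfig.from_pretrained("google/gemma-3-1b-it")
print(gen_cfg.do_sample, gen_cfg.top_k, gen_cfg.top_p)  # True, 64, 0.95

# Request-level override, analogous to passing max_new_tokens to the backend.
gen_cfg.max_new_tokens = 128
```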

BIN
gemma-3-1b-it/model.safetensors (Stored with Git LFS)

Binary file not shown.

@@ -1,33 +0,0 @@
{
"boi_token": "<start_of_image>",
"bos_token": {
"content": "<bos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eoi_token": "<end_of_image>",
"eos_token": {
"content": "<eos>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"image_token": "<image_soft_token>",
"pad_token": {
"content": "<pad>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}
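
The special tokens declared here surface as attributes on the loaded tokenizer. A minimal sketch (same access assumptions as above), which also mirrors the `pad_token_id = eos_token_id` assignment in the Triton backend's `initialize`:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
print(tok.bos_token, tok.eos_token, tok.pad_token)  # <bos> <eos> <pad>

# The Triton backend reuses the EOS id for padding before generation.
tok.pad_token_id = tok.eos_token_id
```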

@@ -1,6 +0,0 @@
test.txt
test.txt
test.txt
test.txt
test.txt

BIN
gemma-3-1b-it/tokenizer.json (Stored with Git LFS)

Binary file not shown.

BIN
gemma-3-1b-it/tokenizer.model (Stored with Git LFS)

Binary file not shown.

File diff suppressed because it is too large