erndgn commited on
Commit
37a860c
·
verified ·
1 Parent(s): 8b34fb2

Upload CosmosGemma T1 GGUF models

Browse files
.gitattributes CHANGED
@@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Turkish-Gemma-9b-T1-F16.gguf filter=lfs diff=lfs merge=lfs -text
37
+ Turkish-Gemma-9b-T1.IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text
38
+ Turkish-Gemma-9b-T1.IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text
39
+ Turkish-Gemma-9b-T1.IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text
40
+ Turkish-Gemma-9b-T1.IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text
41
+ Turkish-Gemma-9b-T1.IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text
42
+ Turkish-Gemma-9b-T1.IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text
43
+ Turkish-Gemma-9b-T1.IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text
44
+ Turkish-Gemma-9b-T1.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text
45
+ Turkish-Gemma-9b-T1.Q3_K.gguf filter=lfs diff=lfs merge=lfs -text
46
+ Turkish-Gemma-9b-T1.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text
47
+ Turkish-Gemma-9b-T1.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
48
+ Turkish-Gemma-9b-T1.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text
49
+ Turkish-Gemma-9b-T1.Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
50
+ Turkish-Gemma-9b-T1.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
51
+ Turkish-Gemma-9b-T1.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text
52
+ Turkish-Gemma-9b-T1.Q5_0.gguf filter=lfs diff=lfs merge=lfs -text
53
+ Turkish-Gemma-9b-T1.Q5_1.gguf filter=lfs diff=lfs merge=lfs -text
54
+ Turkish-Gemma-9b-T1.Q5_K.gguf filter=lfs diff=lfs merge=lfs -text
55
+ Turkish-Gemma-9b-T1.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
56
+ Turkish-Gemma-9b-T1.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text
57
+ Turkish-Gemma-9b-T1.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
58
+ Turkish-Gemma-9b-T1.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: ytu-ce-cosmos/Turkish-Gemma-9b-T1
3
+ license: gemma
4
+ language:
5
+ - tr
6
+ - en
7
+ tags:
8
+ - gguf
9
+ - ggml
10
+ - gemma
11
+ - cosmosgemma
12
+ - turkish gemma
13
+ - t1
14
+ ---
15
+ # CosmosGemma T1 GGUFs
16
+
17
+ ## Objective
18
+ Due to the need for quantized models in real-time applications, we introduce our GGUF-formatted models. These models are part of the
19
+ GGML project, with the hope of democratizing the use of large models. Depending on the quantization type, there are 20+ models.
20
+
21
+ ### Features
22
+ * All quantization details are listed on the right by Hugging Face.
23
+ * All the models have been tested in `llama.cpp` environments, `llama-cli` and `llama-server`.
24
+ * Furthermore, a YouTube video has been made to introduce the basics of using `lmstudio` to utilize these models. 👇
25
+ [![lmstudio_yt](https://img.youtube.com/vi/JRID-6sRl7I/0.jpg)](https://www.youtube.com/watch?v=JRID-6sRl7I)
26
+
27
+
28
+ ### Code Example
29
+ Usage example with `llama-cpp-python`
30
+
31
+ ```py
32
+ from llama_cpp import Llama
33
+
34
+ # Define the inference parameters
35
+ inference_params = {
36
+ "n_threads": 4,
37
+ "n_predict": -1,
38
+ "top_k": 40,
39
+ "min_p": 0.05,
40
+ "top_p": 0.95,
41
+ "temp": 0.8,
42
+ "repeat_penalty": 1.1,
43
+ "input_prefix": "<start_of_turn>user\\n",
44
+ "input_suffix": "<end_of_turn>\\n<start_of_turn>model\\n",
45
+ "antiprompt": [],
46
+ "pre_prompt": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak.",
47
+ "pre_prompt_suffix": "<end_of_turn>\\n",
48
+ "pre_prompt_prefix": "<bos><start_of_turn>system\\n",
49
+ "seed": -1,
50
+ "tfs_z": 1,
51
+ "typical_p": 1,
52
+ "repeat_last_n": 64,
53
+ "frequency_penalty": 0,
54
+ "presence_penalty": 0,
55
+ "n_keep": 0,
56
+ "logit_bias": {},
57
+ "mirostat": 0,
58
+ "mirostat_tau": 5,
59
+ "mirostat_eta": 0.1,
60
+ "memory_f16": True,
61
+ "multiline_input": False,
62
+ "penalize_nl": True
63
+ }
64
+
65
+ # Initialize the Gemma model with the specified inference parameters
66
+ gemma = Llama.from_pretrained(
67
+ repo_id="ytu-ce-cosmos/Turkish-Gemma-9b-T1-GGUF",
68
+ filename="*Q4_K.gguf",
69
+ verbose=False
70
+ )
71
+ # Example input
72
+ user_input = "Türkiye'nin başkenti neresidir?"
73
+
74
+ # Construct the prompt
75
+ prompt = f"{inference_params['pre_prompt_prefix']}{inference_params['pre_prompt']}{inference_params['pre_prompt_suffix']}{inference_params['input_prefix']}{user_input}{inference_params['input_suffix']}"
76
+
77
+ # Generate the response
78
+ response = gemma(prompt)
79
+
80
+ # Output the response
81
+ print(response['choices'][0]['text'])
82
+
83
+ ```
84
+
85
+ The quantization has been made using `llama.cpp`. As we have seen, this method tends to give the most stable results.
86
+
87
+ As expected, higher-bit models deliver better inference quality; however, inference time tends to be similar among the low-bit models.
88
+
89
+ Each model's memory footprint can be estimated from the quantization docs in either [Hugging Face](https://huggingface.co/docs/transformers/main/en/quantization/overview) or [llama.cpp](https://github.com/ggerganov/llama.cpp/tree/master/examples/quantize).
90
+
91
+
92
+ # Acknowledgments
93
+ - Research supported with Cloud TPUs from [Google's TensorFlow Research Cloud](https://sites.research.google/trc/about/) (TFRC). Thanks for providing access to the TFRC ❤️
94
+ - Thanks to the generous support from the Hugging Face team, it is possible to download models from their S3 storage 🤗
95
+
96
+ ## Contact
97
+ COSMOS AI Research Group, Yildiz Technical University Computer Engineering Department
98
+ https://cosmos.yildiz.edu.tr/
99
Turkish-Gemma-9b-T1-F16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f1602d5c860a6ade3e290a257b4fc262432155bf84b1aad0d195a1b2540a19d
3
+ size 18490680320
Turkish-Gemma-9b-T1.IQ2_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2e67de6ff7981ddd82f48fd01ebf41137bca0057feb8b4c20b7304b307f0b9
3
+ size 636855296
Turkish-Gemma-9b-T1.IQ3_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:861508d08584373d8270714dc3c8ddf3493e51d4eba28e7b056ec19c5d8ace6b
3
+ size 4494615552
Turkish-Gemma-9b-T1.IQ3_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:430e0cb276349f0a38a402c44cd2d6541412f662a0531dc3125ea18278600243
3
+ size 4337665024
Turkish-Gemma-9b-T1.IQ3_XS.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea2e7477661a131914df70bf66c44aa09ea3294fbc1c7019cc33e27254566944
3
+ size 4144989184
Turkish-Gemma-9b-T1.IQ3_XXS.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2e67de6ff7981ddd82f48fd01ebf41137bca0057feb8b4c20b7304b307f0b9
3
+ size 636855296
Turkish-Gemma-9b-T1.IQ4_NL.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51a750b61f5470205851d34e8c7c354c0ef148a4258ffd535c715557171595d1
3
+ size 5475255296
Turkish-Gemma-9b-T1.IQ4_XS.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e199c0abeaa98ad344ec7120391447f5eb2c94e743019e119d6d712df4e0ae5
3
+ size 5223171072
Turkish-Gemma-9b-T1.Q2_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:182c70eac8c84228b8d1d7a5cbdead348f4f13dd2cdaf2df6352b87a8599f0c9
3
+ size 3805398016
Turkish-Gemma-9b-T1.Q3_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b5513c53e46575cb70fb16cec16a5e07864447a4e2fe779ce19c54e2642f73
3
+ size 4761781248
Turkish-Gemma-9b-T1.Q3_K_L.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff59485dbd8065653a5790248af166619e062e119b59b41fc5689b9c64f3836a
3
+ size 5132452864
Turkish-Gemma-9b-T1.Q3_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b5513c53e46575cb70fb16cec16a5e07864447a4e2fe779ce19c54e2642f73
3
+ size 4761781248
Turkish-Gemma-9b-T1.Q3_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d43aefa64ab30070fbe74816f30563fb7e04f65fa82501c6a64336d87f5be3bf
3
+ size 4337665024
Turkish-Gemma-9b-T1.Q4_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49387c2d8ab1d1a4d65f3e720e8fef190a87e6049b424369779847ee2832c43
3
+ size 5761057792
Turkish-Gemma-9b-T1.Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49387c2d8ab1d1a4d65f3e720e8fef190a87e6049b424369779847ee2832c43
3
+ size 5761057792
Turkish-Gemma-9b-T1.Q4_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e6776dc02bf5fb07b40b6e948affdc3f979286b3e6b3d02959306060a907d71
3
+ size 5478925312
Turkish-Gemma-9b-T1.Q5_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3c0723e1c323519684bc62378da1b340a08cf16e17054830f9e0ae79f511651
3
+ size 6483592192
Turkish-Gemma-9b-T1.Q5_1.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe79fcbe9d8c9cd826c33e738a093dfb7458bb6347cb084e01f6dbc4fa7efdb
3
+ size 7003816960
Turkish-Gemma-9b-T1.Q5_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f5743d26e6f826aa0c494dcf198d6944832b251b5c52f8f80efea36423a92b
3
+ size 6647366656
Turkish-Gemma-9b-T1.Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f5743d26e6f826aa0c494dcf198d6944832b251b5c52f8f80efea36423a92b
3
+ size 6647366656
Turkish-Gemma-9b-T1.Q5_K_S.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b23a9f1dbd3511802a7e25d03789df56d83c221f1d0a302b8e7afea0380d751
3
+ size 6483592192
Turkish-Gemma-9b-T1.Q6_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adfb8160fb498505730f8465da99634328d7a7640776c63cf5540738607d147b
3
+ size 7589069824
Turkish-Gemma-9b-T1.Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f23708d2eeee52229027a2da3ebf2974aa89012f48674f5cc602148860ee4e2
3
+ size 9827148800
cosmosgemma_t1_lm_studio_preset.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "cosmosgemma_t1_lm_studio",
3
+ "load_params": {
4
+ "n_ctx": 2048,
5
+ "n_batch": 512,
6
+ "rope_freq_base": 0,
7
+ "rope_freq_scale": 0,
8
+ "n_gpu_layers": 10,
9
+ "use_mlock": true,
10
+ "main_gpu": 0,
11
+ "tensor_split": [
12
+ 0
13
+ ],
14
+ "seed": -1,
15
+ "f16_kv": true,
16
+ "use_mmap": true,
17
+ "no_kv_offload": false,
18
+ "num_experts_used": 0
19
+ },
20
+ "inference_params": {
21
+ "n_threads": 4,
22
+ "n_predict": -1,
23
+ "top_k": 40,
24
+ "min_p": 0.05,
25
+ "top_p": 0.95,
26
+ "temp": 0.8,
27
+ "repeat_penalty": 1.1,
28
+ "input_prefix": "<start_of_turn>user\\n",
29
+ "input_suffix": "<end_of_turn>\\n<start_of_turn>model\\n",
30
+ "antiprompt": [],
31
+ "pre_prompt": "Sen bir yapay zeka asistanısın. Kullanıcı sana bir görev verecek. Amacın görevi olabildiğince sadık bir şekilde tamamlamak. Görevi yerine getirirken adım adım düşün ve adımlarını gerekçelendir.",
32
+ "pre_prompt_suffix": "<end_of_turn>\\n",
33
+ "pre_prompt_prefix": "<bos><start_of_turn>system\\n",
34
+ "seed": -1,
35
+ "tfs_z": 1,
36
+ "typical_p": 1,
37
+ "repeat_last_n": 64,
38
+ "frequency_penalty": 0,
39
+ "presence_penalty": 0,
40
+ "n_keep": 0,
41
+ "logit_bias": {},
42
+ "mirostat": 0,
43
+ "mirostat_tau": 5,
44
+ "mirostat_eta": 0.1,
45
+ "memory_f16": true,
46
+ "multiline_input": false,
47
+ "penalize_nl": true
48
+ }
49
+ }