diff --git a/.gitattributes b/.gitattributes index f6d3b5976efaa2784b4ab9073c94f29c47e938e6..0791888f7185ea10ed855a4e7da6cf0f246db61a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -37,3 +37,12 @@ wandb_logs/run-20251116_132253-ytv0as6f/run-ytv0as6f.wandb filter=lfs diff=lfs m wandb_logs/run-20251116_133532-vuimo2u7/run-vuimo2u7.wandb filter=lfs diff=lfs merge=lfs -text wandb_logs/run-20251116_134322-fwgtjw62/run-fwgtjw62.wandb filter=lfs diff=lfs merge=lfs -text wandb_logs/run-20251116_135428-vyyhh03l/run-vyyhh03l.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_161333-76f9jnsn/run-76f9jnsn.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_162012-g5qyspo9/run-g5qyspo9.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_162942-hn6dt5mn/run-hn6dt5mn.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_163550-m5him6ro/run-m5him6ro.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_164139-rj75j47x/run-rj75j47x.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_165422-97r1a8g4/run-97r1a8g4.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_170227-g5jfi5ko/run-g5jfi5ko.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_171322-7w55arw9/run-7w55arw9.wandb filter=lfs diff=lfs merge=lfs -text +wandb_logs/run-20251122_172103-peygrszr/run-peygrszr.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/wandb_logs/debug-internal.log b/wandb_logs/debug-internal.log index 3a8d227b88a277ac43b8a351b1ca99cb2a60680b..18188938d13f3260909ee51c73516a835cc83ce3 100644 --- a/wandb_logs/debug-internal.log +++ b/wandb_logs/debug-internal.log @@ -1,12 +1,12 @@ -{"time":"2025-11-16T13:54:28.513593258Z","level":"INFO","msg":"stream: starting","core version":"0.23.0"} -{"time":"2025-11-16T13:54:28.694903419Z","level":"INFO","msg":"stream: created new stream","id":"vyyhh03l"} -{"time":"2025-11-16T13:54:28.69500738Z","level":"INFO","msg":"handler: started","stream_id":"vyyhh03l"} -{"time":"2025-11-16T13:54:28.695135336Z","level":"INFO","msg":"stream: started","id":"vyyhh03l"} -{"time":"2025-11-16T13:54:28.695150894Z","level":"INFO","msg":"writer: started","stream_id":"vyyhh03l"} -{"time":"2025-11-16T13:54:28.695169215Z","level":"INFO","msg":"sender: started","stream_id":"vyyhh03l"} -{"time":"2025-11-16T13:59:47.269716853Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} -{"time":"2025-11-16T13:59:47.447011897Z","level":"INFO","msg":"handler: operation stats","stats":{}} -{"time":"2025-11-16T13:59:47.453021363Z","level":"INFO","msg":"stream: closing","id":"vyyhh03l"} -{"time":"2025-11-16T13:59:47.453046371Z","level":"INFO","msg":"handler: closed","stream_id":"vyyhh03l"} -{"time":"2025-11-16T13:59:47.453104673Z","level":"INFO","msg":"sender: closed","stream_id":"vyyhh03l"} -{"time":"2025-11-16T13:59:47.453114697Z","level":"INFO","msg":"stream: closed","id":"vyyhh03l"} +{"time":"2025-11-22T17:21:03.316972054Z","level":"INFO","msg":"stream: starting","core version":"0.23.0"} +{"time":"2025-11-22T17:21:03.495466125Z","level":"INFO","msg":"stream: created new stream","id":"peygrszr"} +{"time":"2025-11-22T17:21:03.495560895Z","level":"INFO","msg":"handler: started","stream_id":"peygrszr"} +{"time":"2025-11-22T17:21:03.495722995Z","level":"INFO","msg":"stream: started","id":"peygrszr"} +{"time":"2025-11-22T17:21:03.495748552Z","level":"INFO","msg":"writer: started","stream_id":"peygrszr"} +{"time":"2025-11-22T17:21:03.495767189Z","level":"INFO","msg":"sender: started","stream_id":"peygrszr"} +{"time":"2025-11-22T17:32:02.559883134Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2025-11-22T17:32:02.672821022Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2025-11-22T17:32:02.681608875Z","level":"INFO","msg":"stream: closing","id":"peygrszr"} +{"time":"2025-11-22T17:32:02.68216231Z","level":"INFO","msg":"handler: closed","stream_id":"peygrszr"} +{"time":"2025-11-22T17:32:02.682264494Z","level":"INFO","msg":"sender: closed","stream_id":"peygrszr"} +{"time":"2025-11-22T17:32:02.682278822Z","level":"INFO","msg":"stream: closed","id":"peygrszr"} diff --git a/wandb_logs/debug.log b/wandb_logs/debug.log index 3a1a550efbe26ee4b9db105b55398605735e0990..31fa840e739e71650332b33fe3a0d8eca3243a01 100644 --- a/wandb_logs/debug.log +++ b/wandb_logs/debug.log @@ -1,29 +1,29 @@ -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0 -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_setup.py:_flush():80] Configure stats pid to 10012 -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_setup.py:_flush():80] Loading settings from /content/code-switch-identification-model/wandb/settings -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_setup.py:_flush():80] Loading settings from environment variables -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_init.py:setup_run_log_directory():713] Logging user logs to /content/code-switch-identification-model/wandb/run-20251116_135428-vyyhh03l/logs/debug.log -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to /content/code-switch-identification-model/wandb/run-20251116_135428-vyyhh03l/logs/debug-internal.log -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_init.py:init():840] calling init triggers -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_init.py:init():845] wandb.init called with sweep_config: {} -config: {'model': 'bert-base-multilingual-cased', 'train_file': 'data2/train.txt', 'max_length': 128, 'epochs': 3, 'batch_size': 16, 'learning_rate': 2e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.01, '_wandb': {}} -2025-11-16 13:54:28,196 INFO MainThread:10012 [wandb_init.py:init():888] starting backend -2025-11-16 13:54:28,508 INFO MainThread:10012 [wandb_init.py:init():891] sending inform_init request -2025-11-16 13:54:28,511 INFO MainThread:10012 [wandb_init.py:init():899] backend started and connected -2025-11-16 13:54:28,515 INFO MainThread:10012 [wandb_init.py:init():969] updated telemetry -2025-11-16 13:54:28,529 INFO MainThread:10012 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout -2025-11-16 13:54:28,905 INFO MainThread:10012 [wandb_init.py:init():1040] starting run threads in backend -2025-11-16 13:54:29,369 INFO MainThread:10012 [wandb_run.py:_console_start():2504] atexit reg -2025-11-16 13:54:29,369 INFO MainThread:10012 [wandb_run.py:_redirect():2352] redirect: wrap_raw -2025-11-16 13:54:29,369 INFO MainThread:10012 [wandb_run.py:_redirect():2421] Wrapping output streams. -2025-11-16 13:54:29,369 INFO MainThread:10012 [wandb_run.py:_redirect():2444] Redirects installed. -2025-11-16 13:54:29,371 INFO MainThread:10012 [wandb_init.py:init():1080] run started, returning control to user process -2025-11-16 13:55:43,549 INFO MainThread:10012 [wandb_run.py:_config_callback():1385] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'dtype': 'float32', 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-GUJ', 2: 'I-GUJ', 3: 'B-ENG', 4: 'I-ENG'}, 'label2id': {'O': 0, 'B-GUJ': 1, 'I-GUJ': 2, 'B-ENG': 3, 'I-ENG': 4}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'num_beam_groups': 1, 'diversity_penalty': 0.0, '_name_or_path': 'bert-base-multilingual-cased', 'transformers_version': '4.57.1', 'directionality': 'bidi', 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'tf_legacy_loss': False, 'use_bfloat16': False, 'vocab_size': 119547, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_attentions': False, 'output_dir': './models/bert-multilingual', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './models/bert-multilingual/logs', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 50, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': None, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'h3110Fr13nd/guj-eng-code-switch-bert-multilingual', 'hub_strategy': 'end', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-bert-multilingual', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} -2025-11-16 13:55:43,555 INFO MainThread:10012 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 177266693 - > -2025-11-16 13:55:43,555 INFO MainThread:10012 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 177266693 None -2025-11-16 13:59:46,799 INFO MainThread:10012 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/vyyhh03l -2025-11-16 13:59:46,801 INFO MainThread:10012 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 -2025-11-16 13:59:46,801 INFO MainThread:10012 [wandb_run.py:_restore():2451] restore -2025-11-16 13:59:46,801 INFO MainThread:10012 [wandb_run.py:_restore():2457] restore done -2025-11-16 13:59:47,450 INFO MainThread:10012 [wandb_run.py:_footer_sync_info():3853] logging synced files +2025-11-22 17:21:03,022 INFO MainThread:20205 [wandb_setup.py:_flush():80] Current SDK version is 0.23.0 +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_setup.py:_flush():80] Configure stats pid to 20205 +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_setup.py:_flush():80] Loading settings from /content/code-switch-identification-model/wandb/settings +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_setup.py:_flush():80] Loading settings from environment variables +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_init.py:setup_run_log_directory():713] Logging user logs to /content/code-switch-identification-model/wandb/run-20251122_172103-peygrszr/logs/debug.log +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_init.py:setup_run_log_directory():714] Logging internal logs to /content/code-switch-identification-model/wandb/run-20251122_172103-peygrszr/logs/debug-internal.log +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_init.py:init():840] calling init triggers +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_init.py:init():845] wandb.init called with sweep_config: {} +config: {'model': 'bert-base-multilingual-cased', 'train_file': 'synthetic_data_gen/data/train.txt', 'max_length': 128, 'epochs': 3, 'batch_size': 16, 'learning_rate': 2e-05, 'warmup_ratio': 0.1, 'weight_decay': 0.01, '_wandb': {}} +2025-11-22 17:21:03,023 INFO MainThread:20205 [wandb_init.py:init():888] starting backend +2025-11-22 17:21:03,311 INFO MainThread:20205 [wandb_init.py:init():891] sending inform_init request +2025-11-22 17:21:03,314 INFO MainThread:20205 [wandb_init.py:init():899] backend started and connected +2025-11-22 17:21:03,318 INFO MainThread:20205 [wandb_init.py:init():969] updated telemetry +2025-11-22 17:21:03,336 INFO MainThread:20205 [wandb_init.py:init():993] communicating run to backend with 90.0 second timeout +2025-11-22 17:21:03,777 INFO MainThread:20205 [wandb_init.py:init():1040] starting run threads in backend +2025-11-22 17:21:04,276 INFO MainThread:20205 [wandb_run.py:_console_start():2504] atexit reg +2025-11-22 17:21:04,277 INFO MainThread:20205 [wandb_run.py:_redirect():2352] redirect: wrap_raw +2025-11-22 17:21:04,277 INFO MainThread:20205 [wandb_run.py:_redirect():2421] Wrapping output streams. +2025-11-22 17:21:04,277 INFO MainThread:20205 [wandb_run.py:_redirect():2444] Redirects installed. +2025-11-22 17:21:04,279 INFO MainThread:20205 [wandb_init.py:init():1080] run started, returning control to user process +2025-11-22 17:27:38,355 INFO MainThread:20205 [wandb_run.py:_config_callback():1385] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'torchscript': False, 'dtype': 'float32', 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'O', 1: 'B-GUJ', 2: 'I-GUJ', 3: 'B-ENG', 4: 'I-ENG'}, 'label2id': {'O': 0, 'B-GUJ': 1, 'I-GUJ': 2, 'B-ENG': 3, 'I-ENG': 4}, 'task_specific_params': None, 'problem_type': None, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'num_beam_groups': 1, 'diversity_penalty': 0.0, '_name_or_path': 'bert-base-multilingual-cased', 'transformers_version': '4.57.1', 'directionality': 'bidi', 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'tf_legacy_loss': False, 'use_bfloat16': False, 'vocab_size': 119547, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_attentions': False, 'output_dir': './models/bert-multilingual', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 32, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 2e-05, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 3, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.1, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': './models/bert-multilingual/logs', 'logging_strategy': 'steps', 'logging_first_step': True, 'logging_steps': 50, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': None, 'save_total_limit': 3, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': None, 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'parallelism_config': None, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch_fused', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'project': 'huggingface', 'trackio_space_id': 'trackio', 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'h3110Fr13nd/guj-eng-code-switch-bert-multilingual', 'hub_strategy': 'end', 'hub_token': '', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-bert-multilingual', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 17:27:38,360 INFO MainThread:20205 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 177266693 - > +2025-11-22 17:27:38,361 INFO MainThread:20205 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 177266693 None +2025-11-22 17:32:02,106 INFO MainThread:20205 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/peygrszr +2025-11-22 17:32:02,108 INFO MainThread:20205 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 17:32:02,109 INFO MainThread:20205 [wandb_run.py:_restore():2451] restore +2025-11-22 17:32:02,110 INFO MainThread:20205 [wandb_run.py:_restore():2457] restore done +2025-11-22 17:32:02,679 INFO MainThread:20205 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_161333-76f9jnsn/files/config.yaml b/wandb_logs/run-20251122_161333-76f9jnsn/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bcf6f4f37a520b84d54a012a4fec058c3cc7ec92 --- /dev/null +++ b/wandb_logs/run-20251122_161333-76f9jnsn/files/config.yaml @@ -0,0 +1,543 @@ +_name_or_path: + value: bert-base-multilingual-cased +_wandb: + value: + cli_version: 0.23.0 + e: + 2kuo2wa7mpmbyuiylwlvpgza9ojvsbhz: + args: + - configs/bert_data2.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "41432219648" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T16:13:33.410421Z" + writerId: 2kuo2wa7mpmbyuiylwlvpgza9ojvsbhz + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +directionality: + value: bidi +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dtype: + value: float32 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-bert-multilingual-data2 +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/bert-multilingual-data2/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: bert-base-multilingual-cased +model/num_parameters: + value: 177266693 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/bert-multilingual-data2 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +pooler_fc_size: + value: 768 +pooler_num_attention_heads: + value: 12 +pooler_num_fc_layers: + value: 3 +pooler_size_per_head: + value: 128 +pooler_type: + value: first_token_transform +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-bert-multilingual-data2 +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data2/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 119547 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_161333-76f9jnsn/files/output.log b/wandb_logs/run-20251122_161333-76f9jnsn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..519b8289a32c2f88bf957e5139964cec0fa52b2f --- /dev/null +++ b/wandb_logs/run-20251122_161333-76f9jnsn/files/output.log @@ -0,0 +1,83 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +tokenizer_config.json: 100% 49.0/49.0 [00:00<00:00, 192kB/s] +config.json: 100% 625/625 [00:00<00:00, 2.58MB/s] +vocab.txt: 100% 996k/996k [00:00<00:00, 7.70MB/s] +tokenizer.json: 100% 1.96M/1.96M [00:00<00:00, 7.55MB/s] +model.safetensors: 100% 714M/714M [00:11<00:00, 60.5MB/s] +Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +Downloading builder script: 6.34kB [00:00, 13.1MB/s] +{'loss': 1.6561, 'grad_norm': 11.050471305847168, 'learning_rate': 0.0, 'epoch': 0.01} +{'loss': 1.0255, 'grad_norm': 7.669041633605957, 'learning_rate': 1.9600000000000002e-05, 'epoch': 0.3} +{'loss': 0.2894, 'grad_norm': 2.569448947906494, 'learning_rate': 1.7782805429864253e-05, 'epoch': 0.61} +{'loss': 0.1821, 'grad_norm': 2.593822956085205, 'learning_rate': 1.552036199095023e-05, 'epoch': 0.91} +Downloading builder script: 6.34kB [00:00, 11.3MB/s] +100% 492/492 [03:16<00:00, 2.51it/s]:00, ?B/s] +{'eval_loss': 0.07226230204105377, 'eval_precision': 0.8930976430976431, 'eval_recall': 0.9068376068376068, 'eval_f1': 0.8999151823579304, 'eval_accuracy': 0.9802466756600501, 'eval_runtime': 2.9065, 'eval_samples_per_second': 112.163, 'eval_steps_per_second': 3.785, 'epoch': 1.0} +{'loss': 0.1071, 'grad_norm': 1.1867393255233765, 'learning_rate': 1.32579185520362e-05, 'epoch': 1.22} +{'loss': 0.1027, 'grad_norm': 1.172904133796692, 'learning_rate': 1.0995475113122173e-05, 'epoch': 1.52} +{'loss': 0.0789, 'grad_norm': 3.011417865753174, 'learning_rate': 8.733031674208145e-06, 'epoch': 1.83} +Processing Files (3 / 3) : 100% 709M/709M [00:05<00:00, 126MB/s, 131MB/s ] +{'eval_loss': 0.04031597822904587, 'eval_precision': 0.935374149659864, 'eval_recall': 0.9401709401709402, 'eval_f1': 0.937766410912191, 'eval_accuracy': 0.9896897282713433, 'eval_runtime': 2.7197, 'eval_samples_per_second': 119.867, 'eval_steps_per_second': 4.045, 'epoch': 2.0} +{'loss': 0.0632, 'grad_norm': 1.8443704843521118, 'learning_rate': 6.470588235294119e-06, 'epoch': 2.13} +{'loss': 0.0463, 'grad_norm': 1.2871953248977661, 'learning_rate': 4.208144796380091e-06, 'epoch': 2.44} +{'loss': 0.054, 'grad_norm': 2.695483684539795, 'learning_rate': 1.9457013574660634e-06, 'epoch': 2.74} +{'eval_loss': 0.04117187485098839, 'eval_precision': 0.9366980325064157, 'eval_recall': 0.9358974358974359, 'eval_f1': 0.9362975630611372, 'eval_accuracy': 0.9890152245133937, 'eval_runtime': 2.4944, 'eval_samples_per_second': 130.691, 'eval_steps_per_second': 4.41, 'epoch': 3.0} +{'train_runtime': 196.1277, 'train_samples_per_second': 39.908, 'train_steps_per_second': 2.509, 'train_loss': 0.20365032820197626, 'epoch': 3.0} +New Data Upload : 100% 15.3k/15.3k [00:05<00:00, 2.72kB/s, 2.83kB/s ] + ...l-data2/model.safetensors: 100% 709M/709M [00:04<00:00, 141MB/s] + ...28047.f0da10c9a12b.1796.0: 100% 9.33k/9.33k [00:00', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-bert-multilingual-data2', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 16:14:07,419 INFO MainThread:1796 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 177266693 - > +2025-11-22 16:14:07,419 INFO MainThread:1796 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 177266693 None +2025-11-22 16:18:08,618 INFO MainThread:1796 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/76f9jnsn +2025-11-22 16:18:08,620 INFO MainThread:1796 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 16:18:08,620 INFO MainThread:1796 [wandb_run.py:_restore():2451] restore +2025-11-22 16:18:08,620 INFO MainThread:1796 [wandb_run.py:_restore():2457] restore done +2025-11-22 16:18:09,283 INFO MainThread:1796 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_161333-76f9jnsn/run-76f9jnsn.wandb b/wandb_logs/run-20251122_161333-76f9jnsn/run-76f9jnsn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7eeaff208bdfaaa1a85870ec1895a0c452bfe30d --- /dev/null +++ b/wandb_logs/run-20251122_161333-76f9jnsn/run-76f9jnsn.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd6b91c831f9df9526345cb41a6979eb703bf639ffedfdc4ff1c8c72d89f07ab +size 142669 diff --git a/wandb_logs/run-20251122_162012-g5qyspo9/files/config.yaml b/wandb_logs/run-20251122_162012-g5qyspo9/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be7b9cb1737a9dc6b7e19ee35ad5d3f3526902cd --- /dev/null +++ b/wandb_logs/run-20251122_162012-g5qyspo9/files/config.yaml @@ -0,0 +1,543 @@ +_name_or_path: + value: bert-base-multilingual-cased +_wandb: + value: + cli_version: 0.23.0 + e: + i8buzorts1ot7g0apehzlb5qg4vk4nic: + args: + - configs/bert_data3.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "49258479616" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T16:20:12.394707Z" + writerId: i8buzorts1ot7g0apehzlb5qg4vk4nic + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +directionality: + value: bidi +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dtype: + value: float32 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-bert-multilingual-data3 +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/bert-multilingual-data3/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: bert-base-multilingual-cased +model/num_parameters: + value: 177266693 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/bert-multilingual-data3 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +pooler_fc_size: + value: 768 +pooler_num_attention_heads: + value: 12 +pooler_num_fc_layers: + value: 3 +pooler_size_per_head: + value: 128 +pooler_type: + value: first_token_transform +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-bert-multilingual-data3 +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data3/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 119547 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_162012-g5qyspo9/files/output.log b/wandb_logs/run-20251122_162012-g5qyspo9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7efb34a373eb7ff8052e3b10b46c832e01f33da2 --- /dev/null +++ b/wandb_logs/run-20251122_162012-g5qyspo9/files/output.log @@ -0,0 +1,81 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 741/741 [04:18<00:00, 2.86it/s] +{'loss': 1.605, 'grad_norm': 7.117066383361816, 'learning_rate': 0.0, 'epoch': 0.0} +{'loss': 1.1156, 'grad_norm': 2.9144561290740967, 'learning_rate': 1.3066666666666668e-05, 'epoch': 0.2} +{'loss': 0.3404, 'grad_norm': 17.36507225036621, 'learning_rate': 1.927927927927928e-05, 'epoch': 0.4} +{'loss': 0.1872, 'grad_norm': 4.454167366027832, 'learning_rate': 1.7777777777777777e-05, 'epoch': 0.61} +{'loss': 0.167, 'grad_norm': 3.682462692260742, 'learning_rate': 1.627627627627628e-05, 'epoch': 0.81} +Processing Files (3 / 3) : 100% 709M/709M [00:18<00:00, 39.3MB/s, 27.8MB/s ] +{'eval_loss': 0.10514671355485916, 'eval_precision': 0.9108744394618834, 'eval_recall': 0.9497369959088252, 'eval_f1': 0.9298998569384834, 'eval_accuracy': 0.9728603668769168, 'eval_runtime': 3.6556, 'eval_samples_per_second': 134.862, 'eval_steps_per_second': 4.377, 'epoch': 1.0} +{'loss': 0.1493, 'grad_norm': 1.5595707893371582, 'learning_rate': 1.4774774774774776e-05, 'epoch': 1.01} +{'loss': 0.0968, 'grad_norm': 2.235821008682251, 'learning_rate': 1.3273273273273274e-05, 'epoch': 1.21} +{'loss': 0.0727, 'grad_norm': 1.0842339992523193, 'learning_rate': 1.1771771771771771e-05, 'epoch': 1.42} +{'loss': 0.0868, 'grad_norm': 6.031585693359375, 'learning_rate': 1.027027027027027e-05, 'epoch': 1.62} +{'loss': 0.0701, 'grad_norm': 1.9359896183013916, 'learning_rate': 8.768768768768769e-06, 'epoch': 1.82} +{'eval_loss': 0.07907417416572571, 'eval_precision': 0.9501146788990825, 'eval_recall': 0.9684395090590298, 'eval_f1': 0.9591895803183791, 'eval_accuracy': 0.98026734563972, 'eval_runtime': 3.4462, 'eval_samples_per_second': 143.054, 'eval_steps_per_second': 4.643, 'epoch': 2.0} +{'loss': 0.0643, 'grad_norm': 3.1213998794555664, 'learning_rate': 7.267267267267268e-06, 'epoch': 2.02} +{'loss': 0.05, 'grad_norm': 7.348484516143799, 'learning_rate': 5.765765765765766e-06, 'epoch': 2.23} +{'loss': 0.048, 'grad_norm': 3.844130039215088, 'learning_rate': 4.264264264264265e-06, 'epoch': 2.43} +{'loss': 0.0333, 'grad_norm': 2.3236796855926514, 'learning_rate': 2.7627627627627628e-06, 'epoch': 2.63} +{'loss': 0.0487, 'grad_norm': 4.769405841827393, 'learning_rate': 1.2612612612612613e-06, 'epoch': 2.83} +{'eval_loss': 0.0766110047698021, 'eval_precision': 0.952819332566168, 'eval_recall': 0.9678550555230859, 'eval_f1': 0.9602783415482748, 'eval_accuracy': 0.9815404201145767, 'eval_runtime': 3.3752, 'eval_samples_per_second': 146.064, 'eval_steps_per_second': 4.74, 'epoch': 3.0} +{'train_runtime': 258.9215, 'train_samples_per_second': 45.651, 'train_steps_per_second': 2.862, 'train_loss': 0.17321264293351477, 'epoch': 3.0} +New Data Upload : 100% 709M/709M [00:18<00:00, 39.3MB/s, 27.8MB/s ] + ...l-data3/model.safetensors: 100% 709M/709M [00:17<00:00, 41.7MB/s] + ...28435.f0da10c9a12b.3876.0: 100% 10.4k/10.4k [00:09<00:00, 973B/s] + ...l-data3/training_args.bin: 100% 5.97k/5.97k [00:09<00:00, 559B/s] +Processing Files (3 / 3) : 100% 709M/709M [00:04<00:00, 154MB/s, 154MB/s ] +New Data Upload : | | 0.00B / 0.00B, 0.00B/s + ...l-data3/training_args.bin: 100% 5.97k/5.97k [00:04', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-bert-multilingual-data3', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 16:20:35,474 INFO MainThread:3876 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 177266693 - > +2025-11-22 16:20:35,474 INFO MainThread:3876 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 177266693 None +2025-11-22 16:26:03,731 INFO MainThread:3876 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/g5qyspo9 +2025-11-22 16:26:03,732 INFO MainThread:3876 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 16:26:03,732 INFO MainThread:3876 [wandb_run.py:_restore():2451] restore +2025-11-22 16:26:03,733 INFO MainThread:3876 [wandb_run.py:_restore():2457] restore done +2025-11-22 16:26:04,474 INFO MainThread:3876 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_162012-g5qyspo9/run-g5qyspo9.wandb b/wandb_logs/run-20251122_162012-g5qyspo9/run-g5qyspo9.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b294ca71377f0bb002061b5012d48f6fe3e73f95 --- /dev/null +++ b/wandb_logs/run-20251122_162012-g5qyspo9/run-g5qyspo9.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45424c5800ccbd541b5e07bb6ce03e35e88167d663310e93a419580f69ac50bd +size 236401 diff --git a/wandb_logs/run-20251122_162942-hn6dt5mn/files/config.yaml b/wandb_logs/run-20251122_162942-hn6dt5mn/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0cfe4abb4ab738b2bfec83862607c55f6f935e7d --- /dev/null +++ b/wandb_logs/run-20251122_162942-hn6dt5mn/files/config.yaml @@ -0,0 +1,542 @@ +_name_or_path: + value: ai4bharat/indic-bert +_wandb: + value: + cli_version: 0.23.0 + e: + en6drmm19e2tkcdyvyd08jg6sqs09azz: + args: + - configs/indic_data2.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "56366931968" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T16:29:42.811089Z" + writerId: en6drmm19e2tkcdyvyd08jg6sqs09azz + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: null +attention_probs_dropout_prob: + value: 0 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: 2 +chunk_size_feed_forward: + value: 0 +classifier_dropout_prob: + value: 0.1 +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +down_scale_factor: + value: 1 +dtype: + value: float32 +early_stopping: + value: false +embedding_size: + value: 128 +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 3 +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gap_size: + value: 0 +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-indic-bert-data2 +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +inner_group_num: + value: 1 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/indic-bert-data2/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: ai4bharat/indic-bert +model/num_parameters: + value: 32856837 +model_type: + value: albert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +net_structure_type: + value: 0 +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_groups: + value: 1 +num_hidden_layers: + value: 12 +num_memory_blocks: + value: 0 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/indic-bert-data2 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-indic-bert-data2 +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data2/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 200000 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_162942-hn6dt5mn/files/output.log b/wandb_logs/run-20251122_162942-hn6dt5mn/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..673705d431eb841d96628b5ab35153b3f54bead5 --- /dev/null +++ b/wandb_logs/run-20251122_162942-hn6dt5mn/files/output.log @@ -0,0 +1,83 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +config.json: 100% 507/507 [00:00<00:00, 2.16MB/s] +spiece.model: 100% 5.65M/5.65M [00:00<00:00, 27.4MB/s] +pytorch_model.bin: 100% 135M/135M [00:02<00:00, 64.2MB/s] +Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 492/492 [01:34<00:00, 5.23it/s] +{'loss': 1.5808, 'grad_norm': 8.045380592346191, 'learning_rate': 0.0, 'epoch': 0.01} +{'loss': 1.3, 'grad_norm': 3.5511159896850586, 'learning_rate': 1.9600000000000002e-05, 'epoch': 0.3} +{'loss': 0.6728, 'grad_norm': 5.11202335357666, 'learning_rate': 1.7782805429864253e-05, 'epoch': 0.61} +{'loss': 0.417, 'grad_norm': 1.9782381057739258, 'learning_rate': 1.552036199095023e-05, 'epoch': 0.91} +Processing Files (5 / 5) : 100% 152M/152M [00:01<00:00, 84.4MB/s, 95.3MB/s ] +{'eval_loss': 0.2685639560222626, 'eval_precision': 0.7524509803921569, 'eval_recall': 0.7181286549707603, 'eval_f1': 0.7348892878515858, 'eval_accuracy': 0.9420940970922251, 'eval_runtime': 2.0193, 'eval_samples_per_second': 161.439, 'eval_steps_per_second': 5.447, 'epoch': 1.0} +{'loss': 0.2787, 'grad_norm': 18.289146423339844, 'learning_rate': 1.32579185520362e-05, 'epoch': 1.22} +{'loss': 0.2075, 'grad_norm': 5.472610950469971, 'learning_rate': 1.0995475113122173e-05, 'epoch': 1.52} +{'loss': 0.1602, 'grad_norm': 3.2986786365509033, 'learning_rate': 8.733031674208145e-06, 'epoch': 1.83} +{'eval_loss': 0.1436997503042221, 'eval_precision': 0.8707403055229143, 'eval_recall': 0.8666666666666667, 'eval_f1': 0.8686987104337633, 'eval_accuracy': 0.9660551603644079, 'eval_runtime': 2.0934, 'eval_samples_per_second': 155.727, 'eval_steps_per_second': 5.255, 'epoch': 2.0} +{'loss': 0.1614, 'grad_norm': 2.817918539047241, 'learning_rate': 6.470588235294119e-06, 'epoch': 2.13} +{'loss': 0.1301, 'grad_norm': 3.798708915710449, 'learning_rate': 4.208144796380091e-06, 'epoch': 2.44} +{'loss': 0.132, 'grad_norm': 4.0428009033203125, 'learning_rate': 1.9457013574660634e-06, 'epoch': 2.74} +{'eval_loss': 0.0999612882733345, 'eval_precision': 0.8868360277136259, 'eval_recall': 0.8982456140350877, 'eval_f1': 0.8925043579314353, 'eval_accuracy': 0.9772869087732435, 'eval_runtime': 2.1245, 'eval_samples_per_second': 153.451, 'eval_steps_per_second': 5.178, 'epoch': 3.0} +{'train_runtime': 94.1349, 'train_samples_per_second': 83.147, 'train_steps_per_second': 5.227, 'train_loss': 0.3611367323049685, 'epoch': 3.0} +New Data Upload : 100% 15.2k/15.2k [00:01<00:00, 8.40kB/s, 9.48kB/s ] + ...t-data2/model.safetensors: 100% 131M/131M [00:01<00:00, 88.5MB/s] + ...bert-data2/tokenizer.json: 100% 15.3M/15.3M [00:01', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-indic-bert-data2', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 16:30:23,587 INFO MainThread:6429 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 32856837 - > +2025-11-22 16:30:23,587 INFO MainThread:6429 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 32856837 None +2025-11-22 16:32:11,129 INFO MainThread:6429 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/hn6dt5mn +2025-11-22 16:32:11,130 INFO MainThread:6429 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 16:32:11,130 INFO MainThread:6429 [wandb_run.py:_restore():2451] restore +2025-11-22 16:32:11,130 INFO MainThread:6429 [wandb_run.py:_restore():2457] restore done +2025-11-22 16:32:11,780 INFO MainThread:6429 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_162942-hn6dt5mn/run-hn6dt5mn.wandb b/wandb_logs/run-20251122_162942-hn6dt5mn/run-hn6dt5mn.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6e14fda7b9e5ddbf6c644f0b220bdaec9bf1b723 --- /dev/null +++ b/wandb_logs/run-20251122_162942-hn6dt5mn/run-hn6dt5mn.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70e3d93fea83156ec3ae5ccef88003e88f82c5057f9f59db3510eaa0704b3d4f +size 121050 diff --git a/wandb_logs/run-20251122_163550-m5him6ro/files/config.yaml b/wandb_logs/run-20251122_163550-m5him6ro/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e5418873a63518b968e9febb6593dbb1f4e8d24e --- /dev/null +++ b/wandb_logs/run-20251122_163550-m5him6ro/files/config.yaml @@ -0,0 +1,542 @@ +_name_or_path: + value: ai4bharat/indic-bert +_wandb: + value: + cli_version: 0.23.0 + e: + gtce5850qp01pnp8ld1prboplkugarcc: + args: + - configs/indic_data3.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "58045038592" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T16:35:50.520201Z" + writerId: gtce5850qp01pnp8ld1prboplkugarcc + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: null +attention_probs_dropout_prob: + value: 0 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: 2 +chunk_size_feed_forward: + value: 0 +classifier_dropout_prob: + value: 0.1 +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +down_scale_factor: + value: 1 +dtype: + value: float32 +early_stopping: + value: false +embedding_size: + value: 128 +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 3 +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gap_size: + value: 0 +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-indic-bert-data3 +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +inner_group_num: + value: 1 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/indic-bert-data3/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: ai4bharat/indic-bert +model/num_parameters: + value: 32856837 +model_type: + value: albert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +net_structure_type: + value: 0 +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_groups: + value: 1 +num_hidden_layers: + value: 12 +num_memory_blocks: + value: 0 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/indic-bert-data3 +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-indic-bert-data3 +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data3/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 200000 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_163550-m5him6ro/files/output.log b/wandb_logs/run-20251122_163550-m5him6ro/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7468752f39ec87996f950c7ff55ee21411467bb4 --- /dev/null +++ b/wandb_logs/run-20251122_163550-m5him6ro/files/output.log @@ -0,0 +1,85 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 741/741 [02:33<00:00, 4.83it/s] +{'loss': 1.5882, 'grad_norm': 4.100921630859375, 'learning_rate': 0.0, 'epoch': 0.0} +{'loss': 1.3882, 'grad_norm': 2.7958192825317383, 'learning_rate': 1.3066666666666668e-05, 'epoch': 0.2} +{'loss': 0.8038, 'grad_norm': 8.663116455078125, 'learning_rate': 1.927927927927928e-05, 'epoch': 0.4} +{'loss': 0.4655, 'grad_norm': 3.25374436378479, 'learning_rate': 1.7777777777777777e-05, 'epoch': 0.61} +{'loss': 0.3428, 'grad_norm': 11.548168182373047, 'learning_rate': 1.627627627627628e-05, 'epoch': 0.81} +Processing Files (5 / 5) : 100% 152M/152M [00:04<00:00, 34.5MB/s, 31.3MB/s ] +{'eval_loss': 0.3107636570930481, 'eval_precision': 0.8106646058732612, 'eval_recall': 0.8446054750402576, 'eval_f1': 0.8272870662460567, 'eval_accuracy': 0.9270868141415491, 'eval_runtime': 3.0795, 'eval_samples_per_second': 160.089, 'eval_steps_per_second': 5.196, 'epoch': 1.0} +{'loss': 0.319, 'grad_norm': 7.127461910247803, 'learning_rate': 1.4774774774774776e-05, 'epoch': 1.01} +{'loss': 0.2624, 'grad_norm': 10.869697570800781, 'learning_rate': 1.3273273273273274e-05, 'epoch': 1.21} +{'loss': 0.2253, 'grad_norm': 5.988554954528809, 'learning_rate': 1.1771771771771771e-05, 'epoch': 1.42} +{'loss': 0.2557, 'grad_norm': 5.968228816986084, 'learning_rate': 1.027027027027027e-05, 'epoch': 1.62} +{'loss': 0.209, 'grad_norm': 6.053709506988525, 'learning_rate': 8.768768768768769e-06, 'epoch': 1.82} +{'eval_loss': 0.20414960384368896, 'eval_precision': 0.868071818891491, 'eval_recall': 0.895330112721417, 'eval_f1': 0.8814902893380895, 'eval_accuracy': 0.9502105192808928, 'eval_runtime': 3.2317, 'eval_samples_per_second': 152.553, 'eval_steps_per_second': 4.951, 'epoch': 2.0} +{'loss': 0.1858, 'grad_norm': 4.224324703216553, 'learning_rate': 7.267267267267268e-06, 'epoch': 2.02} +{'loss': 0.1704, 'grad_norm': 3.3073840141296387, 'learning_rate': 5.765765765765766e-06, 'epoch': 2.23} +{'loss': 0.1874, 'grad_norm': 6.554600715637207, 'learning_rate': 4.264264264264265e-06, 'epoch': 2.43} +{'loss': 0.1498, 'grad_norm': 0.9887686967849731, 'learning_rate': 2.7627627627627628e-06, 'epoch': 2.63} +{'loss': 0.1745, 'grad_norm': 9.681267738342285, 'learning_rate': 1.2612612612612613e-06, 'epoch': 2.83} +{'eval_loss': 0.19094230234622955, 'eval_precision': 0.8835562549173879, 'eval_recall': 0.9041867954911433, 'eval_f1': 0.8937524870672503, 'eval_accuracy': 0.9577624807859386, 'eval_runtime': 3.1887, 'eval_samples_per_second': 154.608, 'eval_steps_per_second': 5.018, 'epoch': 3.0} +{'train_runtime': 153.333, 'train_samples_per_second': 77.087, 'train_steps_per_second': 4.833, 'train_loss': 0.3553515584201787, 'epoch': 3.0} +New Data Upload : 100% 131M/131M [00:04<00:00, 29.8MB/s, 31.3MB/s ] + ...c-bert-data3/spiece.model: 100% 5.65M/5.65M [00:04', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-indic-bert-data3', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 16:36:09,579 INFO MainThread:8125 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 32856837 - > +2025-11-22 16:36:09,579 INFO MainThread:8125 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 32856837 None +2025-11-22 16:38:59,697 INFO MainThread:8125 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/m5him6ro +2025-11-22 16:38:59,699 INFO MainThread:8125 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 16:38:59,700 INFO MainThread:8125 [wandb_run.py:_restore():2451] restore +2025-11-22 16:38:59,700 INFO MainThread:8125 [wandb_run.py:_restore():2457] restore done +2025-11-22 16:39:00,468 INFO MainThread:8125 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_163550-m5him6ro/run-m5him6ro.wandb b/wandb_logs/run-20251122_163550-m5him6ro/run-m5him6ro.wandb new file mode 100644 index 0000000000000000000000000000000000000000..379e0a213aba5f91dd1986bcfe4680fa315e3389 --- /dev/null +++ b/wandb_logs/run-20251122_163550-m5him6ro/run-m5him6ro.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e71810a380108502d979ec9d35ecd9a8bb8a8a9cdc803458b9e9d07c11a7c7b +size 171635 diff --git a/wandb_logs/run-20251122_164139-rj75j47x/files/config.yaml b/wandb_logs/run-20251122_164139-rj75j47x/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad70046ab4fca3a047f6b2e8c47a11a84d34f53c --- /dev/null +++ b/wandb_logs/run-20251122_164139-rj75j47x/files/config.yaml @@ -0,0 +1,533 @@ +_name_or_path: + value: xlm-roberta-base +_wandb: + value: + cli_version: 0.23.0 + e: + ll5488h2bm16bbxm8jypdywkqpjti88f: + args: + - configs/xlmr_data2.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "59443888128" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T16:41:39.004845Z" + writerId: ll5488h2bm16bbxm8jypdywkqpjti88f + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - XLMRobertaForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: 0 +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dtype: + value: float32 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 2 +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-xlm-roberta-data2 +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-05 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/xlm-roberta-data2/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 514 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: xlm-roberta-base +model/num_parameters: + value: 277456901 +model_type: + value: xlm-roberta +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/xlm-roberta-data2 +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 1 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-xlm-roberta-data2 +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data2/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 1 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 250002 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_164139-rj75j47x/files/output.log b/wandb_logs/run-20251122_164139-rj75j47x/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e72204298fbad2b7c5dd954fd40174dd344064fd --- /dev/null +++ b/wandb_logs/run-20251122_164139-rj75j47x/files/output.log @@ -0,0 +1,85 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +tokenizer_config.json: 100% 25.0/25.0 [00:00<00:00, 41.7kB/s] +config.json: 100% 615/615 [00:00<00:00, 2.84MB/s] +sentencepiece.bpe.model: 100% 5.07M/5.07M [00:00<00:00, 24.9MB/s] +tokenizer.json: 100% 9.10M/9.10M [00:00<00:00, 20.0MB/s] +model.safetensors: 100% 1.12G/1.12G [00:07<00:00, 149MB/s] +Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 492/492 [08:36<00:00, 1.05s/it] +{'loss': 1.4729, 'grad_norm': 9.59158706665039, 'learning_rate': 0.0, 'epoch': 0.01} +{'loss': 1.2473, 'grad_norm': 9.001952171325684, 'learning_rate': 1.9600000000000002e-05, 'epoch': 0.3} +{'loss': 0.4452, 'grad_norm': 3.5463058948516846, 'learning_rate': 1.7782805429864253e-05, 'epoch': 0.61} +{'loss': 0.2237, 'grad_norm': 3.1912636756896973, 'learning_rate': 1.552036199095023e-05, 'epoch': 0.91} +Processing Files (5 / 5) : 100% 1.13G/1.13G [00:09<00:00, 120MB/s, 123MB/s ] +{'eval_loss': 0.09719689190387726, 'eval_precision': 0.8812989921612542, 'eval_recall': 0.8582333696837514, 'eval_f1': 0.8696132596685083, 'eval_accuracy': 0.9728557263262271, 'eval_runtime': 2.044, 'eval_samples_per_second': 159.493, 'eval_steps_per_second': 5.382, 'epoch': 1.0} +{'loss': 0.12, 'grad_norm': 2.9382197856903076, 'learning_rate': 1.32579185520362e-05, 'epoch': 1.22} +{'loss': 0.1042, 'grad_norm': 4.940054893493652, 'learning_rate': 1.0995475113122173e-05, 'epoch': 1.52} +{'loss': 0.0868, 'grad_norm': 3.1511685848236084, 'learning_rate': 8.733031674208145e-06, 'epoch': 1.83} +{'eval_loss': 0.0509815439581871, 'eval_precision': 0.9203444564047363, 'eval_recall': 0.9323882224645583, 'eval_f1': 0.9263271939328277, 'eval_accuracy': 0.985126425384234, 'eval_runtime': 2.1461, 'eval_samples_per_second': 151.9, 'eval_steps_per_second': 5.125, 'epoch': 2.0} +{'loss': 0.09, 'grad_norm': 1.499419927597046, 'learning_rate': 6.470588235294119e-06, 'epoch': 2.13} +{'loss': 0.0611, 'grad_norm': 2.84587025642395, 'learning_rate': 4.208144796380091e-06, 'epoch': 2.44} +{'loss': 0.069, 'grad_norm': 3.2826921939849854, 'learning_rate': 1.9457013574660634e-06, 'epoch': 2.74} +{'eval_loss': 0.04660652577877045, 'eval_precision': 0.9352051835853131, 'eval_recall': 0.9443838604143948, 'eval_f1': 0.9397721106890937, 'eval_accuracy': 0.9871095686663361, 'eval_runtime': 2.2535, 'eval_samples_per_second': 144.666, 'eval_steps_per_second': 4.881, 'epoch': 3.0} +{'train_runtime': 516.0982, 'train_samples_per_second': 15.166, 'train_steps_per_second': 0.953, 'train_loss': 0.253468589569495, 'epoch': 3.0} +New Data Upload : 100% 8.39M/8.39M [00:09<00:00, 891kB/s, 912kB/s ] + ...2/sentencepiece.bpe.model: 100% 5.07M/5.07M [00:08', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-xlm-roberta-data2', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 16:42:00,258 INFO MainThread:9760 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 277456901 - > +2025-11-22 16:42:00,258 INFO MainThread:9760 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 277456901 None +2025-11-22 16:52:44,371 INFO MainThread:9760 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/rj75j47x +2025-11-22 16:52:44,373 INFO MainThread:9760 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 16:52:44,373 INFO MainThread:9760 [wandb_run.py:_restore():2451] restore +2025-11-22 16:52:44,373 INFO MainThread:9760 [wandb_run.py:_restore():2457] restore done +2025-11-22 16:52:44,969 INFO MainThread:9760 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_164139-rj75j47x/run-rj75j47x.wandb b/wandb_logs/run-20251122_164139-rj75j47x/run-rj75j47x.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c80021feb17af06f9017b0cf59c711c2407c0f71 --- /dev/null +++ b/wandb_logs/run-20251122_164139-rj75j47x/run-rj75j47x.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3727d57746d602f369ae40d2e41f83dea31f54760599b1b8de88014bc85c5a6 +size 233958 diff --git a/wandb_logs/run-20251122_165422-97r1a8g4/files/config.yaml b/wandb_logs/run-20251122_165422-97r1a8g4/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..998de5caac948eb142d3ed3465274b3e07965ce7 --- /dev/null +++ b/wandb_logs/run-20251122_165422-97r1a8g4/files/config.yaml @@ -0,0 +1,533 @@ +_name_or_path: + value: xlm-roberta-base +_wandb: + value: + cli_version: 0.23.0 + e: + wu3bakb5gsgc051uyeywl1wcka05ad54: + args: + - configs/xlmr_data3.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "71776595968" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T16:54:22.500581Z" + writerId: wu3bakb5gsgc051uyeywl1wcka05ad54 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - XLMRobertaForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: 0 +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dtype: + value: float32 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 2 +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-xlm-roberta-data3 +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-05 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/xlm-roberta-data3/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 514 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: xlm-roberta-base +model/num_parameters: + value: 277456901 +model_type: + value: xlm-roberta +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/xlm-roberta-data3 +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 1 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-xlm-roberta-data3 +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data3/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 1 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 250002 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_165422-97r1a8g4/files/output.log b/wandb_logs/run-20251122_165422-97r1a8g4/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4b65415b7bbc044fdf7772a34555f9fdd8e2d566 --- /dev/null +++ b/wandb_logs/run-20251122_165422-97r1a8g4/files/output.log @@ -0,0 +1,85 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 741/741 [04:48<00:00, 2.57it/s] +{'loss': 1.5097, 'grad_norm': 3.99880313873291, 'learning_rate': 0.0, 'epoch': 0.0} +{'loss': 1.3614, 'grad_norm': 4.401490211486816, 'learning_rate': 1.3066666666666668e-05, 'epoch': 0.2} +{'loss': 0.5723, 'grad_norm': 11.96840763092041, 'learning_rate': 1.927927927927928e-05, 'epoch': 0.4} +{'loss': 0.2905, 'grad_norm': 9.415050506591797, 'learning_rate': 1.7777777777777777e-05, 'epoch': 0.61} +{'loss': 0.2012, 'grad_norm': 3.9910802841186523, 'learning_rate': 1.627627627627628e-05, 'epoch': 0.81} +Processing Files (5 / 5) : 100% 1.13G/1.13G [00:15<00:00, 73.1MB/s, 86.0MB/s ] +{'eval_loss': 0.14863422513008118, 'eval_precision': 0.8770358306188925, 'eval_recall': 0.916595744680851, 'eval_f1': 0.8963795255930088, 'eval_accuracy': 0.9518670910643584, 'eval_runtime': 2.6156, 'eval_samples_per_second': 188.484, 'eval_steps_per_second': 6.117, 'epoch': 1.0} +{'loss': 0.1939, 'grad_norm': 2.7964694499969482, 'learning_rate': 1.4774774774774776e-05, 'epoch': 1.01} +{'loss': 0.1365, 'grad_norm': 3.729452133178711, 'learning_rate': 1.3273273273273274e-05, 'epoch': 1.21} +{'loss': 0.1031, 'grad_norm': 4.448157787322998, 'learning_rate': 1.1771771771771771e-05, 'epoch': 1.42} +{'loss': 0.109, 'grad_norm': 10.762557029724121, 'learning_rate': 1.027027027027027e-05, 'epoch': 1.62} +{'loss': 0.1059, 'grad_norm': 3.0850305557250977, 'learning_rate': 8.768768768768769e-06, 'epoch': 1.82} +{'eval_loss': 0.08226888626813889, 'eval_precision': 0.9264828738512949, 'eval_recall': 0.9438297872340425, 'eval_f1': 0.9350758853288365, 'eval_accuracy': 0.9799704991848459, 'eval_runtime': 2.7019, 'eval_samples_per_second': 182.467, 'eval_steps_per_second': 5.922, 'epoch': 2.0} +{'loss': 0.0958, 'grad_norm': 3.639763355255127, 'learning_rate': 7.267267267267268e-06, 'epoch': 2.02} +{'loss': 0.0754, 'grad_norm': 5.485688209533691, 'learning_rate': 5.765765765765766e-06, 'epoch': 2.23} +{'loss': 0.0861, 'grad_norm': 10.797595977783203, 'learning_rate': 4.264264264264265e-06, 'epoch': 2.43} +{'loss': 0.0597, 'grad_norm': 3.6002562046051025, 'learning_rate': 2.7627627627627628e-06, 'epoch': 2.63} +{'loss': 0.0824, 'grad_norm': 4.358560085296631, 'learning_rate': 1.2612612612612613e-06, 'epoch': 2.83} +{'eval_loss': 0.09313274919986725, 'eval_precision': 0.9378673383711167, 'eval_recall': 0.9506382978723404, 'eval_f1': 0.9442096365173288, 'eval_accuracy': 0.9788059933234997, 'eval_runtime': 2.877, 'eval_samples_per_second': 171.36, 'eval_steps_per_second': 5.561, 'epoch': 3.0} +{'train_runtime': 288.7595, 'train_samples_per_second': 40.934, 'train_steps_per_second': 2.566, 'train_loss': 0.23847815108846235, 'epoch': 3.0} +New Data Upload : 100% 1.11G/1.11G [00:15<00:00, 71.7MB/s, 86.0MB/s ] + ...3/sentencepiece.bpe.model: 100% 5.07M/5.07M [00:15', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-xlm-roberta-data3', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 16:54:47,806 INFO MainThread:13085 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 277456901 - > +2025-11-22 16:54:47,807 INFO MainThread:13085 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 277456901 None +2025-11-22 17:00:47,406 INFO MainThread:13085 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/97r1a8g4 +2025-11-22 17:00:47,408 INFO MainThread:13085 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 17:00:47,409 INFO MainThread:13085 [wandb_run.py:_restore():2451] restore +2025-11-22 17:00:47,409 INFO MainThread:13085 [wandb_run.py:_restore():2457] restore done +2025-11-22 17:00:48,048 INFO MainThread:13085 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_165422-97r1a8g4/run-97r1a8g4.wandb b/wandb_logs/run-20251122_165422-97r1a8g4/run-97r1a8g4.wandb new file mode 100644 index 0000000000000000000000000000000000000000..c8c62b2e68d3ff9cd25367cbf13e0d8ce1aa2522 --- /dev/null +++ b/wandb_logs/run-20251122_165422-97r1a8g4/run-97r1a8g4.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca333aa343ef958e8f3f9755bf2578871bf5bc2dd018d0f35b9405d414e62b71 +size 278819 diff --git a/wandb_logs/run-20251122_170227-g5jfi5ko/files/config.yaml b/wandb_logs/run-20251122_170227-g5jfi5ko/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..53029abe2951e5366e82618573950948520952e6 --- /dev/null +++ b/wandb_logs/run-20251122_170227-g5jfi5ko/files/config.yaml @@ -0,0 +1,533 @@ +_name_or_path: + value: xlm-roberta-base +_wandb: + value: + cli_version: 0.23.0 + e: + vl08xkypjfm7ys6t847adb3i88g7qcrv: + args: + - configs/xlm_roberta.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "82966093824" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T17:02:27.828566Z" + writerId: vl08xkypjfm7ys6t847adb3i88g7qcrv + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - XLMRobertaForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: 0 +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dtype: + value: float32 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 2 +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-xlm-roberta +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-05 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/xlm-roberta/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 514 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: xlm-roberta-base +model/num_parameters: + value: 277456901 +model_type: + value: xlm-roberta +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/xlm-roberta +output_hidden_states: + value: false +output_past: + value: true +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 1 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-xlm-roberta +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 1 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 250002 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_170227-g5jfi5ko/files/output.log b/wandb_logs/run-20251122_170227-g5jfi5ko/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..2c8b11eaa8e84e38661d1befd222fd8dd8fb20d3 --- /dev/null +++ b/wandb_logs/run-20251122_170227-g5jfi5ko/files/output.log @@ -0,0 +1,86 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 750/750 [05:04<00:00, 2.46it/s] +{'loss': 1.506, 'grad_norm': 6.7831034660339355, 'learning_rate': 0.0, 'epoch': 0.0} +{'loss': 1.2773, 'grad_norm': 6.730624198913574, 'learning_rate': 1.3066666666666668e-05, 'epoch': 0.2} +{'loss': 0.5892, 'grad_norm': 5.595107078552246, 'learning_rate': 1.928888888888889e-05, 'epoch': 0.4} +{'loss': 0.3443, 'grad_norm': 2.822570562362671, 'learning_rate': 1.780740740740741e-05, 'epoch': 0.6} +{'loss': 0.2372, 'grad_norm': 5.8036041259765625, 'learning_rate': 1.632592592592593e-05, 'epoch': 0.8} +{'loss': 0.2274, 'grad_norm': 4.149110794067383, 'learning_rate': 1.4844444444444445e-05, 'epoch': 1.0} +Processing Files (5 / 5) : 100% 1.13G/1.13G [00:16<00:00, 69.5MB/s, 78.3MB/s ] +{'eval_loss': 0.22519312798976898, 'eval_precision': 0.8584842387659289, 'eval_recall': 0.8030112923462986, 'eval_f1': 0.8298217179902756, 'eval_accuracy': 0.943464994066791, 'eval_runtime': 2.5974, 'eval_samples_per_second': 192.5, 'eval_steps_per_second': 6.16, 'epoch': 1.0} +{'loss': 0.155, 'grad_norm': 6.901891708374023, 'learning_rate': 1.3362962962962964e-05, 'epoch': 1.2} +{'loss': 0.1719, 'grad_norm': 7.024008750915527, 'learning_rate': 1.1881481481481482e-05, 'epoch': 1.4} +{'loss': 0.1823, 'grad_norm': 2.771585464477539, 'learning_rate': 1.04e-05, 'epoch': 1.6} +{'loss': 0.163, 'grad_norm': 6.764422416687012, 'learning_rate': 8.91851851851852e-06, 'epoch': 1.8} +{'loss': 0.1323, 'grad_norm': 5.9319233894348145, 'learning_rate': 7.437037037037038e-06, 'epoch': 2.0} +{'eval_loss': 0.16934768855571747, 'eval_precision': 0.8376014990630856, 'eval_recall': 0.8412797992471769, 'eval_f1': 0.83943661971831, 'eval_accuracy': 0.952449567723343, 'eval_runtime': 2.8314, 'eval_samples_per_second': 176.589, 'eval_steps_per_second': 5.651, 'epoch': 2.0} +{'loss': 0.1191, 'grad_norm': 4.590412139892578, 'learning_rate': 5.955555555555555e-06, 'epoch': 2.2} +{'loss': 0.1166, 'grad_norm': 5.152184963226318, 'learning_rate': 4.4740740740740746e-06, 'epoch': 2.4} +{'loss': 0.1249, 'grad_norm': 2.571276903152466, 'learning_rate': 2.992592592592593e-06, 'epoch': 2.6} +{'loss': 0.0956, 'grad_norm': 5.860950946807861, 'learning_rate': 1.5111111111111112e-06, 'epoch': 2.8} +{'loss': 0.1022, 'grad_norm': 2.2289910316467285, 'learning_rate': 2.9629629629629632e-08, 'epoch': 3.0} +{'eval_loss': 0.15839220583438873, 'eval_precision': 0.8560700876095119, 'eval_recall': 0.8582183186951067, 'eval_f1': 0.8571428571428571, 'eval_accuracy': 0.9594846584166808, 'eval_runtime': 2.5876, 'eval_samples_per_second': 193.229, 'eval_steps_per_second': 6.183, 'epoch': 3.0} +{'train_runtime': 304.436, 'train_samples_per_second': 39.417, 'train_steps_per_second': 2.464, 'train_loss': 0.26952009137471516, 'epoch': 3.0} +New Data Upload : 100% 1.11G/1.11G [00:16<00:00, 68.1MB/s, 78.3MB/s ] + ...roberta/training_args.bin: 100% 5.91k/5.91k [00:16', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-xlm-roberta', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 17:05:35,951 INFO MainThread:15297 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 277456901 - > +2025-11-22 17:05:35,951 INFO MainThread:15297 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 277456901 None +2025-11-22 17:12:04,129 INFO MainThread:15297 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/g5jfi5ko +2025-11-22 17:12:04,131 INFO MainThread:15297 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 17:12:04,131 INFO MainThread:15297 [wandb_run.py:_restore():2451] restore +2025-11-22 17:12:04,131 INFO MainThread:15297 [wandb_run.py:_restore():2457] restore done +2025-11-22 17:12:04,760 INFO MainThread:15297 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_170227-g5jfi5ko/run-g5jfi5ko.wandb b/wandb_logs/run-20251122_170227-g5jfi5ko/run-g5jfi5ko.wandb new file mode 100644 index 0000000000000000000000000000000000000000..18a10baff11ef6f462cfcc29ef00d1b9907ffa08 --- /dev/null +++ b/wandb_logs/run-20251122_170227-g5jfi5ko/run-g5jfi5ko.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fee5f0537097f1e1b7c78b5d33aaddcb6e7a12fa6c96b869916ba741e6c11c +size 322782 diff --git a/wandb_logs/run-20251122_171322-7w55arw9/files/config.yaml b/wandb_logs/run-20251122_171322-7w55arw9/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af74fc7d1ef0f17ebc8c8776dfeb60e32855a261 --- /dev/null +++ b/wandb_logs/run-20251122_171322-7w55arw9/files/config.yaml @@ -0,0 +1,542 @@ +_name_or_path: + value: ai4bharat/indic-bert +_wandb: + value: + cli_version: 0.23.0 + e: + b5eqfyunt3b815qqvaxh4xx1c5xkxk97: + args: + - configs/indic_bert.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "94155644928" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T17:13:22.744450Z" + writerId: b5eqfyunt3b815qqvaxh4xx1c5xkxk97 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: null +attention_probs_dropout_prob: + value: 0 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: 2 +chunk_size_feed_forward: + value: 0 +classifier_dropout_prob: + value: 0.1 +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +down_scale_factor: + value: 1 +dtype: + value: float32 +early_stopping: + value: false +embedding_size: + value: 128 +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: 3 +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gap_size: + value: 0 +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-indic-bert +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +inner_group_num: + value: 1 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/indic-bert/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: ai4bharat/indic-bert +model/num_parameters: + value: 32856837 +model_type: + value: albert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +net_structure_type: + value: 0 +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_groups: + value: 1 +num_hidden_layers: + value: 12 +num_memory_blocks: + value: 0 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/indic-bert +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-indic-bert +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 200000 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_171322-7w55arw9/files/output.log b/wandb_logs/run-20251122_171322-7w55arw9/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..955b4a2212898fbc69cc815f5ee8b5e073f2b26f --- /dev/null +++ b/wandb_logs/run-20251122_171322-7w55arw9/files/output.log @@ -0,0 +1,86 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 750/750 [02:32<00:00, 4.93it/s] +{'loss': 1.574, 'grad_norm': 6.119658470153809, 'learning_rate': 0.0, 'epoch': 0.0} +{'loss': 1.3107, 'grad_norm': 5.074459075927734, 'learning_rate': 1.3066666666666668e-05, 'epoch': 0.2} +{'loss': 0.6553, 'grad_norm': 3.4850027561187744, 'learning_rate': 1.928888888888889e-05, 'epoch': 0.4} +{'loss': 0.4389, 'grad_norm': 3.8653061389923096, 'learning_rate': 1.780740740740741e-05, 'epoch': 0.6} +{'loss': 0.3126, 'grad_norm': 4.141387939453125, 'learning_rate': 1.632592592592593e-05, 'epoch': 0.8} +{'loss': 0.2768, 'grad_norm': 4.265642166137695, 'learning_rate': 1.4844444444444445e-05, 'epoch': 1.0} +Processing Files (5 / 5) : 100% 152M/152M [00:05<00:00, 29.1MB/s, 29.5MB/s ] +{'eval_loss': 0.2721101641654968, 'eval_precision': 0.8526984126984127, 'eval_recall': 0.7826340326340326, 'eval_f1': 0.8161652993011242, 'eval_accuracy': 0.9334136546184739, 'eval_runtime': 3.0672, 'eval_samples_per_second': 163.015, 'eval_steps_per_second': 5.216, 'epoch': 1.0} +{'loss': 0.2092, 'grad_norm': 9.980952262878418, 'learning_rate': 1.3362962962962964e-05, 'epoch': 1.2} +{'loss': 0.2157, 'grad_norm': 7.169114589691162, 'learning_rate': 1.1881481481481482e-05, 'epoch': 1.4} +{'loss': 0.2392, 'grad_norm': 1.4673056602478027, 'learning_rate': 1.04e-05, 'epoch': 1.6} +{'loss': 0.2242, 'grad_norm': 6.788226127624512, 'learning_rate': 8.91851851851852e-06, 'epoch': 1.8} +{'loss': 0.1821, 'grad_norm': 5.825960159301758, 'learning_rate': 7.437037037037038e-06, 'epoch': 2.0} +{'eval_loss': 0.21055959165096283, 'eval_precision': 0.868125, 'eval_recall': 0.8094405594405595, 'eval_f1': 0.8377563329312425, 'eval_accuracy': 0.9428112449799196, 'eval_runtime': 2.8781, 'eval_samples_per_second': 173.727, 'eval_steps_per_second': 5.559, 'epoch': 2.0} +{'loss': 0.1756, 'grad_norm': 3.8757948875427246, 'learning_rate': 5.955555555555555e-06, 'epoch': 2.2} +{'loss': 0.1646, 'grad_norm': 18.613061904907227, 'learning_rate': 4.4740740740740746e-06, 'epoch': 2.4} +{'loss': 0.177, 'grad_norm': 3.853347063064575, 'learning_rate': 2.992592592592593e-06, 'epoch': 2.6} +{'loss': 0.1657, 'grad_norm': 2.502561569213867, 'learning_rate': 1.5111111111111112e-06, 'epoch': 2.8} +{'loss': 0.1492, 'grad_norm': 2.574828624725342, 'learning_rate': 2.9629629629629632e-08, 'epoch': 3.0} +{'eval_loss': 0.19222867488861084, 'eval_precision': 0.847682119205298, 'eval_recall': 0.8205128205128205, 'eval_f1': 0.8338762214983713, 'eval_accuracy': 0.9481124497991967, 'eval_runtime': 3.0562, 'eval_samples_per_second': 163.604, 'eval_steps_per_second': 5.235, 'epoch': 3.0} +{'train_runtime': 152.1287, 'train_samples_per_second': 78.881, 'train_steps_per_second': 4.93, 'train_loss': 0.32680103063583377, 'epoch': 3.0} +New Data Upload : 100% 131M/131M [00:05<00:00, 25.1MB/s, 26.5MB/s ] + ...ic-bert/training_args.bin: 100% 5.91k/5.91k [00:04', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-indic-bert', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 17:13:38,525 INFO MainThread:18129 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 32856837 - > +2025-11-22 17:13:38,525 INFO MainThread:18129 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 32856837 None +2025-11-22 17:16:33,829 INFO MainThread:18129 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/7w55arw9 +2025-11-22 17:16:33,830 INFO MainThread:18129 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 17:16:33,830 INFO MainThread:18129 [wandb_run.py:_restore():2451] restore +2025-11-22 17:16:33,830 INFO MainThread:18129 [wandb_run.py:_restore():2457] restore done +2025-11-22 17:16:34,600 INFO MainThread:18129 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_171322-7w55arw9/run-7w55arw9.wandb b/wandb_logs/run-20251122_171322-7w55arw9/run-7w55arw9.wandb new file mode 100644 index 0000000000000000000000000000000000000000..76fbece21c475d7649b3e2bcbbf85e7b8fc506ee --- /dev/null +++ b/wandb_logs/run-20251122_171322-7w55arw9/run-7w55arw9.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a5a63a323e70de41e119860597dad6a8704f8d2affef927f1f98a2c79897a1 +size 175689 diff --git a/wandb_logs/run-20251122_172103-peygrszr/files/config.yaml b/wandb_logs/run-20251122_172103-peygrszr/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4e9a90cb7c697cb9daedf31a02ea11e301738dde --- /dev/null +++ b/wandb_logs/run-20251122_172103-peygrszr/files/config.yaml @@ -0,0 +1,543 @@ +_name_or_path: + value: bert-base-multilingual-cased +_wandb: + value: + cli_version: 0.23.0 + e: + ddbafaz7ripke8vjqssbknxqxuagn5b3: + args: + - configs/bert_multilingual.json + codePath: train.py + codePathLocal: train.py + cpu_count: 1 + cpu_count_logical: 2 + cudaVersion: "12.4" + disk: + /: + total: "120942624768" + used: "95554514944" + email: vijaysinghkushwaha3737@gmail.com + executable: /usr/bin/python3 + git: + commit: de0702477fe9412790ab872e2d1361a32e068b2d + remote: https://h3110Fr13nd:@github.com/h3110Fr13nd/code-switch-identification-model.git + gpu: Tesla T4 + gpu_count: 1 + gpu_nvidia: + - architecture: Turing + cudaCores: 2560 + memoryTotal: "16106127360" + name: Tesla T4 + uuid: GPU-d740f133-e3a8-f36c-28a2-52b7659451f1 + host: f0da10c9a12b + memory: + total: "13605842944" + os: Linux-6.6.105+-x86_64-with-glibc2.35 + program: /content/code-switch-identification-model/train.py + python: CPython 3.12.12 + root: /content/code-switch-identification-model + startedAt: "2025-11-22T17:21:03.021588Z" + writerId: ddbafaz7ripke8vjqssbknxqxuagn5b3 + m: + - "1": train/global_step + "6": + - 3 + "7": [] + - "2": '*' + "5": 1 + "6": + - 1 + "7": [] + python_version: 3.12.12 + t: + "1": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "2": + - 1 + - 2 + - 3 + - 5 + - 11 + - 12 + - 41 + - 49 + - 51 + - 53 + - 71 + - 98 + - 100 + - 105 + "3": + - 2 + - 7 + - 13 + - 15 + - 16 + - 19 + - 62 + - 66 + "4": 3.12.12 + "5": 0.23.0 + "6": 4.57.1 + "9": + "1": transformers_trainer + "12": 0.23.0 + "13": linux-x86_64 +accelerator_config: + value: + dispatch_batches: null + even_batches: true + gradient_accumulation_kwargs: null + non_blocking: false + split_batches: false + use_seedable_sampler: true +adafactor: + value: false +adam_beta1: + value: 0.9 +adam_beta2: + value: 0.999 +adam_epsilon: + value: 1e-08 +add_cross_attention: + value: false +architectures: + value: + - BertForMaskedLM +attention_probs_dropout_prob: + value: 0.1 +auto_find_batch_size: + value: false +average_tokens_across_devices: + value: true +bad_words_ids: + value: null +batch_eval_metrics: + value: false +batch_size: + value: 16 +begin_suppress_tokens: + value: null +bf16: + value: false +bf16_full_eval: + value: false +bos_token_id: + value: null +chunk_size_feed_forward: + value: 0 +classifier_dropout: + value: null +cross_attention_hidden_size: + value: null +data_seed: + value: null +dataloader_drop_last: + value: false +dataloader_num_workers: + value: 0 +dataloader_persistent_workers: + value: false +dataloader_pin_memory: + value: true +dataloader_prefetch_factor: + value: null +ddp_backend: + value: null +ddp_broadcast_buffers: + value: null +ddp_bucket_cap_mb: + value: null +ddp_find_unused_parameters: + value: null +ddp_timeout: + value: 1800 +debug: + value: [] +decoder_start_token_id: + value: null +deepspeed: + value: null +directionality: + value: bidi +disable_tqdm: + value: false +diversity_penalty: + value: 0 +do_eval: + value: true +do_predict: + value: false +do_sample: + value: false +do_train: + value: false +dtype: + value: float32 +early_stopping: + value: false +encoder_no_repeat_ngram_size: + value: 0 +eos_token_id: + value: null +epochs: + value: 3 +eval_accumulation_steps: + value: null +eval_delay: + value: 0 +eval_do_concat_batches: + value: true +eval_on_start: + value: false +eval_steps: + value: null +eval_strategy: + value: epoch +eval_use_gather_object: + value: false +exponential_decay_length_penalty: + value: null +finetuning_task: + value: null +forced_bos_token_id: + value: null +forced_eos_token_id: + value: null +fp16: + value: false +fp16_backend: + value: auto +fp16_full_eval: + value: false +fp16_opt_level: + value: O1 +fsdp: + value: [] +fsdp_config: + value: + min_num_params: 0 + xla: false + xla_fsdp_grad_ckpt: false + xla_fsdp_v2: false +fsdp_min_num_params: + value: 0 +fsdp_transformer_layer_cls_to_wrap: + value: null +full_determinism: + value: false +gradient_accumulation_steps: + value: 1 +gradient_checkpointing: + value: false +gradient_checkpointing_kwargs: + value: null +greater_is_better: + value: true +group_by_length: + value: false +half_precision_backend: + value: auto +hidden_act: + value: gelu +hidden_dropout_prob: + value: 0.1 +hidden_size: + value: 768 +hub_always_push: + value: false +hub_model_id: + value: h3110Fr13nd/guj-eng-code-switch-bert-multilingual +hub_private_repo: + value: false +hub_revision: + value: null +hub_strategy: + value: end +hub_token: + value: +id2label: + value: + "0": O + "1": B-GUJ + "2": I-GUJ + "3": B-ENG + "4": I-ENG +ignore_data_skip: + value: false +include_for_metrics: + value: [] +include_inputs_for_metrics: + value: false +include_num_input_tokens_seen: + value: "no" +include_tokens_per_second: + value: false +initializer_range: + value: 0.02 +intermediate_size: + value: 3072 +is_decoder: + value: false +is_encoder_decoder: + value: false +jit_mode_eval: + value: false +label_names: + value: null +label_smoothing_factor: + value: 0 +label2id: + value: + B-ENG: 3 + B-GUJ: 1 + I-ENG: 4 + I-GUJ: 2 + O: 0 +layer_norm_eps: + value: 1e-12 +learning_rate: + value: 2e-05 +length_column_name: + value: length +length_penalty: + value: 1 +liger_kernel_config: + value: null +load_best_model_at_end: + value: true +local_rank: + value: 0 +log_level: + value: passive +log_level_replica: + value: warning +log_on_each_node: + value: true +logging_dir: + value: ./models/bert-multilingual/logs +logging_first_step: + value: true +logging_nan_inf_filter: + value: true +logging_steps: + value: 50 +logging_strategy: + value: steps +lr_scheduler_type: + value: linear +max_grad_norm: + value: 1 +max_length: + value: 20 +max_position_embeddings: + value: 512 +max_steps: + value: -1 +metric_for_best_model: + value: f1 +min_length: + value: 0 +model: + value: bert-base-multilingual-cased +model/num_parameters: + value: 177266693 +model_type: + value: bert +mp_parameters: + value: "" +neftune_noise_alpha: + value: null +no_cuda: + value: false +no_repeat_ngram_size: + value: 0 +num_attention_heads: + value: 12 +num_beam_groups: + value: 1 +num_beams: + value: 1 +num_hidden_layers: + value: 12 +num_return_sequences: + value: 1 +num_train_epochs: + value: 3 +optim: + value: adamw_torch_fused +optim_args: + value: null +optim_target_modules: + value: null +output_attentions: + value: false +output_dir: + value: ./models/bert-multilingual +output_hidden_states: + value: false +output_scores: + value: false +overwrite_output_dir: + value: true +pad_token_id: + value: 0 +parallelism_config: + value: null +past_index: + value: -1 +per_device_eval_batch_size: + value: 32 +per_device_train_batch_size: + value: 16 +per_gpu_eval_batch_size: + value: null +per_gpu_train_batch_size: + value: null +pooler_fc_size: + value: 768 +pooler_num_attention_heads: + value: 12 +pooler_num_fc_layers: + value: 3 +pooler_size_per_head: + value: 128 +pooler_type: + value: first_token_transform +position_embedding_type: + value: absolute +prediction_loss_only: + value: false +prefix: + value: null +problem_type: + value: null +project: + value: huggingface +push_to_hub: + value: true +push_to_hub_model_id: + value: guj-eng-code-switch-bert-multilingual +push_to_hub_organization: + value: null +push_to_hub_token: + value: +ray_scope: + value: last +remove_invalid_values: + value: false +remove_unused_columns: + value: true +repetition_penalty: + value: 1 +report_to: + value: + - tensorboard + - wandb +restore_callback_states_from_checkpoint: + value: false +resume_from_checkpoint: + value: null +return_dict: + value: true +return_dict_in_generate: + value: false +run_name: + value: null +save_on_each_node: + value: false +save_only_model: + value: false +save_safetensors: + value: true +save_steps: + value: null +save_strategy: + value: epoch +save_total_limit: + value: 3 +seed: + value: 42 +sep_token_id: + value: null +skip_memory_metrics: + value: true +suppress_tokens: + value: null +task_specific_params: + value: null +temperature: + value: 1 +tf_legacy_loss: + value: false +tf32: + value: null +tie_encoder_decoder: + value: false +tie_word_embeddings: + value: true +tokenizer_class: + value: null +top_k: + value: 50 +top_p: + value: 1 +torch_compile: + value: false +torch_compile_backend: + value: null +torch_compile_mode: + value: null +torch_empty_cache_steps: + value: null +torchdynamo: + value: null +torchscript: + value: false +tpu_metrics_debug: + value: false +tpu_num_cores: + value: null +trackio_space_id: + value: trackio +train_file: + value: synthetic_data_gen/data/train.txt +transformers_version: + value: 4.57.1 +type_vocab_size: + value: 2 +typical_p: + value: 1 +use_bfloat16: + value: false +use_cache: + value: true +use_cpu: + value: false +use_legacy_prediction_loop: + value: false +use_liger_kernel: + value: false +use_mps_device: + value: false +vocab_size: + value: 119547 +warmup_ratio: + value: 0.1 +warmup_steps: + value: 0 +weight_decay: + value: 0.01 diff --git a/wandb_logs/run-20251122_172103-peygrszr/files/output.log b/wandb_logs/run-20251122_172103-peygrszr/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..694f46d3cf09a71e0103e7effcb88fb03f1b131f --- /dev/null +++ b/wandb_logs/run-20251122_172103-peygrszr/files/output.log @@ -0,0 +1,82 @@ + + _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_| + _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_| + _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _| + _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_| +Enter your token (input will not be visible): + +Cannot authenticate through git-credential as no helper is defined on your machine. +You might have to re-authenticate when pushing to the Hugging Face Hub. +Run the following command in your terminal in case you want to set the 'store' credential helper as default. + +git config --global credential.helper store + +Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details. +Token has not been saved to git credential helper. +WARNING:huggingface_hub._login:Token has not been saved to git credential helper. +Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight'] +You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference. +100% 750/750 [03:37<00:00, 3.45it/s] +{'loss': 1.6231, 'grad_norm': 9.33836555480957, 'learning_rate': 0.0, 'epoch': 0.0} +{'loss': 1.0493, 'grad_norm': 4.152083396911621, 'learning_rate': 1.3066666666666668e-05, 'epoch': 0.2} +{'loss': 0.4348, 'grad_norm': 5.053413391113281, 'learning_rate': 1.928888888888889e-05, 'epoch': 0.4} +{'loss': 0.289, 'grad_norm': 1.1555070877075195, 'learning_rate': 1.780740740740741e-05, 'epoch': 0.6} +{'loss': 0.2394, 'grad_norm': 9.618512153625488, 'learning_rate': 1.632592592592593e-05, 'epoch': 0.8} +{'loss': 0.2148, 'grad_norm': 1.7589733600616455, 'learning_rate': 1.4844444444444445e-05, 'epoch': 1.0} +Processing Files (3 / 3) : 100% 709M/709M [00:10<00:00, 67.9MB/s, 70.9MB/s ] +{'eval_loss': 0.22135691344738007, 'eval_precision': 0.8413559322033898, 'eval_recall': 0.7839545167403664, 'eval_f1': 0.8116415958142578, 'eval_accuracy': 0.9431575488746509, 'eval_runtime': 2.7824, 'eval_samples_per_second': 179.698, 'eval_steps_per_second': 5.75, 'epoch': 1.0} +{'loss': 0.1342, 'grad_norm': 6.0391130447387695, 'learning_rate': 1.3362962962962964e-05, 'epoch': 1.2} +{'loss': 0.1649, 'grad_norm': 4.984821319580078, 'learning_rate': 1.1881481481481482e-05, 'epoch': 1.4} +{'loss': 0.1696, 'grad_norm': 6.739714622497559, 'learning_rate': 1.04e-05, 'epoch': 1.6} +{'loss': 0.162, 'grad_norm': 3.3575751781463623, 'learning_rate': 8.91851851851852e-06, 'epoch': 1.8} +{'loss': 0.1215, 'grad_norm': 2.643911600112915, 'learning_rate': 7.437037037037038e-06, 'epoch': 2.0} +{'eval_loss': 0.15503820776939392, 'eval_precision': 0.8323773103887827, 'eval_recall': 0.8250157927984839, 'eval_f1': 0.8286802030456852, 'eval_accuracy': 0.9559717430589781, 'eval_runtime': 2.829, 'eval_samples_per_second': 176.741, 'eval_steps_per_second': 5.656, 'epoch': 2.0} +{'loss': 0.1055, 'grad_norm': 1.7310476303100586, 'learning_rate': 5.955555555555555e-06, 'epoch': 2.2} +{'loss': 0.1064, 'grad_norm': 5.77832555770874, 'learning_rate': 4.4740740740740746e-06, 'epoch': 2.4} +{'loss': 0.1093, 'grad_norm': 0.972324013710022, 'learning_rate': 2.992592592592593e-06, 'epoch': 2.6} +{'loss': 0.0858, 'grad_norm': 3.006495714187622, 'learning_rate': 1.5111111111111112e-06, 'epoch': 2.8} +{'loss': 0.0873, 'grad_norm': 0.5301690697669983, 'learning_rate': 2.9629629629629632e-08, 'epoch': 3.0} +{'eval_loss': 0.15199844539165497, 'eval_precision': 0.8580562659846548, 'eval_recall': 0.8477574226152874, 'eval_f1': 0.8528757546870035, 'eval_accuracy': 0.9608181370133071, 'eval_runtime': 2.787, 'eval_samples_per_second': 179.403, 'eval_steps_per_second': 5.741, 'epoch': 3.0} +{'train_runtime': 217.2222, 'train_samples_per_second': 55.243, 'train_steps_per_second': 3.453, 'train_loss': 0.23235161240895588, 'epoch': 3.0} +New Data Upload : 100% 709M/709M [00:10<00:00, 67.9MB/s, 70.9MB/s ] + ...lingual/training_args.bin: 100% 5.91k/5.91k [00:10', 'hub_private_repo': False, 'hub_always_push': False, 'hub_revision': None, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': 'guj-eng-code-switch-bert-multilingual', 'push_to_hub_organization': None, 'push_to_hub_token': '', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': 'no', 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'liger_kernel_config': None, 'eval_use_gather_object': False, 'average_tokens_across_devices': True} +2025-11-22 17:27:38,360 INFO MainThread:20205 [wandb_config.py:__setitem__():154] [no run ID] config set model/num_parameters = 177266693 - > +2025-11-22 17:27:38,361 INFO MainThread:20205 [wandb_run.py:_config_callback():1385] config_cb model/num_parameters 177266693 None +2025-11-22 17:32:02,106 INFO MainThread:20205 [wandb_run.py:_finish():2270] finishing run h3110fr13nd-esparkbiz-technologies-pvt/gujarati-english-code-switch/peygrszr +2025-11-22 17:32:02,108 INFO MainThread:20205 [wandb_run.py:_atexit_cleanup():2469] got exitcode: 0 +2025-11-22 17:32:02,109 INFO MainThread:20205 [wandb_run.py:_restore():2451] restore +2025-11-22 17:32:02,110 INFO MainThread:20205 [wandb_run.py:_restore():2457] restore done +2025-11-22 17:32:02,679 INFO MainThread:20205 [wandb_run.py:_footer_sync_info():3853] logging synced files diff --git a/wandb_logs/run-20251122_172103-peygrszr/run-peygrszr.wandb b/wandb_logs/run-20251122_172103-peygrszr/run-peygrszr.wandb new file mode 100644 index 0000000000000000000000000000000000000000..32d52c0d4299390aad2a28683418945a7be92b5b --- /dev/null +++ b/wandb_logs/run-20251122_172103-peygrszr/run-peygrszr.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352681e206c3b66c06d224ab504635d794f5a884f3db45709d702817b67ec7f0 +size 224181