# Source: Hugging Face Space trangngiosds/real-estate-price-demo (status: Running)
# File: real-estate-price-demo/models/mm-text-no-price/config.yaml (3.51 kB)
# Last commit: "initial commit" (fadd436) by trangngiosds, over 1 year ago

	model:
	names:
	- categorical_transformer
	- fusion_transformer
	- hf_text
	- numerical_transformer
	hf_text:
	checkpoint_name: local://hf_text
	gradient_checkpointing: false
	pooling_mode: cls
	data_types:
	- text
	tokenizer_name: hf_auto
	max_text_len: 512
	insert_sep: true
	low_cpu_mem_usage: false
	text_segment_num: 2
	stochastic_chunk: false
	text_aug_detect_length: 10
	text_trivial_aug_maxscale: 0.0
	text_train_augment_types: null
	categorical_transformer:
	out_features: 192
	d_token: 192
	ffn_d_hidden: 192
	num_trans_blocks: 0
	num_attn_heads: 8
	residual_dropout: 0.0
	attention_dropout: 0.2
	ffn_dropout: 0.1
	normalization: layer_norm
	ffn_activation: reglu
	head_activation: relu
	data_types:
	- categorical
	additive_attention: false
	share_qv_weights: false
	numerical_transformer:
	out_features: 192
	d_token: 192
	ffn_d_hidden: 192
	num_trans_blocks: 0
	num_attn_heads: 8
	residual_dropout: 0.0
	attention_dropout: 0.2
	ffn_dropout: 0.1
	normalization: layer_norm
	ffn_activation: reglu
	head_activation: relu
	data_types:
	- numerical
	embedding_arch:
	- linear
	- relu
	merge: concat
	additive_attention: false
	share_qv_weights: false
	fusion_transformer:
	hidden_size: 192
	n_blocks: 3
	attention_n_heads: 8
	adapt_in_features: max
	attention_dropout: 0.2
	residual_dropout: 0.0
	ffn_dropout: 0.1
	ffn_d_hidden: 192
	normalization: layer_norm
	ffn_activation: geglu
	head_activation: relu
	data_types: null
	additive_attention: false
	share_qv_weights: false
	data:
	image:
	missing_value_strategy: zero
	text:
	normalize_text: false
	categorical:
	minimum_cat_count: 100
	maximum_num_cat: 20
	convert_to_text: false
	numerical:
	convert_to_text: false
	scaler_with_mean: true
	scaler_with_std: true
	document:
	missing_value_strategy: zero
	label:
	numerical_label_preprocessing: standardscaler
	pos_label: null
	mixup:
	turn_on: false
	mixup_alpha: 0.8
	cutmix_alpha: 1.0
	cutmix_minmax: null
	prob: 1.0
	switch_prob: 0.5
	mode: batch
	turn_off_epoch: 5
	label_smoothing: 0.1
	templates:
	turn_on: false
	num_templates: 30
	template_length: 2048
	preset_templates:
	- super_glue
	- rte
	custom_templates: null
	optimization:
	optim_type: adamw
	learning_rate: 0.0001
	weight_decay: 0.001
	lr_choice: layerwise_decay
	lr_decay: 0.9
	lr_schedule: cosine_decay
	max_epochs: 10
	max_steps: -1
	warmup_steps: 0.1
	end_lr: 0
	lr_mult: 1
	patience: 10
	val_check_interval: 0.5
	check_val_every_n_epoch: 1
	skip_final_val: false
	gradient_clip_val: 1
	gradient_clip_algorithm: norm
	track_grad_norm: -1
	log_every_n_steps: 10
	top_k: 3
	top_k_average_method: greedy_soup
	efficient_finetune: null
	lora:
	module_filter: null
	filter:
	- query
	- value
	- ^q$
	- ^v$
	- ^k$
	- ^o$
	r: 8
	alpha: 8
	loss_function: auto
	focal_loss:
	alpha: null
	gamma: 2.0
	reduction: mean
	env:
	num_gpus: 1
	num_nodes: 1
	batch_size: 32
	per_gpu_batch_size: 8
	eval_batch_size_ratio: 4
	per_gpu_batch_size_evaluation: null
	precision: 16
	num_workers: 2
	num_workers_evaluation: 2
	fast_dev_run: false
	deterministic: false
	auto_select_gpus: true
	strategy: null
	deepspeed_allgather_size: 1000000000.0
	deepspeed_allreduce_size: 1000000000.0