Softmax回归的从零实现

https://zh.d2l.ai/chapter_linear-networks/softmax-regression-scratch.html

# Exercise 1
# Show how large exp() gets for a moderately large logit, which is why a
# softmax implemented directly from its definition is numerically fragile.
# The argument must be a float: a bare Python int becomes an int32 tensor and
# the Exp op only accepts floating-point/complex dtypes, so tf.math.exp(50)
# raises InvalidArgumentError instead of returning a value.
# NOTE: the traceback recorded below this cell was produced by the original
# integer call, not by this corrected one.
print(tf.math.exp(50.0))
     
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Cell In[37], line 2
      1 # 练习 1
----> 2 print(tf.math.exp(50))

File /usr/local/lib/python3.11/dist-packages/tensorflow/python/ops/weak_tensor_ops.py:88, in weak_tensor_unary_op_wrapper.<locals>.wrapper(*args, **kwargs)
     86 def wrapper(*args, **kwargs):
     87   if not ops.is_auto_dtype_conversion_enabled():
---> 88     return op(*args, **kwargs)
     89   bound_arguments = signature.bind(*args, **kwargs)
     90   bound_arguments.apply_defaults()

File /usr/local/lib/python3.11/dist-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File /usr/local/lib/python3.11/dist-packages/tensorflow/python/framework/ops.py:6002, in raise_from_not_ok_status(e, name)
   6000 def raise_from_not_ok_status(e, name) -> NoReturn:
   6001   e.message += (" name: " + str(name if name is not None else ""))
-> 6002   raise core._status_to_exception(e) from None

InvalidArgumentError: Value for attr 'T' of int32 is not in the list of allowed values: bfloat16, half, float, double, complex64, complex128
	; NodeDef: {{node Exp}}; Op<name=Exp; signature=x:T -> y:T; attr=T:type,allowed=[DT_BFLOAT16, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128]> [Op:Exp] name: 
# Exercise 2
# Show what the cross-entropy term -log(p) becomes when the predicted
# probability p is exactly 0: log(0.0) is -inf, so the loss is inf.
# The argument must be a float: a bare Python int becomes an int32 tensor and
# the Log op only accepts floating-point/complex dtypes, so tf.math.log(0)
# raises InvalidArgumentError instead of returning a value.
# NOTE: the traceback recorded below this cell was produced by the original
# integer call, not by this corrected one.
print(-tf.math.log(0.0))
     
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Cell In[38], line 2
      1 # 练习 2
----> 2 print(-tf.math.log(0))

File /usr/local/lib/python3.11/dist-packages/tensorflow/python/ops/weak_tensor_ops.py:88, in weak_tensor_unary_op_wrapper.<locals>.wrapper(*args, **kwargs)
     86 def wrapper(*args, **kwargs):
     87   if not ops.is_auto_dtype_conversion_enabled():
---> 88     return op(*args, **kwargs)
     89   bound_arguments = signature.bind(*args, **kwargs)
     90   bound_arguments.apply_defaults()

File /usr/local/lib/python3.11/dist-packages/tensorflow/python/ops/gen_math_ops.py:5681, in log(x, name)
   5679   return _result
   5680 except _core._NotOkStatusException as e:
-> 5681   _ops.raise_from_not_ok_status(e, name)
   5682 except _core._FallbackException:
   5683   pass

File /usr/local/lib/python3.11/dist-packages/tensorflow/python/framework/ops.py:6002, in raise_from_not_ok_status(e, name)
   6000 def raise_from_not_ok_status(e, name) -> NoReturn:
   6001   e.message += (" name: " + str(name if name is not None else ""))
-> 6002   raise core._status_to_exception(e) from None

InvalidArgumentError: Value for attr 'T' of int32 is not in the list of allowed values: bfloat16, half, float, double, complex64, complex128
	; NodeDef: {{node Log}}; Op<name=Log; signature=x:T -> y:T; attr=T:type,allowed=[DT_BFLOAT16, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128]> [Op:Log] name: 
# Exercise 3
def cross_entropy(y_hat, y):
    """Return the per-example cross-entropy loss ``-log(y_hat[i, y[i]])``.

    Args:
        y_hat: Predicted probabilities, indexed as (example, class).
        y: Integer class index of the true label for each example.

    Returns:
        A tensor with one loss value per example.
    """
    # Clip into [1e-10, 1] so that a predicted probability of exactly 0 gives
    # a large finite loss rather than -log(0) = inf.
    y_hat = tf.clip_by_value(y_hat, 1e-10, 1.0)
    # With batch_dims=1, row i of y_hat is indexed by y[i], i.e. the
    # probability assigned to the true class of example i. Unlike building
    # explicit (row, label) index pairs with tf.range, this does not require
    # y to be int32 or the batch size to be statically known.
    true_class_prob = tf.gather(y_hat, y, batch_dims=1)
    return -tf.math.log(true_class_prob)


cross_entropy(y_hat, y)
     
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.3025851, 0.6931472], dtype=float32)>

练习 4

并不是,不同场景下可能需要为分类设置不同的阈值。比如在判断一个人是否需要接受进一步检查时,即使模型预测其患某种疾病的概率不足50%,也可能非常有必要做进一步检查。此时我们应当根据这个概率给出“疑似患病”的判断,而不是直接取概率最大的类别、得出“大概率不患病”的结论。

练习 5

当单词数量过多时,one-hot 编码会变得非常稀疏,导致模型难以学习,并消耗大量内存。此时可能需要借助其他方法,如词嵌入(word embedding)来解决这个问题。