1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
import numpy as np
from scipy.optimize import minimize
import pandas as pd
# ============ 第1部分:生成模拟数据 ============
np.random.seed(42)
# 真实的参数(我们最后要估计这些)
theta_true = {
'alpha': -0.1, # 价格系数
'beta_X': 0.5, # 特征系数
'xi': np.array([0.3, -0.2, 0.1]) # 三个产品的不可观测特征
}
# 产品特征
X = np.array([
[2.0], # 产品1的特征值
[1.5], # 产品2的特征值
[2.5] # 产品3的特征值
])
# 产品价格
P = np.array([20, 25, 22])
# 生成数据:1000个消费者的选择
n_consumers = 1000
J = 3 # 3个产品
# 计算真实的市场份额
def compute_market_share(P, X, theta):
"""给定参数,计算市场份额"""
alpha = theta['alpha']
beta_X = theta['beta_X']
xi = theta['xi']
V = alpha * np.log(P) + beta_X * X.flatten() + xi
# Logit概率
exp_V = np.exp(V - np.max(V)) # 数值稳定性
share = exp_V / np.sum(exp_V)
return share
# 计算真实市场份额
true_share = compute_market_share(P, X, theta_true)
print(f"真实市场份额:{true_share}")
# 输出:[0.408, 0.287, 0.305]
# 模拟消费者的选择
choices = np.random.choice(J, n_consumers, p=true_share)
observed_share = np.array([np.sum(choices == j) for j in range(J)]) / n_consumers
print(f"观测市场份额:{observed_share}")
# 输出:[0.41, 0.28, 0.31](接近真实值)
# ============ 第2部分:参数估计 ============
def loglik_individual(theta, choices, P, X):
"""计算个体选择的对数似然"""
alpha = theta[0]
beta_X = theta[1]
xi = theta[2:]
V = alpha * np.log(P) + beta_X * X.flatten() + xi
# Logit概率
exp_V = np.exp(V - np.max(V))
prob = exp_V / np.sum(exp_V)
# 对数似然
ll = 0
for choice in choices:
ll += np.log(prob[choice])
return -ll # 最小化负对数似然
# 初始值
theta_init = np.array([-0.1, 0.5, 0.3, -0.2, 0.1])
# 优化
result = minimize(loglik_individual, theta_init,
args=(choices, P, X),
method='BFGS')
theta_est = result.x
print(f"估计的参数:{theta_est}")
print(f"真实参数:{np.array([-0.1, 0.5, 0.3, -0.2, 0.1])}")
# ============ 第3部分:验证映射 ============
print("\n验证参数空间到数据空间的映射:")
# 用估计的参数预测市场份额
alpha_est = theta_est[0]
beta_est = theta_est[1]
xi_est = theta_est[2:]
V_pred = alpha_est * np.log(P) + beta_est * X.flatten() + xi_est
exp_V = np.exp(V_pred - np.max(V_pred))
pred_share = exp_V / np.sum(exp_V)
print(f"观测市场份额:{observed_share}")
print(f"预测市场份额:{pred_share}")
print(f"误差:{np.abs(observed_share - pred_share)}")
# ============ 第4部分:反演不可观测特征 ============
print("\n反演不可观测特征:")
# 从市场份额反演δ
delta = np.log(observed_share)
# 减去观测特征的贡献
V_obs = alpha_est * np.log(P) + beta_est * X.flatten()
# 反演ξ
xi_inferred = delta - V_obs
print(f"真实ξ:{theta_true['xi']}")
print(f"推断ξ:{xi_inferred}")
# ============ 第5部分:反事实分析 ============
print("\n反事实:如果产品1降价10%")
P_new = P.copy()
P_new[0] = P[0] * 0.9 # 产品1降价10%
# 新的市场份额
V_new = alpha_est * np.log(P_new) + beta_est * X.flatten() + xi_inferred
exp_V_new = np.exp(V_new - np.max(V_new))
new_share = exp_V_new / np.sum(exp_V_new)
print(f"原市场份额:{pred_share}")
print(f"降价后市场份额:{new_share}")
print(f"产品1的份额变化:{(new_share[0]-pred_share[0])*100:.1f}%")
print(f"产品2的份额变化:{(new_share[1]-pred_share[1])*100:.1f}%")
print(f"产品3的份额变化:{{(new_share[2]-pred_share[2])*100:.1f}%")
|