import torch
import torch.nn as nn
from typing import Dict, Tuple

class KickstarterModel(nn.Module):
    """Kickstarter Project Success Prediction Model"""
    
    def __init__(
        self,
        desc_embedding_dim=768,
        blurb_embedding_dim=384,
        risk_embedding_dim=384,
        subcategory_embedding_dim=100,
        category_embedding_dim=15,
        country_embedding_dim=100,
        numerical_features_dim=9,
        hidden_dim=512,
        dropout_rate=0.3
    ):
        """
        Initialize the model
        
        Args:
            desc_embedding_dim: Description embedding vector dimension
            blurb_embedding_dim: Blurb embedding vector dimension
            risk_embedding_dim: Risk embedding vector dimension
            subcategory_embedding_dim: Subcategory embedding vector dimension
            category_embedding_dim: Category embedding vector dimension
            country_embedding_dim: Country embedding vector dimension
            numerical_features_dim: Numerical features dimension
            hidden_dim: Hidden layer dimension
            dropout_rate: Dropout rate
        """
        super().__init__()
        
        # Helper function to create feature processing layers
        def create_fc_block(input_dim, output_dim):
            return nn.Sequential(
                nn.Linear(input_dim, output_dim),
                nn.BatchNorm1d(output_dim),
                nn.ReLU(),
                nn.Dropout(dropout_rate)
            )
        
        # Feature processing layers
        self.desc_fc = create_fc_block(desc_embedding_dim, hidden_dim)
        self.blurb_fc = create_fc_block(blurb_embedding_dim, hidden_dim // 2)
        self.risk_fc = create_fc_block(risk_embedding_dim, hidden_dim // 2)
        self.subcategory_fc = create_fc_block(subcategory_embedding_dim, hidden_dim // 4)
        self.category_fc = create_fc_block(category_embedding_dim, hidden_dim // 8)
        self.country_fc = create_fc_block(country_embedding_dim, hidden_dim // 8)
        self.numerical_fc = create_fc_block(numerical_features_dim, hidden_dim // 4)
        
        # Combined features dimension
        concat_dim = (hidden_dim + 
                     hidden_dim // 2 + 
                     hidden_dim // 2 + 
                     hidden_dim // 4 + 
                     hidden_dim // 8 + 
                     hidden_dim // 8 + 
                     hidden_dim // 4)
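        # With the default hidden_dim=512: 512 + 256 + 256 + 128 + 64 + 64 + 128 = 1408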
        
        # Fully connected layers
        self.fc1 = create_fc_block(concat_dim, hidden_dim)
        self.fc2 = create_fc_block(hidden_dim, hidden_dim // 2)
        
        # Output layer
        self.output = nn.Linear(hidden_dim // 2, 1)
        
        # Input names for SHAP explanation
        self.input_names = [
            'description_embedding',
            'blurb_embedding',
            'risk_embedding',
            'subcategory_embedding',
            'category_embedding',
            'country_embedding',
            'numerical_features'
        ]
        
    def forward(self, inputs: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        """
        Forward propagation
        
        Args:
            inputs: Dictionary containing all input features
            
        Returns:
            Prediction probability and intermediate feature representations
        """
        # Process embeddings
        desc_out = self.desc_fc(inputs['description_embedding'])
        blurb_out = self.blurb_fc(inputs['blurb_embedding'])
        risk_out = self.risk_fc(inputs['risk_embedding'])
        subcategory_out = self.subcategory_fc(inputs['subcategory_embedding'])
        category_out = self.category_fc(inputs['category_embedding'])
        country_out = self.country_fc(inputs['country_embedding'])
        numerical_out = self.numerical_fc(inputs['numerical_features'])
        
        # Concatenate all features
        combined = torch.cat([
            desc_out, 
            blurb_out, 
            risk_out, 
            subcategory_out, 
            category_out,
            country_out,
            numerical_out
        ], dim=1)
        
        # Fully connected layers
        fc1_out = self.fc1(combined)
        x = self.fc2(fc1_out)
        
        # Output layer
        logits = self.output(x)
        probs = torch.sigmoid(logits)
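        # Design note: because sigmoid is applied here, a training loop would
        # pair this output with nn.BCELoss; returning raw logits and using
        # nn.BCEWithLogitsLoss instead would be more numerically stable.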
        
        # Store intermediate features for SHAP explanation
        intermediate_features = {
            'description_embedding': desc_out,
            'blurb_embedding': blurb_out,
            'risk_embedding': risk_out,
            'subcategory_embedding': subcategory_out,
            'category_embedding': category_out,
            'country_embedding': country_out,
            'numerical_features': numerical_out,
            'combined': combined,
            'fc1': fc1_out,
            'fc2': x
        }
        
        return probs.squeeze(1), intermediate_features
    
    def predict(self, inputs: Dict[str, torch.Tensor]) -> torch.Tensor:
        """
        Prediction function
        
        Args:
            inputs: Dictionary containing all input features
            
        Returns:
            Prediction probability
        """
        self.eval()
        with torch.no_grad():
            probs, _ = self(inputs)
        return probs
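

if __name__ == "__main__":
    # Minimal smoke-test sketch, assuming the default feature dimensions
    # declared in __init__ (768/384/384/100/15/100/9); the batch size and
    # random inputs below are illustrative stand-ins, not real data.
    batch_size = 4
    model = KickstarterModel()
    inputs = {
        'description_embedding': torch.randn(batch_size, 768),
        'blurb_embedding': torch.randn(batch_size, 384),
        'risk_embedding': torch.randn(batch_size, 384),
        'subcategory_embedding': torch.randn(batch_size, 100),
        'category_embedding': torch.randn(batch_size, 15),
        'country_embedding': torch.randn(batch_size, 100),
        'numerical_features': torch.randn(batch_size, 9),
    }
    probs = model.predict(inputs)
    print(probs.shape)  # expected: torch.Size([4])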