#!/usr/bin/env python3
"""
Example usage of the quantized ONNX LazarusNLP IndoBERT model.
Demonstrates basic inference, batch processing, and similarity computation.
"""

import onnxruntime as ort
from transformers import AutoTokenizer
import numpy as np
import time
from sklearn.metrics.pairwise import cosine_similarity
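
# Dependencies assumed by this example (not pinned to specific versions):
#   pip install onnxruntime transformers numpy scikit-learn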

def load_model(model_path="./"):
    """Load the quantized ONNX model and tokenizer."""
    print("Loading quantized ONNX model...")
    
    # Load ONNX session
    session = ort.InferenceSession(f"{model_path}/model.onnx")
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    print(f"βœ“ Model loaded successfully")
    print(f"βœ“ Tokenizer max length: {tokenizer.model_max_length}")
    
    return session, tokenizer
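
# Variant of load_model (a sketch, not part of the original example): recent
# onnxruntime releases prefer the execution providers to be listed explicitly,
# and CPUExecutionProvider is a safe default for a quantized CPU model.
def load_model_with_providers(model_path="./", providers=("CPUExecutionProvider",)):
    """Load the quantized ONNX model with an explicit execution-provider list."""
    session = ort.InferenceSession(f"{model_path}/model.onnx", providers=list(providers))
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return session, tokenizer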

def get_embeddings(session, tokenizer, texts, pool_strategy="mean"):
    """
    Get embeddings for texts using the ONNX model.
    
    Args:
        session: ONNX inference session
        tokenizer: HuggingFace tokenizer
        texts: List of texts or single text
        pool_strategy: Pooling strategy ('mean', 'cls', 'max')
    
    Returns:
        numpy array of embeddings
    """
    if isinstance(texts, str):
        texts = [texts]
    
    # Tokenize
    inputs = tokenizer(texts, return_tensors="np", padding=True, truncation=True)
    
    # Run inference
    outputs = session.run(None, {
        'input_ids': inputs['input_ids'],
        'attention_mask': inputs['attention_mask']
    })
    
    # Extract embeddings
    hidden_states = outputs[0]  # Shape: [batch_size, seq_len, hidden_size]
    attention_mask = inputs['attention_mask']
    
    if pool_strategy == "mean":
        # Mean pooling with attention mask
        mask_expanded = np.expand_dims(attention_mask, axis=-1)
        masked_embeddings = hidden_states * mask_expanded
        sum_embeddings = np.sum(masked_embeddings, axis=1)
        sum_mask = np.sum(mask_expanded, axis=1)
        embeddings = sum_embeddings / np.maximum(sum_mask, 1e-9)
    elif pool_strategy == "cls":
        # Use [CLS] token embedding
        embeddings = hidden_states[:, 0, :]
    elif pool_strategy == "max":
        # Max pooling
        embeddings = np.max(hidden_states, axis=1)
    else:
        raise ValueError(f"Unknown pooling strategy: {pool_strategy}")
    
    return embeddings
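
# Optional helper (an assumption, not part of the original example): some BERT
# ONNX exports also declare a token_type_ids input. Building the feed from the
# session's declared inputs keeps inference working for either kind of export.
def build_onnx_feed(session, tokenizer, texts):
    """Tokenize texts and keep only the tensors the exported graph declares."""
    if isinstance(texts, str):
        texts = [texts]
    encoded = tokenizer(texts, return_tensors="np", padding=True, truncation=True)
    input_names = {node.name for node in session.get_inputs()}
    return {name: array for name, array in encoded.items() if name in input_names}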

def example_basic_usage():
    """Basic usage example."""
    print("\n" + "="*50)
    print("BASIC USAGE EXAMPLE")
    print("="*50)
    
    # Load model
    session, tokenizer = load_model()
    
    # Single text processing
    text = "Teknologi kecerdasan buatan berkembang sangat pesat di Indonesia."
    
    start_time = time.time()
    embeddings = get_embeddings(session, tokenizer, text)
    inference_time = time.time() - start_time
    
    print(f"Input text: {text}")
    print(f"Embedding shape: {embeddings.shape}")
    print(f"Inference time: {inference_time:.4f}s")
    print(f"Sample embedding values: {embeddings[0][:5]}")

def example_batch_processing():
    """Batch processing example."""
    print("\n" + "="*50)
    print("BATCH PROCESSING EXAMPLE")
    print("="*50)
    
    # Load model
    session, tokenizer = load_model()
    
    # Multiple texts
    texts = [
        "Saya suka makan nasi gudeg.",
        "Artificial intelligence adalah teknologi masa depan.",
        "Indonesia memiliki kebudayaan yang sangat beragam.",
        "Machine learning membantu menganalisis data besar.",
        "Pantai Bali sangat indah untuk berlibur."
    ]
    
    print(f"Processing {len(texts)} texts...")
    
    start_time = time.time()
    embeddings = get_embeddings(session, tokenizer, texts)
    batch_time = time.time() - start_time
    
    print(f"Batch embedding shape: {embeddings.shape}")
    print(f"Batch processing time: {batch_time:.4f}s")
    print(f"Average time per text: {batch_time/len(texts):.4f}s")
    
    return embeddings, texts

def example_similarity_search():
    """Similarity search example."""
    print("\n" + "="*50)
    print("SIMILARITY SEARCH EXAMPLE")
    print("="*50)
    
    # Load model
    session, tokenizer = load_model()
    
    # Documents for similarity search
    documents = [
        "AI dan machine learning mengubah cara kerja industri teknologi.",
        "Kecerdasan buatan membantu otomatisasi proses bisnis modern.", 
        "Nasi rendang adalah makanan tradisional Indonesia yang lezat.",
        "Kuliner Indonesia memiliki cita rasa yang unik dan beragam.",
        "Deep learning adalah subset dari machine learning yang powerful.",
        "Pantai Lombok menawarkan pemandangan yang menakjubkan.",
    ]
    
    query = "Teknologi AI untuk bisnis"
    
    print(f"Query: {query}")
    print(f"Searching in {len(documents)} documents...")
    
    # Get embeddings
    query_embedding = get_embeddings(session, tokenizer, query)
    doc_embeddings = get_embeddings(session, tokenizer, documents)
    
    # Calculate similarities
    similarities = cosine_similarity(query_embedding, doc_embeddings)[0]
    
    # Sort by similarity
    ranked_docs = sorted(zip(documents, similarities), key=lambda x: x[1], reverse=True)
    
    print("\nTop 3 most similar documents:")
    for i, (doc, sim) in enumerate(ranked_docs[:3]):
        print(f"{i+1}. Similarity: {sim:.4f}")
        print(f"   Document: {doc}")

def example_long_text_processing():
    """Long text processing example."""
    print("\n" + "="*50)
    print("LONG TEXT PROCESSING EXAMPLE")
    print("="*50)
    
    # Load model
    session, tokenizer = load_model()
    
    # Create long text
    long_text = """
    Perkembangan teknologi artificial intelligence di Indonesia menunjukkan tren yang sangat positif 
    dengan banyaknya startup dan perusahaan teknologi yang mulai mengadopsi solusi berbasis AI untuk 
    meningkatkan efisiensi operasional, customer experience, dan inovasi produk. Industri fintech, 
    e-commerce, dan healthcare menjadi sektor yang paling aktif dalam implementasi AI. Pemerintah 
    Indonesia juga mendukung ekosistem AI melalui berbagai program dan kebijakan yang mendorong 
    transformasi digital. Universitas dan institusi penelitian berkontribusi dalam pengembangan 
    talenta AI berkualitas. Tantangan yang dihadapi meliputi ketersediaan data berkualitas, 
    infrastruktur teknologi, dan regulasi yang mendukung inovasi namun tetap melindungi privasi 
    dan keamanan data. Kolaborasi antara pemerintah, industri, dan akademisi menjadi kunci sukses 
    pengembangan AI di Indonesia untuk mencapai visi Indonesia 2045 sebagai negara maju.
    """
    
    print(f"Processing long text ({len(long_text)} characters)...")
    
    # Process with different pooling strategies
    strategies = ["mean", "cls", "max"]
    
    for strategy in strategies:
        start_time = time.time()
        embeddings = get_embeddings(session, tokenizer, long_text.strip(), pool_strategy=strategy)
        process_time = time.time() - start_time
        
        print(f"Pooling: {strategy:4s} | Shape: {embeddings.shape} | Time: {process_time:.4f}s")

def example_performance_benchmark():
    """Performance benchmark example."""
    print("\n" + "="*50)
    print("PERFORMANCE BENCHMARK")
    print("="*50)
    
    # Load model
    session, tokenizer = load_model()
    
    # Test texts of different lengths
    test_cases = [
        ("Short", "Halo dunia!"),
        ("Medium", "Teknologi AI berkembang sangat pesat dan mengubah berbagai industri di seluruh dunia."),
        ("Long", " ".join(["Kalimat panjang dengan banyak kata untuk menguji performa model."] * 20))
    ]
    
    print("Benchmarking different text lengths...")
    
    for name, text in test_cases:
        times = []
        
        # Warm up
        get_embeddings(session, tokenizer, text)
        
        # Benchmark
        for _ in range(10):
            start_time = time.time()
            embeddings = get_embeddings(session, tokenizer, text)
            times.append(time.time() - start_time)
        
        avg_time = np.mean(times)
        std_time = np.std(times)
        token_count = len(tokenizer.encode(text))
        
        print(f"{name:6s} ({token_count:3d} tokens): {avg_time:.4f}s Β± {std_time:.4f}s")

def validate_model():
    """Validate model functionality."""
    print("\n" + "="*50)
    print("MODEL VALIDATION")
    print("="*50)
    
    try:
        # Load model
        session, tokenizer = load_model()
        
        # Test basic functionality
        test_text = "Tes validasi model ONNX."
        embeddings = get_embeddings(session, tokenizer, test_text)
        
        # Validation checks
        assert embeddings.shape[0] == 1, "Batch size should be 1"
        assert embeddings.shape[1] == 768, "Hidden size should be 768"
        assert not np.isnan(embeddings).any(), "No NaN values allowed"
        assert not np.isinf(embeddings).any(), "No Inf values allowed"
        
        print("βœ… Model validation passed!")
        print(f"βœ… Output shape: {embeddings.shape}")
        print(f"βœ… Output range: [{embeddings.min():.4f}, {embeddings.max():.4f}]")
        
    except Exception as e:
        print(f"❌ Model validation failed: {e}")
        raise

def main():
    """Run all examples."""
    print("πŸš€ LazarusNLP IndoBERT ONNX - Example Usage")
    
    # Validate model first
    validate_model()
    
    # Run examples
    example_basic_usage()
    example_batch_processing()
    example_similarity_search()
    example_long_text_processing()
    example_performance_benchmark()
    
    print("\n" + "="*50)
    print("πŸŽ‰ All examples completed successfully!")
    print("="*50)

if __name__ == "__main__":
    main()