1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
/*
 *******************************************************************************
 *
 *   Copyright (C) 1999-2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 */

package sun.font;

/**
 * <code>ScriptRun</code> is used to find runs of characters in
 * the same script, as defined in the <code>Script</code> class.
 * It implements a simple iterator over an array of characters.
 * The iterator will assign <code>COMMON</code> and <code>INHERITED</code>
 * characters to the same script as the preceeding characters. If the
 * COMMON and INHERITED characters are first, they will be assigned to
 * the same script as the following characters.
 *
 * The iterator will try to match paired punctuation. If it sees an
 * opening punctuation character, it will remember the script that
 * was assigned to that character, and assign the same script to the
 * matching closing punctuation.
 *
 * No attempt is made to combine related scripts into a single run. In
 * particular, Hiragana, Katakana, and Han characters will appear in seperate
 * runs.

 * Here is an example of how to iterate over script runs:
 * <pre>
 * void printScriptRuns(char[] text)
 * {
 *     ScriptRun scriptRun = new ScriptRun(text, 0, text.length);
 *
 *     while (scriptRun.next()) {
 *         int start  = scriptRun.getScriptStart();
 *         int limit  = scriptRun.getScriptLimit();
 *         int script = scriptRun.getScriptCode();
 *
 *         System.out.println("Script \"" + Script.getName(script) + "\" from " +
 *                            start + " to " + limit + ".");
 *     }
 *  }
 * </pre>
 *
 */
public final class ScriptRun
{
    private char[] text;   // fixed once set by constructor
    private int textStart;
    private int textLimit;

    private int scriptStart;     // change during iteration
    private int scriptLimit;
    private int scriptCode;

    private int stack[];         // stack used to handle paired punctuation if encountered
    private int parenSP;

    public ScriptRun() {
        // must call init later or we die.
    }

    /**
     * Construct a <code>ScriptRun</code> object which iterates over a subrange
     * of the given characetrs.
     *
     * @param chars the array of characters over which to iterate.
     * @param start the index of the first character over which to iterate
     * @param count the number of characters over which to iterate
     */
    public ScriptRun(char[] chars, int start, int count)
    {
        init(chars, start, count);
    }

    public void init(char[] chars, int start, int count)
    {
        if (chars == null || start < 0 || count < 0 || count > chars.length - start) {
            throw new IllegalArgumentException();
        }

        text = chars;
        textStart = start;
        textLimit = start + count;

        scriptStart = textStart;
        scriptLimit = textStart;
        scriptCode = Script.INVALID_CODE;
        parenSP = 0;
    }

    /**
     * Get the starting index of the current script run.
     *
     * @return the index of the first character in the current script run.
     */
    public final int getScriptStart() {
        return scriptStart;
    }

    /**
     * Get the index of the first character after the current script run.
     *
     * @return the index of the first character after the current script run.
     */
    public final int getScriptLimit() {
        return scriptLimit;
    }

    /**
     * Get the script code for the script of the current script run.
     *
     * @return the script code for the script of the current script run.
     * @see #Script
     */
    public final int getScriptCode() {
        return scriptCode;
    }

    /**
     * Find the next script run. Returns <code>false</code> if there
     * isn't another run, returns <code>true</code> if there is.
     *
     * @return <code>false</code> if there isn't another run, <code>true</code> if there is.
     */
    public final boolean next() {
        int startSP  = parenSP;  // used to find the first new open character

        // if we've fallen off the end of the text, we're done
        if (scriptLimit >= textLimit) {
            return false;
        }
    
        scriptCode  = Script.COMMON;
        scriptStart = scriptLimit;
        
        int ch;
        
        while ((ch = nextCodePoint()) != DONE) {
            int sc = ScriptRunData.getScript(ch);
            int pairIndex = sc == Script.COMMON ? getPairIndex(ch) : -1;

            // Paired character handling:
            //
            // if it's an open character, push it onto the stack.
            // if it's a close character, find the matching open on the
            // stack, and use that script code. Any non-matching open
            // characters above it on the stack will be popped.
            if (pairIndex >= 0) {
                if ((pairIndex & 1) == 0) {
                    if (stack == null) {
                        stack = new int[32];
                    } else if (parenSP == stack.length) {
                        int[] newstack = new int[stack.length + 32];
                        System.arraycopy(stack, 0, newstack, 0, stack.length);
                        stack = newstack;
                    }

                    stack[parenSP++] = pairIndex;
                    stack[parenSP++] = scriptCode;
                } else if (parenSP > 0) {
                    int pi = pairIndex & ~1;

                    while ((parenSP -= 2) >= 0 && stack[parenSP] != pi);

                    if (parenSP >= 0) {
                        sc = stack[parenSP+1];
                    } else {
                      parenSP = 0;
                    }
                    if (parenSP < startSP) {
                        startSP = parenSP;
                    }
               }
            }

            if (sameScript(scriptCode, sc)) {
                if (scriptCode <= Script.INHERITED && sc > Script.INHERITED) {
                    scriptCode = sc;

                    // now that we have a final script code, fix any open
                    // characters we pushed before we knew the script code.
                    while (startSP < parenSP) {
                        stack[startSP+1] = scriptCode;
                        startSP += 2;
                    }
                }

                // if this character is a close paired character,
        // pop it from the stack
                if (pairIndex > 0 && (pairIndex & 1) != 0 && parenSP > 0) {
                    parenSP -= 2;
                }
            } else {
                // We've just seen the first character of
                // the next run. Back over it so we'll see
                // it again the next time.
                pushback(ch);

                // we're outta here
                break; 
            }
        }

        return true;
    }

    static final int SURROGATE_START = 0x10000;
    static final int LEAD_START = 0xd800;
    static final int LEAD_LIMIT = 0xdc00;
    static final int TAIL_START = 0xdc00;
    static final int TAIL_LIMIT = 0xe000;
    static final int LEAD_SURROGATE_SHIFT = 10;
    static final int SURROGATE_OFFSET = SURROGATE_START - (LEAD_START << LEAD_SURROGATE_SHIFT) - TAIL_START;

    static final int DONE = -1;

    private final int nextCodePoint() {
        if (scriptLimit >= textLimit) {
            return DONE;
        }
        int ch = text[scriptLimit++];
        if (ch >= LEAD_START && ch < LEAD_LIMIT && scriptLimit < textLimit) {
            int nch = text[scriptLimit];
            if (nch >= TAIL_START && nch < TAIL_LIMIT) {
                ++scriptLimit;
                ch = (ch << LEAD_SURROGATE_SHIFT) + nch + SURROGATE_OFFSET;
            }
        }
        return ch;
    }

    private final void pushback(int ch) {
        if (ch >= 0) {
            if (ch >= 0x10000) {
                scriptLimit -= 2;
            } else {
                scriptLimit -= 1;
            }
        }
    }

    /**
     * Compare two script codes to see if they are in the same script. If one script is
     * a strong script, and the other is INHERITED or COMMON, it will compare equal.
     *
     * @param scriptOne one of the script codes.
     * @param scriptTwo the other script code.
     * @return <code>true</code> if the two scripts are the same.
     * @see com.ibm.icu.lang.Script
     */
    private static boolean sameScript(int scriptOne, int scriptTwo) {
        return scriptOne == scriptTwo || scriptOne <= Script.INHERITED || scriptTwo <= Script.INHERITED;
    }

    /**
     * Find the highest bit that's set in a word. Uses a binary search through
     * the bits.
     *
     * @param n the word in which to find the highest bit that's set.
     * @return the bit number (counting from the low order bit) of the highest bit.
     */
    private static final byte highBit(int n)
    {
        if (n <= 0) {
            return -32;
        }

        byte bit = 0;

        if (n >= 1 << 16) {
            n >>= 16;
            bit += 16;
        }

        if (n >= 1 << 8) {
            n >>= 8;
            bit += 8;
        }

        if (n >= 1 << 4) {
            n >>= 4;
            bit += 4;
        }

        if (n >= 1 << 2) {
            n >>= 2;
            bit += 2;
        }

        if (n >= 1 << 1) {
            n >>= 1;
            bit += 1;
        }

        return bit;
    }

    /**
     * Search the pairedChars array for the given character.
     *
     * @param ch the character for which to search.
     * @return the index of the character in the table, or -1 if it's not there.
     */
    private static int getPairIndex(int ch)
    {
        int probe = pairedCharPower;
        int index = 0;

        if (ch >= pairedChars[pairedCharExtra]) {
            index = pairedCharExtra;
        }

        while (probe > (1 << 0)) {
            probe >>= 1;

            if (ch >= pairedChars[index + probe]) {
                index += probe;
            }
        }

        if (pairedChars[index] != ch) {
            index = -1;
        }

        return index;
    }

    // all common
    private static int pairedChars[] = {
        0x0028, 0x0029, // ascii paired punctuation  // common
        0x003c, 0x003e, // common
        0x005b, 0x005d, // common
        0x007b, 0x007d, // common
        0x00ab, 0x00bb, // guillemets // common
        0x2018, 0x2019, // general punctuation // common
        0x201c, 0x201d, // common
        0x2039, 0x203a, // common
        0x3008, 0x3009, // chinese paired punctuation // common
        0x300a, 0x300b,
        0x300c, 0x300d,
        0x300e, 0x300f,
        0x3010, 0x3011,
        0x3014, 0x3015,
        0x3016, 0x3017,
        0x3018, 0x3019,
        0x301a, 0x301b
    };

    private static final int pairedCharPower = 1 << highBit(pairedChars.length);
    private static final int pairedCharExtra = pairedChars.length - pairedCharPower;

}

			
			

Browsed Source: [clear]