diff dmd/doc.c @ 1587:def7a1d494fd

Merge DMD 1.051
author Christian Kamm <kamm incasoftware de>
date Fri, 06 Nov 2009 23:58:01 +0100
parents eeb8b95ea92e
children 207a8a438dea
line wrap: on
line diff
--- a/dmd/doc.c	Fri Nov 06 21:51:41 2009 +0100
+++ b/dmd/doc.c	Fri Nov 06 23:58:01 2009 +0100
@@ -1,6 +1,6 @@
 
 // Compiler implementation of the D programming language
-// Copyright (c) 1999-2008 by Digital Mars
+// Copyright (c) 1999-2009 by Digital Mars
 // All Rights Reserved
 // written by Walter Bright
 // http://www.digitalmars.com
@@ -33,6 +33,7 @@
 #include "hdrgen.h"
 #include "doc.h"
 #include "mtype.h"
+#include "utf.h"
 
 struct Escape
 {
@@ -89,14 +90,18 @@
 int icmp(const char *stringz, void *s, size_t slen);
 int isDitto(unsigned char *comment);
 unsigned char *skipwhitespace(unsigned char *p);
-unsigned skiptoident(OutBuffer *buf, unsigned i);
-unsigned skippastident(OutBuffer *buf, unsigned i);
-unsigned skippastURL(OutBuffer *buf, unsigned i);
+unsigned skiptoident(OutBuffer *buf, size_t i);
+unsigned skippastident(OutBuffer *buf, size_t i);
+unsigned skippastURL(OutBuffer *buf, size_t i);
 void highlightText(Scope *sc, Dsymbol *s, OutBuffer *buf, unsigned offset);
 void highlightCode(Scope *sc, Dsymbol *s, OutBuffer *buf, unsigned offset);
 void highlightCode2(Scope *sc, Dsymbol *s, OutBuffer *buf, unsigned offset);
 Argument *isFunctionParameter(Dsymbol *s, unsigned char *p, unsigned len);
 
+int isIdStart(unsigned char *p);
+int isIdTail(unsigned char *p);
+int utfStride(unsigned char *p);
+
 static unsigned char ddoc_default[] = "\
 DDOC =	<html><head>\n\
 	<META http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\n\
@@ -238,7 +243,7 @@
     // Generate predefined macros
 
     // Set the title to be the name of the module
-    {	char *p = toPrettyChars();
+    {	const char *p = toPrettyChars();
 	Macro::define(&macrotable, (unsigned char *)"TITLE", 5, (unsigned char *)p, strlen(p));
     }
 
@@ -674,7 +679,7 @@
 	    buf->writestring("const ");
 #if DMDV2
 	if (d->isInvariant())
-	    buf->writestring("invariant ");
+	    buf->writestring("immutable ");
 #endif
 	if (d->isFinal())
 	    buf->writestring("final ");
@@ -948,7 +953,6 @@
 {   unsigned char *p;
     unsigned char *pstart;
     unsigned char *pend;
-    unsigned char *q;
     unsigned char *idstart;
     unsigned idlen;
 
@@ -963,17 +967,32 @@
 	pstart = p;
 
 	/* Find end of section, which is ended by one of:
-	 *	'identifier:'
+	 *	'identifier:' (but not inside a code section)
 	 *	'\0'
 	 */
 	idlen = 0;
+	int inCode = 0;
 	while (1)
 	{
-	    if (isalpha(*p) || *p == '_')
+	    // Check for start/end of a code section
+	    if (*p == '-')
 	    {
-		q = p + 1;
-		while (isalnum(*q) || *q == '_')
-		    q++;
+		int numdash = 0;
+		while (*p == '-')
+		{
+		    ++numdash;
+		    p++;
+		}
+		// BUG: handle UTF PS and LS too
+		if (!*p || *p == '\r' || *p == '\n' && numdash >= 3)
+		    inCode ^= 1;
+	    }
+
+	    if (!inCode && isIdStart(p))
+	    {
+		unsigned char *q = p + utfStride(p);
+		while (isIdTail(q))
+		    q += utfStride(q);
 		if (*q == ':')	// identifier: ends it
 		{   idlen = q - p;
 		    idstart = p;
@@ -1141,12 +1160,13 @@
     while (p < pend)
     {
 	// Skip to start of macro
-	for (; 1; p++)
+	while (1)
 	{
 	    switch (*p)
 	    {
 		case ' ':
 		case '\t':
+		    p++;
 		    continue;
 
 		case '\n':
@@ -1154,20 +1174,18 @@
 		    goto Lcont;
 
 		default:
-		    if (!(isalpha(*p) || *p == '_'))
-		    {
-			if (namelen)
-			    goto Ltext;		// continuation of prev macro
-			goto Lskipline;
-		    }
-		    break;
+		    if (isIdStart(p))
+			break;
+		    if (namelen)
+			goto Ltext;		// continuation of prev macro
+		    goto Lskipline;
 	    }
 	    break;
 	}
 	tempstart = p;
 
-	while (isalnum(*p) || *p == '_')
-	    p++;
+	while (isIdTail(p))
+	    p += utfStride(p);
 	templen = p - tempstart;
 
 	while (*p == ' ' || *p == '\t')
@@ -1269,7 +1287,7 @@
     while (p < pend)
     {
 	// Skip to start of macro
-	for (; 1; p++)
+	while (1)
 	{
 	    if (p >= pend)
 		goto Ldone;
@@ -1277,6 +1295,7 @@
 	    {
 		case ' ':
 		case '\t':
+		    p++;
 		    continue;
 
 		case '\n':
@@ -1284,13 +1303,11 @@
 		    goto Lcont;
 
 		default:
-		    if (!(isalpha(*p) || *p == '_'))
-		    {
-			if (namelen)
-			    goto Ltext;		// continuation of prev macro
-			goto Lskipline;
-		    }
-		    break;
+		    if (isIdStart(p))
+			break;
+		    if (namelen)
+			goto Ltext;		// continuation of prev macro
+		    goto Lskipline;
 	    }
 	    break;
 	}
@@ -1300,9 +1317,9 @@
 	{
 	    if (p >= pend)
 		goto Ldone;
-	    if (!(isalnum(*p) || *p == '_'))
+	    if (!isIdTail(p))
 		break;
-	    p++;
+	    p += utfStride(p);
 	}
 	templen = p - tempstart;
 
@@ -1486,16 +1503,25 @@
  *	end of buf
  */
 
-unsigned skiptoident(OutBuffer *buf, unsigned i)
+unsigned skiptoident(OutBuffer *buf, size_t i)
 {
-    for (; i < buf->offset; i++)
-    {
-	// BUG: handle unicode alpha's
-	unsigned char c = buf->data[i];
-	if (isalpha(c) || c == '_')
+    while (i < buf->offset)
+    {	dchar_t c;
+
+	size_t oi = i;
+	if (utf_decodeChar((unsigned char *)buf->data, buf->offset, &i, &c))
+	    /* Ignore UTF errors, but still consume input
+	     */
 	    break;
-	if (c == '\n')
-	    break;
+	if (c >= 0x80)
+	{
+	    if (!isUniAlpha(c))
+		continue;
+	}
+	else if (!(isalpha(c) || c == '_' || c == '\n'))
+	    continue;
+	i = oi;
+	break;
     }
     return i;
 }
@@ -1504,14 +1530,25 @@
  * Scan forward past end of identifier.
  */
 
-unsigned skippastident(OutBuffer *buf, unsigned i)
+unsigned skippastident(OutBuffer *buf, size_t i)
 {
-    for (; i < buf->offset; i++)
-    {
-	// BUG: handle unicode alpha's
-	unsigned char c = buf->data[i];
-	if (!(isalnum(c) || c == '_'))
+    while (i < buf->offset)
+    {	dchar_t c;
+
+	size_t oi = i;
+	if (utf_decodeChar((unsigned char *)buf->data, buf->offset, &i, &c))
+	    /* Ignore UTF errors, but still consume input
+	     */
 	    break;
+	if (c >= 0x80)
+	{
+	    if (isUniAlpha(c))
+		continue;
+	}
+	else if (isalnum(c) || c == '_')
+	    continue;
+	i = oi;
+	break;
     }
     return i;
 }
@@ -1525,7 +1562,7 @@
  *	index just past it if it is a URL
  */
 
-unsigned skippastURL(OutBuffer *buf, unsigned i)
+unsigned skippastURL(OutBuffer *buf, size_t i)
 {   unsigned length = buf->offset - i;
     unsigned char *p = &buf->data[i];
     unsigned j;
@@ -1810,7 +1847,7 @@
 
 	    default:
 		leadingBlank = 0;
-		if (sc && !inCode && (isalpha(c) || c == '_'))
+		if (sc && !inCode && isIdStart(&buf->data[i]))
 		{   unsigned j;
 
 		    j = skippastident(buf, i);
@@ -1881,7 +1918,7 @@
 	    i = buf->insert(i, se, len);
 	    i--;		// point to ';'
 	}
-	else if (isalpha(c) || c == '_')
+	else if (isIdStart(&buf->data[i]))
 	{   unsigned j;
 
 	    j = skippastident(buf, i);
@@ -2017,3 +2054,54 @@
     return s;
 }
 
+/****************************************
+ * Determine if p points to the start of an identifier.
+ */
+
+int isIdStart(unsigned char *p)
+{
+    unsigned c = *p;
+    if (isalpha(c) || c == '_')
+	return 1;
+    if (c >= 0x80)
+    {	size_t i = 0;
+	if (utf_decodeChar(p, 4, &i, &c))
+	    return 0;	// ignore errors
+	if (isUniAlpha(c))
+	    return 1;
+    }
+    return 0;
+}
+
+/****************************************
+ * Determine if p points to the rest of an identifier.
+ */
+
+int isIdTail(unsigned char *p)
+{
+    unsigned c = *p;
+    if (isalnum(c) || c == '_')
+	return 1;
+    if (c >= 0x80)
+    {	size_t i = 0;
+	if (utf_decodeChar(p, 4, &i, &c))
+	    return 0;	// ignore errors
+	if (isUniAlpha(c))
+	    return 1;
+    }
+    return 0;
+}
+
+/*****************************************
+ * Return number of bytes in UTF character.
+ */
+
+int utfStride(unsigned char *p)
+{
+    unsigned c = *p;
+    if (c < 0x80)
+	return 1;
+    size_t i = 0;
+    utf_decodeChar(p, 4, &i, &c);	// ignore errors, but still consume input
+    return i;
+}