projects/ldc: dmd/doc.c comparison

comparison dmd/doc.c @ 1587:def7a1d494fd

Merge DMD 1.051

author	Christian Kamm <kamm incasoftware de>
date	Fri, 06 Nov 2009 23:58:01 +0100
parents	eeb8b95ea92e
children	207a8a438dea

comparison

equal deleted inserted replaced

-:7f728c52e63c
+:def7a1d494fd
 // Compiler implementation of the D programming language
-// Copyright (c) 1999-2008 by Digital Mars
+// Copyright (c) 1999-2009 by Digital Mars
 // All Rights Reserved
 // written by Walter Bright
 // http://www.digitalmars.com
 // License for redistribution is by either the Artistic License
 // in artistic.txt, or the GNU General Public License in gnu.txt.
 #include "module.h"
 #include "scope.h"
 #include "hdrgen.h"
 #include "doc.h"
 #include "mtype.h"
+#include "utf.h"
 struct Escape
 {
 const char *strings[256];
 int cmp(const char *stringz, void *s, size_t slen);
 int icmp(const char *stringz, void *s, size_t slen);
 int isDitto(unsigned char *comment);
 unsigned char *skipwhitespace(unsigned char *p);
-unsigned skiptoident(OutBuffer *buf, unsigned i);
+unsigned skiptoident(OutBuffer *buf, size_t i);
-unsigned skippastident(OutBuffer *buf, unsigned i);
+unsigned skippastident(OutBuffer *buf, size_t i);
-unsigned skippastURL(OutBuffer *buf, unsigned i);
+unsigned skippastURL(OutBuffer *buf, size_t i);
 void highlightText(Scope *sc, Dsymbol *s, OutBuffer *buf, unsigned offset);
 void highlightCode(Scope *sc, Dsymbol *s, OutBuffer *buf, unsigned offset);
 void highlightCode2(Scope *sc, Dsymbol *s, OutBuffer *buf, unsigned offset);
 Argument *isFunctionParameter(Dsymbol *s, unsigned char *p, unsigned len);
+int isIdStart(unsigned char *p);
+int isIdTail(unsigned char *p);
+int utfStride(unsigned char *p);
 static unsigned char ddoc_default[] = "\
 DDOC =	<html><head>\n\
 	<META http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\n\
 	<title>$(TITLE)</title>\n\
 dc->pescapetable = &escapetable;
 // Generate predefined macros
 // Set the title to be the name of the module
-{	char *p = toPrettyChars();
+{	const char *p = toPrettyChars();
 	Macro::define(&macrotable, (unsigned char *)"TITLE", 5, (unsigned char *)p, strlen(p));
 }
 time_t t;
 time(&t);
 	    buf->writestring("static ");
 	if (d->isConst())
 	    buf->writestring("const ");
 #if DMDV2
 	if (d->isInvariant())
-	    buf->writestring("invariant ");
+	    buf->writestring("immutable ");
 #endif
 	if (d->isFinal())
 	    buf->writestring("final ");
 	if (d->isSynchronized())
 	    buf->writestring("synchronized ");
 void DocComment::parseSections(unsigned char *comment)
 {   unsigned char *p;
 unsigned char *pstart;
 unsigned char *pend;
-unsigned char *q;
 unsigned char *idstart;
 unsigned idlen;
 unsigned char *name = NULL;
 unsigned namelen = 0;
 {
 	p = skipwhitespace(p);
 	pstart = p;
 	/* Find end of section, which is ended by one of:
-	 *	'identifier:'
+	 *	'identifier:' (but not inside a code section)
 	 *	'\0'
 	 */
 	idlen = 0;
+	int inCode = 0;
 	while (1)
 	{
-	    if (isalpha(*p) || *p == '_')
+	    // Check for start/end of a code section
+	    if (*p == '-')
 	    {
-		q = p + 1;
+		int numdash = 0;
-		while (isalnum(*q) || *q == '_')
+		while (*p == '-')
-		    q++;
+		{
+		    ++numdash;
+		    p++;
+		}
+		// A run of at least 3 dashes that is followed by the end of
+		// the line (or of the comment) toggles the code-section state.
+		// The line-ending tests are parenthesized because && binds
+		// tighter than ||: without them numdash >= 3 would only be
+		// required before '\n', and a single '-' before '\r' or the
+		// terminating 0 would flip inCode.
+		// BUG: handle UTF PS and LS too
+		if ((!*p || *p == '\r' || *p == '\n') && numdash >= 3)
+		    inCode ^= 1;
+	    }
+	    if (!inCode && isIdStart(p))
+	    {
+		unsigned char *q = p + utfStride(p);
+		while (isIdTail(q))
+		    q += utfStride(q);
 		if (*q == ':')	// identifier: ends it
 		{   idlen = q - p;
 		    idstart = p;
 		    for (pend = p; pend > pstart; pend--)
 		    {	if (pend[-1] == '\n')
 buf->writestring("$(DDOC_PARAMS \n");
 while (p < pend)
 {
 	// Skip to start of macro
-	for (; 1; p++)
+	while (1)
 	{
 	    switch (*p)
 	    {
 		case ' ':
 		case '\t':
+		    p++;
 		    continue;
 		case '\n':
 		    p++;
 		    goto Lcont;
 		default:
-		    if (!(isalpha(*p) || *p == '_'))
+		    if (isIdStart(p))
-		    {
+			break;
-			if (namelen)
+		    if (namelen)
-			    goto Ltext;		// continuation of prev macro
+			goto Ltext;		// continuation of prev macro
-			goto Lskipline;
+		    goto Lskipline;
-		    }
-		    break;
 	    }
 	    break;
 	}
 	tempstart = p;
-	while (isalnum(*p) || *p == '_')
+	while (isIdTail(p))
-	    p++;
+	    p += utfStride(p);
 	templen = p - tempstart;
 	while (*p == ' ' || *p == '\t')
 	    p++;
 unsigned textlen;
 while (p < pend)
 {
 	// Skip to start of macro
-	for (; 1; p++)
+	while (1)
 	{
 	    if (p >= pend)
 		goto Ldone;
 	    switch (*p)
 	    {
 		case ' ':
 		case '\t':
+		    p++;
 		    continue;
 		case '\n':
 		    p++;
 		    goto Lcont;
 		default:
-		    if (!(isalpha(*p) || *p == '_'))
+		    if (isIdStart(p))
-		    {
+			break;
-			if (namelen)
+		    if (namelen)
-			    goto Ltext;		// continuation of prev macro
+			goto Ltext;		// continuation of prev macro
-			goto Lskipline;
+		    goto Lskipline;
-		    }
-		    break;
 	    }
 	    break;
 	}
 	tempstart = p;
 	while (1)
 	{
 	    if (p >= pend)
 		goto Ldone;
-	    if (!(isalnum(*p) || *p == '_'))
+	    if (!isIdTail(p))
 		break;
-	    p++;
+	    p += utfStride(p);
 	}
 	templen = p - tempstart;
 	while (1)
 	{
 *	start of identifier
 *	beginning of next line
 *	end of buf
 */
-unsigned skiptoident(OutBuffer *buf, unsigned i)
+unsigned skiptoident(OutBuffer *buf, size_t i)	// NOTE(review): i is size_t but the result is returned as unsigned
 {
-for (; i < buf->offset; i++)
+while (i < buf->offset)
-{
+{	dchar_t c;
-	// BUG: handle unicode alpha's
-	unsigned char c = buf->data[i];
+	size_t oi = i;	// index at which the character being examined starts
-	if (isalpha(c) || c == '_')
+	if (utf_decodeChar((unsigned char *)buf->data, buf->offset, &i, &c))
+	    /* A UTF decoding error ends the scan; i is returned as
+	     * utf_decodeChar left it, it is not reset to oi
+	     */
 	    break;
-	if (c == '\n')
+	if (c >= 0x80)	// non-ASCII: keep scanning unless it is a Unicode alpha
-	    break;
+	{
+	    if (!isUniAlpha(c))
+		continue;
+	}
+	else if (!(isalpha(c) || c == '_' || c == '\n'))	// ASCII: keep scanning unless letter, '_' or newline
+	    continue;
+	i = oi;	// stop on this character: return the index where it starts
+	break;
 }
 return i;
 }
 /************************************************
 * Scan forward past end of identifier.
 * Starting at index i of buf, steps over characters that are ASCII
 * alphanumerics, '_', or code points >= 0x80 accepted by isUniAlpha().
 * Returns:
 *	index of the first character that is not part of the identifier,
 *	or the index at which the scan reached buf->offset.
 *	NOTE(review): i is size_t but the result is returned as unsigned.
 */
-unsigned skippastident(OutBuffer *buf, unsigned i)
+unsigned skippastident(OutBuffer *buf, size_t i)
 {
-for (; i < buf->offset; i++)
+while (i < buf->offset)
-{
+{	dchar_t c;
-	// BUG: handle unicode alpha's
-	unsigned char c = buf->data[i];
+	size_t oi = i;	// index at which the character being examined starts
-	if (!(isalnum(c) || c == '_'))
+	if (utf_decodeChar((unsigned char *)buf->data, buf->offset, &i, &c))
+	    /* A UTF decoding error ends the scan; i is returned as
+	     * utf_decodeChar left it, it is not reset to oi
+	     */
 	    break;
+	if (c >= 0x80)	// non-ASCII: part of the identifier only if a Unicode alpha
+	{
+	    if (isUniAlpha(c))
+		continue;
+	}
+	else if (isalnum(c) || c == '_')	// ASCII identifier character
+	    continue;
+	i = oi;	// not an identifier character: return the index where it starts
+	break;
 }
 return i;
 }
 * Returns:
 *	i if not a URL
 *	index just past it if it is a URL
 */
-unsigned skippastURL(OutBuffer *buf, unsigned i)
+unsigned skippastURL(OutBuffer *buf, size_t i)
 {   unsigned length = buf->offset - i;
 unsigned char *p = &buf->data[i];
 unsigned j;
 unsigned sawdot = 0;
 		}
 		break;
 	    default:
 		leadingBlank = 0;
-		if (sc && !inCode && (isalpha(c) || c == '_'))
+		if (sc && !inCode && isIdStart(&buf->data[i]))
 		{   unsigned j;
 		    j = skippastident(buf, i);
 		    if (j > i)
 		    {
 	    size_t len = strlen(se);
 	    buf->remove(i, 1);
 	    i = buf->insert(i, se, len);
 	    i--;		// point to ';'
 	}
-	else if (isalpha(c) || c == '_')
+	else if (isIdStart(&buf->data[i]))
 	{   unsigned j;
 	    j = skippastident(buf, i);
 	    if (j > i)
 	    {
 	    break;
 }
 return s;
 }
+/****************************************
+* Determine if p points to the start of an identifier.
+* An identifier can start with an ASCII letter, '_', or a UTF-8
+* sequence whose decoded code point satisfies isUniAlpha().
+* Params:
+*	p = first byte of the character to test; for a byte >= 0x80,
+*	    p is handed to utf_decodeChar() with a length of 4
+* Returns:
+*	1 if an identifier can start at p, 0 if not (including when
+*	utf_decodeChar() reports an error)
+*/
+int isIdStart(unsigned char *p)
+{
+unsigned c = *p;
+// Classify only ASCII bytes with <ctype.h>: isalpha() is locale-dependent
+// for values >= 0x80, and such a byte is part of a multi-byte sequence
+// that has to be decoded first (skiptoident/skippastident split the
+// two ranges the same way).
+if (c < 0x80)
+	return isalpha(c) || c == '_';
+size_t i = 0;
+if (utf_decodeChar(p, 4, &i, &c))
+	return 0;	// ignore errors
+return isUniAlpha(c) ? 1 : 0;
+}
+/****************************************
+* Determine if p points to the rest of an identifier.
+* An identifier continues with an ASCII letter or digit, '_', or a
+* UTF-8 sequence whose decoded code point satisfies isUniAlpha().
+* Params:
+*	p = first byte of the character to test; for a byte >= 0x80,
+*	    p is handed to utf_decodeChar() with a length of 4
+* Returns:
+*	1 if the character at p can continue an identifier, 0 if not
+*	(including when utf_decodeChar() reports an error)
+*/
+int isIdTail(unsigned char *p)
+{
+unsigned c = *p;
+// Classify only ASCII bytes with <ctype.h>: isalnum() is locale-dependent
+// for values >= 0x80, and such a byte is part of a multi-byte sequence
+// that has to be decoded first (skiptoident/skippastident split the
+// two ranges the same way).
+if (c < 0x80)
+	return isalnum(c) || c == '_';
+size_t i = 0;
+if (utf_decodeChar(p, 4, &i, &c))
+	return 0;	// ignore errors
+return isUniAlpha(c) ? 1 : 0;
+}
+/*****************************************
+* Return number of bytes in UTF character.
+* Params:
+*	p = first byte of the character
+* Returns:
+*	1 for a byte below 0x80; otherwise the index that
+*	utf_decodeChar() reports after being started at 0 on p
+*/
+int utfStride(unsigned char *p)
+{
+unsigned c = *p;
+size_t stride = 1;	// a byte below 0x80 is a character by itself
+if (c >= 0x80)
+{	stride = 0;	// start index for the decoder
+	utf_decodeChar(p, 4, &stride, &c);	// ignore errors, but still consume input
+}
+return stride;
+}

Mercurial > projects > ldc

comparison dmd/doc.c @ 1587:def7a1d494fd