changeset 82:ac1e3fd07275

New ssi file format. (De)serializer now supports non-ascii wide characters (encoded to UTF-8) and no longer supports non-ascii 8-bit chars which would result in bad UTF-8. Moved/renamed a few things left over from the last commit.
author Diggory Hardy <diggory.hardy@gmail.com>
date Sat, 30 Aug 2008 09:37:35 +0100
parents d8fccaa45d5f
children e0f1ec7fe73a
files codeDoc/file/formats-overview.txt codeDoc/file/mergetag/file-format-binary.txt codeDoc/file/mergetag/file-format-requirements.txt codeDoc/file/mergetag/file-format-text.txt codeDoc/file/mergetag/issues.txt codeDoc/file/mergetag/new-models.vym codeDoc/mergetag/file-format-binary.txt codeDoc/mergetag/file-format-requirements.txt codeDoc/mergetag/file-format-text.txt codeDoc/mergetag/issues.txt codeDoc/mergetag/new-models.vym mde/file/deserialize.d mde/file/mergetag/DataSet.d mde/file/mergetag/Reader.d mde/file/mergetag/Writer.d mde/file/mergetag/mdeUT.d mde/file/serialize.d mde/file/ssi.d mde/gui/widget/Floating.d mde/input/Config.d mde/lookup/Options.d mde/mde.d
diffstat 22 files changed, 433 insertions(+), 298 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/codeDoc/file/formats-overview.txt	Sat Aug 30 09:37:35 2008 +0100
@@ -0,0 +1,25 @@
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+File headers - these are like magic numbers, but following a Byte-Order-Mark.
+
+For mde files, they should have the form: mdeXXXNN where XXX is a three-character ASCII identifier
+for the specific file type and NN is a version number (only to be changed when the base file
+format changes, not for changes to the serialization).
+
+Identifier      File extension  File format
+−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−−
+{MT01}          .mtt            Mergetag text
+mdessi00        .ssi            Single Serialized Item
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/codeDoc/file/mergetag/file-format-binary.txt	Sat Aug 30 09:37:35 2008 +0100
@@ -0,0 +1,18 @@
+Copyright © 2007-2008 Diggory Hardy
+License: GNU General Public License version 2 or later (see COPYING)
+
+
+No file format is set yet; this basically includes possibilities. The file format may or may not be compatible across platforms; if not it may just be used as a cache (i.e. open .mtt/.mtb, whichever is newest, and if it's .mtt then save a .mtb version).
+
+
+This is the file format for mergetag binary files. The unit size is a byte. Most numbers to do with the layout (i.e. not stored data) should be stored as a 32-bit uint.
+
+
+BOM  ---  a Byte Order Mark should be used to determine endianness (MT01 (or other version) in bytes, but converted to two ushorts to detect endianness?)
+
+
+File should then consist of sections:
+
+Header data including an address for the header section data if included.
+
+Sections list. Include a list of sections with identifiers and addresses, sorted by identifier and in a suitable format to easily be converted to a D hash-map. Addresses for each section should consist of both a start and an end address; the end address should be checked upon reading the section. In addition the start address must be checked against the end of file to avoid security vulnerabilities with reading other memory blocks.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/codeDoc/file/mergetag/file-format-requirements.txt	Sat Aug 30 09:37:35 2008 +0100
@@ -0,0 +1,33 @@
+Copyright © 2007-2008 Diggory Hardy
+License: GNU General Public License version 2 or later (see COPYING)
+
+
+Requirements:
+
+---	Config - named entries (map associated by strings; sections by strings)
+Merging: choosing one entry over another
+
+---	Data - list of entries, each of custom compound type (e.g. list of 3-vector over reals)
+As sub blocks within a standard item (mergeable: combine the blocks of multiple items).
+
++++	Global type for handling all this:
+File consists of sections.
+Each section consists of items.
+Items are sorted by ID and not by type, i.e. if two items with the same ID but different types exist, merging rules are used to choose between them.
+Items have a custom type, which can be a compound of:
++	Basic types:
+++		bool
+++		int (int+uint)
+++		real (or float or double? no.)
+++		string (char)
+++		binary (ubyte[])
++	Strings (of char, wchar or dchar)
++	Fixed-length arrays (single type)
++	Variable-length arrays (single type)
++	Fixed format tuples (multiple types which are prespecified)
++	The top-most type may be a "data list", which is identical to a variable-length array except that merging items with identical types will combine their lists instead of choosing one over the other.
+To access an item, it should be found by ID, its type should be checked, and then it may be accessed.
+Types are specific to items. As an optimisation, a binary format may have a list of types and index them.
+
++++	Basic types:
+All D base types, including void, with support for writing strings.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/codeDoc/file/mergetag/file-format-text.txt	Sat Aug 30 09:37:35 2008 +0100
@@ -0,0 +1,183 @@
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+This is the file format for mergetag text files.
+Version: 0.1 unfinalised
+
+
+The encoding should be unicode UTF-8, UTF-16 or UTF-32, and for anything other than UTF-8 must include a BOM.
+
+
+Hierarchy:
++	Sections	(special section: see header)
+++	Data Tags
+
+
+IDs:
+IDs are used for several purposes; they are UTF-8 strings. They are stored in text files as unquoted strings; escape sequences are not supported and the strings should not contain the following characters, although this is not checked: <|=>{}
+All characters between the appropriate markers are consumed into the ID, hence whitespace is meaningful.
+Multiple section or data tags with the same ID are allowed; see the "Merging rules" section.
+
+
+Outside of tags only whitespace or valid tags are allowed. Whitespace is ignored.
+The following tags are valid (see below for details):
+tag		purpose
+{...}		section identifiers
+<...>		data items
+!{...}		simple comment block
+!<...>		comment block parsed the same as <...>
+Within tags, type specifications or data items whitespace is allowed between symbols.
+
+
+Section identifier tags:
+Format: {ID}
+The ID is the section identifier/name. The ID type is DefaultData unless overridden by the code using the reader.
+A section identifier marks the beginning of a new section, extending until the next section identifier or the end of the file.
+
+
+Data item tags:
+Format: <tp|ID=dt>
+A data item with type tp, identifier ID and data dt. If the data does not fit the given type it is an error and the tag is ignored. Once split into a type string, ID and data string, the contents are passed to an addTag() function within the DataSection class which will parse tags of a recognised format and either ignore or print a warning about other tags.
+
+
+Data item tags: Type format:
+Note:
+	The type is read as a single token terminated by any of these characters:	<>|=
+	There must not be spaces within the type, e.g. "char []".
+	Of course any character other than a | terminating the token is an error.
+Format:
+	tp		a basic type
+	tp[]		a dynamic list of sub-type tp
+	t1[t2]		an associative array with key-type t2
+Possible future additions:
+	tp()		a dynamic merging list of sub-type tp (only valid as the primary type, ie <subtype()|...>, not a sub-type of a tuple or another dynamic list)
+	{t1,t2,...,tn}	a tuple with sub-types t1, t2, ..., tn
+
+Basic types (only items with a + are currently supported, items with * are in DefaultData):
+	name
+	
+	void	--- less useful type
++*	bool	--- integer types
++*	byte
++*	ubyte
++*	short
++*	ushort
++*	int
++*	uint
++*	long
++*	ulong
+	cent
+	ucent
+	
++*	binary	--- alias for ubyte[]
+	
++*	float	--- floating point types
++*	double
++*	real
+	ifloat
+	idouble
+	ireal
+	cfloat
+	cdouble
+	creal
+	
++*	char	--- single character types (actually these CANNOT support UTF8 symbols with length > 1)
+	wchar
+	dchar
++*	string	--- alias for char[] --- (DOES support UTF8)
+	wstring	--- alias for wchar[]
+	dstring	--- alias for dchar[]
+
+
+Data item tags: Data format:
+Valid chars:	[](){},+-.0-9eEixXa-fA-F '.' ".*"
+Format:
+	[d1,d2,...,dn]	data all of type t corresponding to t[]
+	(d1,d2,...,dn)	data all of type t corresponding to t()
+	{d1,d2,...,dn}	data corresponding to a type declaration of {t1,t2,...,tn}
+	d		a single data element
+
+Single data elements:
+	z		an integer number (regexp: [+-]?[0-9]+)
+	z		a floating point number (rough regexp: [+-]?[0-9]*[.]?[0-9]*(e[+-]?[0-9]+)?)
+	zi		an imaginary floating point number (z is a floating point number)
+	y+zi, y-zi	a complex number (4+0i may be written as 4, etc) (y, z are f.p.s)
+	0xz, -0xz	a hexadecimal integer z (composed of chars 0-9,a-f,A-F)
+	'c'		a char/wchar/dchar character, depending on the type specified (c may be any single character except ' or an escape sequence)
+	"string"	equivalent to ['s','t','r','i','n','g'] --- may contain the following escape sequences as defined in D: \" \' \\ \a \b \f \n \r \t \v
+	XX...XX		Binary (ubyte[]); each pair of chars is read as a hex ubyte
+	<void>		void "data" has no symbols
+
+
+Data format: Escape sequences:
+To be created and written.
+
+
+Comment tags (there are no line comments):
+Simple comment blocks:
+Format: !{...}
+This is a simple comment block, and only curly braces ({,}) are treated specially. A {, whether or not it is preceded by a !, starts an embedded comment block, and a } ends either an embedded block or the actual comment block. Note: beware commenting out anything containing curly braces which aren't in matching pairs.
+Commented data tags:
+Format: !<tp|ID=dt>
+Basically a commented out data tag. Conformance to the above spec may not be checked as strictly as normal, but the dt section is checked for strings so that a > within a string won't end the tag.
+
+
+Merging rules:
+if, when a data item is read, a data item with the same identifier
+within the same section exists in the DataSet being read into:
++	if the types are identical:
+++		if the primary type is a tp() mergeable dynamic list:
++++			the entries from the item being read are concatenated to those in the item
++++			in the DataSet
+++		else:
+++-			the item already in the DataSet takes priority and is left untouched
++	else:
++-		a warning is issued, and the data item within the DataSet is left untouched
+This allows merging some config settings in a user config file with the remaining settings in a
+complete system config file and some support for modifications overriding or adding to some data.
+
+
+Header:
+The header is a standard section which is mandatory and must be the first section. Its section identifier must start at the beginning of the file with no whitespace, declared with:
+	{MTXY}		where XY is a two digit CAPITAL HEX version number representing the mergetag format version, e.g. {MT01} .
+If these are not the first 6 characters of the file the file will not be regarded as valid.
+This formatting is very strict to allow reliable low-level parsing.
+
+
+The data tags within the header have no special meaning; any may be used such as the following:
+	<string|"Author"="...">
+	<string|"Name"="...">
+	<string|"Description"="...">
+	<string|"Program"="...">	(which program created/uses this?)
+	<*|"Version"=...>		(use any supported type)
+	<string|"Date"="YYYYMMDD">	(reverse date format; optionally "YYYYMMDDhhmmss")
+	<{u16,u8,u8}|"Date"={YYYY,MM,DD}>	(actually this type probably won't be supported by a standard section)
+	<string|"Copyright"=...>
+
+
+Example:	!THIS IS NO LONGER VALID!
+{MT01}
+{example section}
+<u32|"num"=5>
+<{u32,UTF8[]}()|"DATA"=(
+	{1,['a']},
+	{59,['w','o','r','d']},
+	{2,"strings can be written like this"} )>
+<wchar[]|"name"="This string is stored in UTF16, regardless of the file's encoding.">
+<{u32,UTF8[]}()|"DATA"=(
+	{3,"this is appended to the previous 'DATA' item"} )>
+{"section: section identifiers and tuples are not confused since tuples only occur inside <...> items"}
+<void|Empty tag= >
+!{this is a comment {containing a comment}}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/codeDoc/file/mergetag/issues.txt	Sat Aug 30 09:37:35 2008 +0100
@@ -0,0 +1,20 @@
+Copyright © 2007-2008 Diggory Hardy
+License: GNU General Public License version 2 or later (see COPYING)
+
+
+This is mostly just a list of potential minor issues noticed while coding but not considered worth throwing an error about.
+
+Overall:
+
+read.d:
+	Support partially loading a file.
+	parseSection(): as mentioned at end of function
+	formatting errors could be more informative; in particular say where the error is
+	No binary support.
+
+write.d:
+	Threading support?
+	There is currently no way to specify the base in which numbers are written (in text form).
+
+format.d:
+	No support for ulong where val > long.max.
Binary file codeDoc/file/mergetag/new-models.vym has changed
--- a/codeDoc/mergetag/file-format-binary.txt	Fri Aug 29 11:59:43 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-Copyright © 2007-2008 Diggory Hardy
-License: GNU General Public License version 2 or later (see COPYING)
-
-
-No file format is set yet; this basically includes possibilities. The file format may or may not be compatible across platforms; if not it may just be used as a cache (i.e. open .mtt/.mtb, whichever is newest, and if it's .mtt then save a .mtb version).
-
-
-This is the file format for mergetag binary files. The unit size is a byte. Most numbers to do with the layout (i.e. not stored data) should be stored as a 32-bit uint.
-
-
-BOM  ---  a Byte Order Mark should be used to determin endianness (MT01 (or other version) in bytes, but converted to two ushorts to detect endianness?)
-
-
-File should then consist of sections:
-
-Header data including an address for the header section data if included.
-
-Sections list. Include a list of sections with identifiers and addresses, sorted by identifier and in a suitible format to easily be converted to a D hash-map. Addresses for each section should consist of both a start and an end address; the end address should be checked upon reading the section. In addition the start address must be checked against the end of file to avoid security vulnerabilities with reading other memory blocks.
--- a/codeDoc/mergetag/file-format-requirements.txt	Fri Aug 29 11:59:43 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-Copyright © 2007-2008 Diggory Hardy
-License: GNU General Public License version 2 or later (see COPYING)
-
-
-Requirements:
-
----	Config - named entries (map associated by strings; sections by strings)
-Merging: chosing one entry over another
-
----	Data - list of entries, each of custom compound type (e.g. list of 3-vector over reals)
-As sub blocks within a standard item (mergable: combine the blocks of multiple items).
-
-+++	Global type for handling all this:
-File consists of sections.
-Each section consists of items.
-Items are sorted by ID and not by type, i.e. if two items with the same ID but different types exist, merging rules are used to choose between them.
-Items have a custom type, which can be a compound of:
-+	Basic types:
-++		bool
-++		int (int+uint)
-++		real (or float or double? no.)
-++		string (char)
-++		binary (ubyte[])
-+	Strings (of char, wchar or dchar)
-+	Fixed-length arrays (single type)
-+	Variable-length arrays (single type)
-+	Fixed format tuples (multiple types which are prespecified)
-+	The top-most type may be a "data list", which is identical to a variable-length array accept that merging items with identical types will combine their lists instead of choosing one over the other.
-To access an item, it should be found by ID, its type should be checked, and then it may be accessed.
-Types are specific to items. As an optimisation, a binary format may have a list of types and index them.
-
-+++	Basic types:
-All D base types, including void, with support for writing strings.
--- a/codeDoc/mergetag/file-format-text.txt	Fri Aug 29 11:59:43 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,183 +0,0 @@
-Part of mde: a Modular D game-oriented Engine
-Copyright © 2007-2008 Diggory Hardy
-
-This program is free software: you can redistribute it and/or modify it under the terms
-of the GNU General Public License as published by the Free Software Foundation, either
-version 2 of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
-without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-
-This is the file format for mergetag text files.
-Version: 0.1 unfinalised
-
-
-The encoding should be unicode UTF-8, UTF-16 or UTF-32, and for anything other than UTF-8 must include a BOM.
-
-
-Hierarchy:
-+	Sections	(special section: see header)
-++	Data Tags
-
-
-IDs:
-IDs are used for several purposes; they are UTF-8 strings. They are stored in text files as unquoted strings; escape sequences are not supported and the strings should not contain the following characters, although this is not checked: <|=>{}
-All characters between the appropriate markers are consumed into the ID, hence whitespace is meaningful.
-Multiple section or data tags with the same ID are allowed; see the "Merging rules" section.
-
-
-Outside of tags only whitespace or valid tags is allowed. Whitespace is ignored.
-The following tags are valid (see below for details):
-tag		purpose
-{...}		section identifiers
-<...>		data items
-!{...}		simple comment block
-!<...>		comment block parsed the same as <...>
-Within tags, type specifications or data items whitespace is allowed between symbols.
-
-
-Section identifier tags:
-Format: {ID}
-The ID is the section identifier/name. The ID type is DefaultData unless overriden by the code using the reader.
-A section identifier marks the beginning of a new section, extending until the next section identifier or the end of the file.
-
-
-Data item tags:
-Format: <tp|ID=dt>
-A data item with type tp, identifier ID and data dt. If the data does not fit the given type it is an error and the tag is ignored. Once split into a type string, ID and data string, the contents are passed to an addTag() function within the DataSection class which will parse tags of a recognised format and either ignore or print a warning about other tags.
-
-
-Data item tags: Type format:
-Note:
-	The type is read as a single token terminated by any of these characters:	<>|=
-	There must not be spaces within the type, e.g. "char []".
-	Of course any character other than a | terminating the token is an error.
-Format:
-	tp		a basic type
-	tp[]		a dynamic list of sub-type tp
-	t1[t2]		an associative array with key-type t2
-Possible future additions:
-	tp()		a dynamic merging list of sub-type tp (only valid as the primary type, ie <subtype()|...>, not a sub-type of a tuple or another dynamic list)
-	{t1,t2,...,tn}	a tuple with sub-types t1, t2, ..., tn
-
-Basic types (only items with a + are currently supported, items with * are in DefaultData):
-	name
-	
-	void	--- less useful type
-+*	bool	--- integer types
-+*	byte
-+*	ubyte
-+*	short
-+*	ushort
-+*	int
-+*	uint
-+*	long
-+*	ulong
-	cent
-	ucent
-	
-+*	binary	--- alias for ubyte[]
-	
-+*	float	--- floating point types
-+*	double
-+*	real
-	ifloat
-	idouble
-	ireal
-	cfloat
-	cdouble
-	creal
-	
-+*	char	--- single character types (actually these CANNOT support UTF8 symbols with length > 1)
-	wchar
-	dchar
-+*	string	--- alias for char[] --- (DOES support UTF8)
-	wstring	--- alias for wchar[]
-	dstring	--- alias for dchar[]
-
-
-Data item tags: Data format:
-Valid chars:	[](){},+-.0-9eEixXa-fA-F '.' ".*"
-Format:
-	[d1,d2,...,dn]	data all of type t corresponding to t[]
-	(d1,d2,...,dn)	data all of type t corresponding to t()
-	{d1,d2,...,dn}	data corresponding to a type declaration of {t1,t2,...,tn}
-	d		a single data element
-
-Single data elements:
-	z		an integer number (regexp: [+-]?[0-9]+)
-	z		a floating point number (rough regexp: [+-]?[0-9]*[.]?[0-9]*(e[+-]?[0-9]+)?)
-	zi		an imaginary floating point number (z is a floating point number)
-	y+zi, y-zi	a complex number (4+0i may be written as 4, etc) (y, z are f.p.s)
-	0xz, -0xz	a hexadecimal integer z (composed of chars 0-9,a-f,A-F)
-	'c'		a char/wchar/dchar character, depending on the type specified (c may be any single character except ' or an escape sequence)
-	"string"	equivalent to ['s','t','r','i','n','g'] --- may contain the following escape sequences as defined in D: \" \' \\ \a \b \f \n \r \t \v
-	XX...XX		Binary (ubyte[]); each pair of chars is read as a hex ubyte
-	<void>		void "data" has no symbols
-
-
-Data format: Escape sequences:
-To be created and written.
-
-
-Comment tags (there are no line comments):
-Simple comment blocks:
-Format: !{...}
-This is a simple comment block, and only curly braces ({,}) are treated specially. A {, whether or not it is preceded by a !, starts an embedded comment block, and a } ends either an embedded block or the actual comment block. Note: beware commenting out anything containing curly braces which aren't in matching pairs.
-Commented data tags:
-Format: !<tp|ID=dt>
-Basically a commented out data tag. Conformance to the above spec may not be checked as strictly as normal, but the dt section is checked for strings so that a > within a string won't end the tag.
-
-
-Merging rules:
-if, when a data item is read, a data item with the same identifier
-within the same section exists in the DataSet being read into:
-+	if the types are identical:
-++		if the primary type is a tp() mergeable dynamic list:
-+++			the entries from the item being read are concatenated to those in the item
-+++			in the DataSet
-++		else:
-++-			the item already in the DataSet takes priority and is left untouched
-+	else:
-+-		a warning is issued, and the data item within the DataSet is left untouched
-This allows merging some config settings in a user config file with the remaining settings in a
-complete system config file and some support for modifications overriding or adding to some data.
-
-
-Header:
-The header is a standard section which is mandatory and must be the first section. Its section identifier must start at the beginning of the file with no whitespace, declared with:
-	{MTXY}		where XY is a two digit CAPITAL HEX version number representing the mergetag format version, e.g. {MT01} .
-If these are not the first 6 characters of the file the file will not be regarded as valid.
-This formatting is very strict to allow reliable low-level parsing.
-
-
-The data tags within the header have no special meaning; any may be used such as the following:
-	<string|"Author"="...">
-	<string|"Name"="...">
-	<string|"Description"="...">
-	<string|"Program"="...">	(which program created/uses this?)
-	<*|"Version"=...>		(use any supported type)
-	<string|"Date"="YYYYMMDD">	(reverse date format; optionally "YYYYMMDDhhmmss")
-	<{u16,u8,u8}|"Date"={YYYY,MM,DD}>	(actually this type probably won't be supported by a standard section)
-	<string|"Copyright"=...>
-
-
-Example:	!THIS IS NO LONGER VALID!
-{MT01}
-{example section}
-<u32|"num"=5>
-<{u32,UTF8[]}()|"DATA"=(
-	{1,['a']},
-	{59,['w','o','r','d']},
-	{2,"strings can be written like this"} )>
-<wchar[]|"name"="This string is stored in UTF16, regardless of the file's encoding.">
-<{u32,UTF8[]}()|"DATA"=(
-	{3,"this is appended to the previous 'DATA' item"} )>
-{"section: section identifiers and tuples are not confused since tuples only occur inside <...> items"}
-<void|Empty tag= >
-!{this is a comment {containing a comment}}
--- a/codeDoc/mergetag/issues.txt	Fri Aug 29 11:59:43 2008 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-Copyright © 2007-2008 Diggory Hardy
-License: GNU General Public License version 2 or later (see COPYING)
-
-
-This is mostly just a list of potential minor issues noticed while coding but not seen worth throwing an error about.
-
-Overall:
-
-read.d:
-	Support partially loading a file.
-	parseSection(): as mentioned at end of function
-	formatting errors could be more informative; in particular say where the error is
-	No binary support.
-
-write.d:
-	Threading support?
-	There is currently no way to specify the base in which numbers are written (in text form).
-
-format.d:
-	No support for ulong where val > long.max.
Binary file codeDoc/mergetag/new-models.vym has changed
--- a/mde/file/deserialize.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/file/deserialize.d	Sat Aug 30 09:37:35 2008 +0100
@@ -17,7 +17,8 @@
  * Generic deserialization templated function.
  *
  * Supports:
- *  Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
+ *  Associative arrays, dynamic arrays (with usual formatting of strings), structs, char types,
+ *  bool, int types, float types.
  *
  * There are also some public utility functions with their own documentation.
  *
@@ -204,12 +205,25 @@
     throw new ParseException ("Invalid char: '\\'");
 }
 // Basic unicode convertions for wide-chars.
-// Assumes value is <= 127 as does deserialize!(char).
 T deserialize(T : wchar) (char[] src) {
-    return cast(T) deserialize!(char) (src);
+    src = Util.trim(src);
+    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
+        throw new ParseException ("Invalid char: not 'x' or '\\x'");
+    T[] t = Utf.toString16 (src[1..$-1]);
+    if (t.length == 1)
+        return t[0];
+    else
+        throw new ParseException ("Invalid char: not one character");
 }
 T deserialize(T : dchar) (char[] src) {
-    return cast(T) deserialize!(char) (src);
+    src = Util.trim(src);
+    if (src.length < 3 || src[0] != '\'' || src[$-1] != '\'')
+        throw new ParseException ("Invalid char: not 'x' or '\\x'");
+    T[] t = Utf.toString32 (src[1..$-1]);
+    if (t.length == 1)
+        return t[0];
+    else
+        throw new ParseException ("Invalid char: not one character");
 }
 
 // Bool
@@ -496,12 +510,12 @@
 }
 //END Utility funcs
 
-debug (UnitTest) {
+debug (mdeUnitTest) {
     import tango.util.log.Log : Log, Logger;
     
     private Logger logger;
     static this() {
-        logger = Log.getLogger ("text.deserialize");
+        logger = Log.getLogger ("mde.file.deserialize");
     }
 unittest {
     // Utility
@@ -511,7 +525,7 @@
             dg();
         } catch (Exception e) {
             r = true;
-            logger.info ("Exception caught: "~e.msg);
+            logger.trace ("Exception caught: "~e.msg);
         }
         return r;
     }
@@ -566,6 +580,8 @@
     assert (deserialize!(char) ("'\\\''") == '\'');
     assert (deserialize!(wchar) ("'X'") == 'X');
     assert (deserialize!(dchar) ("'X'") == 'X');
+    assert (deserialize!(wchar) ("'£'") == '£');
+    assert (deserialize!(dchar) ("'£'") == '£');
     assert (throws ({ deserialize!(char) ("'\\'"); }));
     assert (throws ({ deserialize!(char) ("'£'"); }));        // non-ascii
     assert (throws ({ deserialize!(char) ("''"); }));
--- a/mde/file/mergetag/DataSet.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/file/mergetag/DataSet.d	Sat Aug 30 09:37:35 2008 +0100
@@ -55,7 +55,7 @@
 
     private Logger logger;
     static this() {
-        logger = Log.getLogger ("mde.mergetag.DataSet");
+        logger = Log.getLogger ("mde.file.mergetag.DataSet");
     }
     
     unittest {	// Only covers DataSet really.
--- a/mde/file/mergetag/Reader.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/file/mergetag/Reader.d	Sat Aug 30 09:37:35 2008 +0100
@@ -38,7 +38,7 @@
 
 private Logger logger;
 static this() {
-    logger = Log.getLogger ("mde.mergetag.Reader");
+    logger = Log.getLogger ("mde.file.mergetag.Reader");
 }
 
 // TODO: allow compressing with zlib for both binary and text? (.mtz, .mtt, .mtb extensions)
@@ -149,8 +149,6 @@
     }
     
 private:
-    static Logger logger;
-    
     // Non-static symbols:
     final char[] ErrFile;		// added after ErrInFile to do the same without the "in " bit.
     final char[] ErrInFile;		// something like "in \"path/file.mtt\""
@@ -183,10 +181,6 @@
 //END DATA
     
 //BEGIN METHODS: CTOR / DTOR
-    static this () {
-        logger = Log.getLogger ("mde.mergetag.read.Reader");
-    }
-    
     /** Tries to open file path and read it into a buffer.
      *
      * Params:
--- a/mde/file/mergetag/Writer.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/file/mergetag/Writer.d	Sat Aug 30 09:37:35 2008 +0100
@@ -44,7 +44,7 @@
 
 private Logger logger;
 static this () {
-    logger = Log.getLogger ("mde.mergetag.Writer");
+    logger = Log.getLogger ("mde.file.mergetag.Writer");
 }
 
 
--- a/mde/file/mergetag/mdeUT.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/file/mergetag/mdeUT.d	Sat Aug 30 09:37:35 2008 +0100
@@ -29,7 +29,7 @@
     
     private Logger logger;
     static this() {
-        logger = Log.getLogger ("mde.mergetag.unittest");
+        logger = Log.getLogger ("mde.file.mergetag.mdeUT");
     }
     
     unittest {
--- a/mde/file/serialize.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/file/serialize.d	Sat Aug 30 09:37:35 2008 +0100
@@ -17,7 +17,8 @@
  * Generic serialization templated function.
  *
  * Supports:
- *  Associative arrays, arrays (inc. strings), structs, char types, bool, int types, float types.
+ *  Associative arrays, dynamic arrays (with usual formatting of strings), structs, char types,
+ *  bool, int types, float types.
  *
  * Examples:
  * ------------------------------------------------------------------------------------------------
@@ -51,6 +52,8 @@
  *************************************************************************************************/
 //NOTE: in case of multiple formats, make this a dummy module importing both serialize modules,
 // or put all the code here.
+//FIXME: Optimize by using a slicing buffer. Put everything in a struct containing this buffer to
+// make it thread-safe.
 module mde.file.serialize;
 // Since serialize is never used in a module where deserialize is not used, save an import:
 public import mde.file.deserialize;
@@ -181,8 +184,8 @@
     }
     // Basic types
     else static if (is(U == char)) {            // char (UTF-8 byte)
-        // Note: if (val > 127) "is invalid UTF-8 single char".  However we don't know
-        // what this is for, in particular if it will be recombined with other chars later.
+        if (val > 127)      // outputting invalid utf-8 could corrupt the output stream
+            throw new IllegalArgumentException ("Not a valid UTF-8 character");
         
         // Can't return reference to static array; so making it dynamic is cheaper than copying.
         char[] ret = new char[4];	// max length for an escaped char
@@ -200,11 +203,16 @@
         }
     } else static if (is(U == wchar) ||
                       is(U == dchar)) {         // wchar or dchar (UTF-16/32 single char)
-        // Note: only ascii can be converted. NOTE: convert to UTF-8 (multibyte) char?
         if (val <= 127u)
             return serialize!(char) (cast(char) val);  // ASCII
-        else throw new UnicodeException (
-            "Error: unicode non-ascii character cannot be converted to a single UTF-8 char", 0);
+        else {  // convert to a multi-byte UTF-8 char
+            // NOTE: suboptimal
+            char[] t,ret;
+            t = Utf.toString([val]);
+            ret.length = t.length + 2;
+            ret = '\'' ~ t ~ '\'';
+            return ret;
+        }
     } else static if (is (U == bool)) {         // boolean
         static if (BINARY_AS_WORDS) {
             if (val)
@@ -288,12 +296,12 @@
 
 
 
-debug (UnitTest) {
+debug (mdeUnitTest) {
     import tango.util.log.Log : Log, Logger;
 
     private Logger logger;
     static this() {
-        logger = Log.getLogger ("text.serialize");
+        logger = Log.getLogger ("mde.file.serialize");
     }
 unittest {
     // Utility
@@ -303,7 +311,7 @@
             dg();
         } catch (Exception e) {
             r = true;
-            logger.info ("Exception caught: "~e.msg);
+            logger.trace ("Exception caught: "~e.msg);
         }
         return r;
     }
@@ -351,8 +359,9 @@
     assert (serialize!(char) ('\'') == "\'\\\'\'");
     assert (serialize!(wchar) ('X') == "'X'");
     assert (serialize!(dchar) ('X') == "'X'");
-    assert (throws ({ char[] r = serialize!(wchar) ('£');   /* unicode U+00A3 */ }));
-    assert (throws ({ char[] r = serialize!(dchar) ('£'); }));
+    assert (serialize!(wchar) ('£') == "'£'");  // unicode U+00A3 i.e. a multi-byte UTF-8 char
+    assert (serialize!(dchar) ('£') == "'£'");
+    assert (throws ({ serialize!(char) ('£'); }));      // compiler converts £ to char, but it's not valid UTF-8
     
     // Bool
     static if (BINARY_AS_WORDS)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mde/file/ssi.d	Sat Aug 30 09:37:35 2008 +0100
@@ -0,0 +1,107 @@
+/* LICENSE BLOCK
+Part of mde: a Modular D game-oriented Engine
+Copyright © 2007-2008 Diggory Hardy
+
+This program is free software: you can redistribute it and/or modify it under the terms
+of the GNU General Public License as published by the Free Software Foundation, either
+version 2 of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+/**************************************************************************************************
+ * mde SSI (Single Serialized Item) format - functions to read a file into a struct and vice-versa.
+ * 
+ * This is a very simple format capable of handling any type supported by the (de)serializer.
+ *
+ * A file consists of the 8-character header "mdessi00" followed by the serialized item.
+ *************************************************************************************************/
+module mde.file.ssi;
+
+import mde.file.exception;
+import mde.file.serialize;
+
+import tango.io.FilePath;       // used in the signatures below, so not only in debug builds
+import tango.io.UnicodeFile;
+
+/** Read the ssi file at path and deserialize its content into an S.
+ *
+ * Params:
+ *  path = File to read.
+ *
+ * Returns: The deserialized item.
+ *
+ * Throws:
+ *  ioException if the file cannot be read;
+ *  parseException if the header is missing or the content cannot be deserialized as an S. */
+S read(S) (FilePath path) {
+    char[] buf;
+    try {
+        scope file = new UnicodeFile!(char) (path, Encoding.Unknown);   // from BOM or use UTF-8
+        buf = cast(char[]) file.read;
+    } catch (Exception e) {
+        throw new ioException ("While reading \""~path.toString~"\": "~e.msg);
+    }
+    
+    // Read header. Note: may be followed by new-line, but serializer strips white-space anyway.
+    if (buf.length < 8 || buf[0..8] != "mdessi00")
+        throw new parseException (path.toString ~ " is not a recognized mde ssi file: it doesn't start mdessi00");
+    
+    try {
+        return deserialize!(S) (buf[8..$]);
+    } catch (Exception e) {
+        // Name the file, as the other error messages do, so the failure can be traced.
+        throw new parseException ("Failed to read mde ssi file "~path.toString~": "~e.msg);
+    }
+}
+
+/** Serialize content and write it, preceded by the ssi header, to the file at path.
+ *
+ * Params:
+ *  path    = File to write.
+ *  content = Item to serialize.
+ *
+ * Throws:
+ *  ioException on any failure; note that this includes exceptions thrown by the serializer, since
+ *  serialization happens inside the same try block as the file write. */
+void write(S) (FilePath path, S content) {
+    try {
+        scope file = new UnicodeFile!(char) (path, Encoding.UTF_8N);
+        file.write ("mdessi00\n"~serialize(content), true);
+    } catch (Exception e) {
+        throw new ioException ("Unable to write file "~path.toString~": "~e.msg);
+    }
+}
+
+debug (mdeUnitTest) {
+    import tango.util.log.Log : Log, Logger;
+    
+    private Logger logger;
+    static this() {
+        logger = Log.getLogger ("mde.file.ssi");
+    }
+    
+    // Round-trip test: write a struct to a temporary file and check it reads back equal.
+    unittest {
+        struct A {
+            float x;
+            dchar y;
+            long z;
+        }
+        A a;
+        a.x = 0.0f;
+        a.y = '搀';         // non-ascii character
+        a.z = (cast(long) uint.max) + 1;    // does not fit in 32 bits
+        
+        FilePath path = FilePath ("SSIUnitTest.ssi");
+        write (path, a);
+        scope(exit) path.remove;    // get rid of the file, even if the assert below fails
+        assert (a == read!(A)(path));
+        
+        logger.info ("Unittest complete.");
+    }
+}
--- a/mde/gui/widget/Floating.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/gui/widget/Floating.d	Sat Aug 30 09:37:35 2008 +0100
@@ -31,7 +31,7 @@
 
 private Logger logger;
 static this () {
-    logger = Log.getLogger ("mde.gui.widget.Window");
+    logger = Log.getLogger ("mde.gui.widget.Floating");
 }
 //FIXME - documentation
 
--- a/mde/input/Config.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/input/Config.d	Sat Aug 30 09:37:35 2008 +0100
@@ -119,7 +119,7 @@
     
     private static Logger logger;
     static this() {
-        logger = Log.getLogger ("mde.input.config.Config");
+        logger = Log.getLogger ("mde.input.Config");
     }
     
 //BEGIN File loading/saving code
--- a/mde/lookup/Options.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/lookup/Options.d	Sat Aug 30 09:37:35 2008 +0100
@@ -182,13 +182,27 @@
     
         private Logger logger;
         static this() {
-            logger = Log.getLogger ("mde.options");
+            logger = Log.getLogger ("mde.lookup.Options");
         }
     }
     //END Static
     
     
     //BEGIN Non-static
+    /+ NOTE: according to spec: "Templates cannot be used to add non-static members or virtual
+    functions to classes." However, this appears to work (but linking problems did occur).
+    Alternative: use mixins. From OptionsChanges:
+        // setT (used to be a template, but:
+        // Templates cannot be used to add non-static members or virtual functions to classes. )
+        template setMixin(A...) {
+            static if (A.length) {
+                const char[] setMixin = `void set`~TName!(A[0])~` (ID id, `~A[0].stringof~` x) {
+                    `~TName!(T)~`s[id] = x;
+                }
+                ` ~ setMixin!(A[1..$]);
+            } else
+                const char[] setMixin = ``;
+        }+/
     /** Set option symbol of an Options sub-class to val.
      *
      * Due to the way options are handled generically, string IDs must be used to access the options
@@ -197,13 +211,11 @@
     void set(T) (char[] symbol, T val) {
         static assert (TIsIn!(T,TYPES), "Options does not support type "~T.stringof);
         
-        mixin (`alias opts`~TName!(T)~` optsVars;`);
-        
         changed = true;     // something got set (don't bother checking this isn't what it already was)
         
         try {
-            *(optsVars[cast(ID) symbol]) = val;
-            optionChanges.set!(T) (cast(ID) symbol, val);
+            mixin (`*(opts`~TName!(T)~`[cast(ID) symbol]) = val;`);
+            mixin (`optionChanges.`~TName!(T)~`s[symbol] = val;`);
         } catch (ArrayBoundsException) {
             // log and ignore:
             logger.error ("Options.set: invalid symbol");
@@ -398,7 +410,6 @@
             } else
                 const char[] writeAllMixin = ``;
         }
-        
     }
     //END Templates
     // These store the actual values, but are never accessed directly except when initially added.
@@ -407,13 +418,6 @@
     
     this () {}
     
-    void set(T) (ID id, T x) {
-        static assert (Options.TIsIn!(T,TYPES), "Options does not support type "~T.stringof);
-        
-        mixin (`alias `~TName!(T)~`s vars;`);
-        vars[id] = x;
-    }
-    
     //BEGIN Mergetag loading/saving code
     // HIGH_LOW priority: only load symbols not currently existing
     void addTag (char[] tp, ID id, char[] dt) {
--- a/mde/mde.d	Fri Aug 29 11:59:43 2008 +0100
+++ b/mde/mde.d	Sat Aug 30 09:37:35 2008 +0100
@@ -35,7 +35,10 @@
 import tango.core.Thread : Thread;	// Thread.sleep()
 import tango.time.Clock;                // Clock.now()
 import tango.util.log.Log : Log, Logger;
-debug (mdeUnitTest) import mde.file.mergetag.mdeUT;
+debug (mdeUnitTest) {
+    import mde.file.ssi;
+    import mde.file.mergetag.mdeUT;
+}
 
 int main(char[][] args)
 {