author	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-13 11:25:54 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-13 11:25:54 -0800
commit	ac53b2e053fffc74372da94e734b92f37e70d32c (patch)
tree	cda82af0fcded5d230e9f56104d3988b7a75c8aa /Documentation/mtd/nand_ecc.txt
parent	cf09112d160e6db21ff8427ce696f819b957423b (diff)
parent	9146cbd52b11d4ade62dba8f238ec5e421c3fa2b (diff)
Merge tag 'for-linus-20160112' of git://git.infradead.org/linux-mtd
Pull MTD updates from Brian Norris:

 "Generic MTD:

   - populate the MTD device 'of_node' field (and get a proper 'of_node'
     symlink in sysfs)

     This yielded some new helper functions, and changes across a variety
     of drivers

   - partitioning cleanups, to prepare for better device-tree based
     partitioning in the future

     Eliminate a lot of boilerplate for drivers that want to use OF-based
     partition parsing

     The DT bindings for this didn't settle yet, so most non-cleanup
     portions are deferred for a future release

  NAND:

   - embed a struct mtd_info inside struct nand_chip

     This is really long overdue; too many drivers have to do the same
     silly boilerplate to allocate and link up two "independent" structs,
     when in fact, everyone is assuming there is an exact 1:1 relationship
     between a NAND chip's struct and its underlying MTD. This aids
     improved helpers and should make certain abstractions easier in the
     future. Also causes a lot of churn, helped along by some automated
     code transformations

   - add more core support for detecting (and "correcting") bitflips in
     erased pages; requires opt-in by drivers, but at least we kill a few
     bad implementations and hopefully stave off future ones

   - pxa3xx_nand: cleanups, a few fixes, and PM improvements

   - new JZ4780 NAND driver

  SPI NOR:

   - provide default erase function, for controllers that just want to
     send the SECTOR_ERASE command directly

   - fix some module auto-loading issues with device tree
     ("jedec,spi-nor")

   - error handling fixes

   - new Mediatek QSPI flash driver

  Other:

   - cfi: force valid geometry Kconfig (finally!)

     This one used to trip up randconfigs occasionally, since bots aren't
     deterred by big scary "advanced configuration" menus

  More? Probably. See the commit logs"

* tag 'for-linus-20160112' of git://git.infradead.org/linux-mtd: (168 commits)
  mtd: jz4780_nand: replace if/else blocks with switch/case
  mtd: nand: jz4780: Update ecc correction error codes
  mtd: nandsim: use nand_get_controller_data()
  mtd: jz4780_nand: remove useless mtd->priv = chip assignment
  staging: mt29f_spinand: make use of nand_set/get_controller_data() helpers
  mtd: nand: make use of nand_set/get_controller_data() helpers
  ARM: make use of nand_set/get_controller_data() helpers
  mtd: nand: add helpers to access ->priv
  mtd: nand: jz4780: driver for NAND devices on JZ4780 SoCs
  mtd: nand: jz4740: remove custom 'erased check' implementation
  mtd: nand: diskonchip: remove custom 'erased check' implementation
  mtd: nand: davinci: remove custom 'erased check' implementation
  mtd: nand: use nand_check_erased_ecc_chunk in default ECC read functions
  mtd: nand: return consistent error codes in ecc.correct() implementations
  doc: dt: mtd: new binding for jz4780-{nand,bch}
  mtd: cfi_cmdset_0001: fixing memory leak and handling failed kmalloc
  mtd: spi-nor: wait until lock/unlock operations are ready
  mtd: tests: consolidate kmalloc/memset 0 call to kzalloc
  jffs2: use to_delayed_work
  mtd: nand: assign reasonable default name for NAND drivers
  ...
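The struct-embedding change called out above is easiest to see in miniature. Below is a small, self-contained sketch, with stubbed-down stand-ins for the kernel structs (not the real definitions): struct mtd_info lives inside struct nand_chip, so one allocation covers both, and a container_of-style helper mirroring the kernel's mtd_to_nand() recovers the chip from its mtd.

    #include <stddef.h>
    #include <stdio.h>

    struct mtd_info  { const char *name; };               /* stub */
    struct nand_chip { struct mtd_info mtd; int delay; };  /* stub, mtd embedded */

    /* mirrors the idea of the kernel's mtd_to_nand() helper */
    static struct nand_chip *mtd_to_nand(struct mtd_info *mtd)
    {
        return (struct nand_chip *)((char *)mtd - offsetof(struct nand_chip, mtd));
    }

    int main(void)
    {
        struct nand_chip chip = { .mtd = { .name = "demo" }, .delay = 25 };
        struct mtd_info *mtd = &chip.mtd;   /* what nand_to_mtd() hands out */
        printf("%s, delay %d\n", mtd->name, mtd_to_nand(mtd)->delay);
        return 0;
    }

With the embedding in place, the old two-struct boilerplate (allocate a nand_chip, allocate an mtd_info, point them at each other via ->priv) collapses into a single struct; the nand_set/get_controller_data() helpers in the commit list then wrap what remains of ->priv for controller-private data.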
Diffstat (limited to 'Documentation/mtd/nand_ecc.txt')
-rw-r--r--	Documentation/mtd/nand_ecc.txt	58
1 file changed, 29 insertions(+), 29 deletions(-)
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
index e129b24..f8c3284 100644
--- a/Documentation/mtd/nand_ecc.txt
+++ b/Documentation/mtd/nand_ecc.txt
@@ -107,7 +107,7 @@ for (i = 0; i < 256; i++)
if (i & 0x01)
rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
else
- rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+ rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp0;
if (i & 0x02)
rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
else
@@ -127,7 +127,7 @@ for (i = 0; i < 256; i++)
if (i & 0x20)
rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
else
- rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+ rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
if (i & 0x40)
rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
else
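The first hunk above corrects a copy-paste slip in the straightforward bit-by-bit algorithm (the else branch must accumulate into rp0, not rp1); the second appears to change whitespace only. For illustration, here is a compact, runnable rendition of that straightforward algorithm; indexing an rp array on the bits of i replaces the document's explicit if/else chains but computes the same sixteen row parities:

    #include <stdint.h>
    #include <stdio.h>

    /* XOR of all 8 bits of a byte: bit7 ^ bit6 ^ ... ^ bit0 */
    static int parity8(uint8_t b)
    {
        b ^= b >> 4; b ^= b >> 2; b ^= b >> 1;
        return b & 1;
    }

    int main(void)
    {
        uint8_t data[256] = { 0xff, 0x12 };   /* example block, rest zero */
        int rp[16] = { 0 };                   /* rp[0] = rp0 ... rp[15] = rp15 */

        for (int i = 0; i < 256; i++) {
            int p = parity8(data[i]);
            /* bit k of the index selects rp(2k+1) when set, rp(2k) when clear */
            for (int k = 0; k < 8; k++)
                rp[2 * k + ((i >> k) & 1)] ^= p;
        }
        for (int n = 0; n < 16; n++)
            printf("rp%d = %d\n", n, rp[n]);
        return 0;
    }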
@@ -158,7 +158,7 @@ the values in any order. So instead of calculating all the bits
individually, let us try to rearrange things.
For the column parity this is easy. We can just xor the bytes and in the
end filter out the relevant bits. This is pretty nice as it will bring
-all cp calculation out of the if loop.
+all cp calculation out of the for loop.
Similarly we can first xor the bytes for the various rows.
This leads to:
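The hunk cuts off just before the rewritten loop, so here is a sketch of where the text is heading (reusing data[] and parity8() from the example above): whole bytes are XOR-ed into par and into the row-parity accumulators, and only after the loop is everything reduced to single bits, the column parities being filtered out of par with masks matching the cp definitions given earlier in the document:

    uint8_t par = 0, rp0 = 0, rp1 = 0;     /* ... rp2 through rp15 alike ... */

    for (int i = 0; i < 256; i++) {
        uint8_t cur = data[i];
        par ^= cur;                         /* feeds all six column parities */
        if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
        /* same pattern for rp2/rp3 (i & 0x02) up to rp14/rp15 (i & 0x80) */
    }

    int cp0 = parity8(par & 0x55);   /* bits 0,2,4,6 */
    int cp1 = parity8(par & 0xaa);   /* bits 1,3,5,7 */
    int cp2 = parity8(par & 0x33);   /* bits 0,1,4,5 */
    int cp3 = parity8(par & 0xcc);   /* bits 2,3,6,7 */
    int cp4 = parity8(par & 0x0f);   /* bits 0..3  */
    int cp5 = parity8(par & 0xf0);   /* bits 4..7  */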
@@ -271,11 +271,11 @@ to write our code in such a way that we process data in 32 bit chunks.
Of course this means some modification as the row parity is byte by
byte. A quick analysis:
for the column parity we use the par variable. When extending to 32 bits
-we can in the end easily calculate p0 and p1 from it.
+we can in the end easily calculate rp0 and rp1 from it.
(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
-respectively)
+respectively, from MSB to LSB)
also rp2 and rp3 can be easily retrieved from par as rp3 covers the
-first two bytes and rp2 the last two bytes.
+first two MSBs and rp2 covers the last two LSBs.
Note that of course now the loop is executed only 64 times (256/4).
And note that care must taken wrt byte ordering. The way bytes are
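To make the 32-bit analysis concrete, here is a sketch assuming a little-endian CPU and a 32-bit-aligned data buffer, so that byte lane 0 (the LSB) of each loaded word holds an even-indexed data byte; on a big-endian machine the lanes swap, which is exactly the byte-ordering care the text warns about:

    uint32_t par32 = 0;
    const uint32_t *wp = (const uint32_t *)data;

    for (int i = 0; i < 64; i++)    /* 256 bytes = 64 words, not 256 iterations */
        par32 ^= wp[i];

    /* little-endian lanes, LSB..MSB: data[4i+0], data[4i+1], data[4i+2], data[4i+3] */
    uint8_t rp0b = (par32 ^ (par32 >> 16)) & 0xff;          /* lanes 0 and 2 */
    uint8_t rp1b = ((par32 >> 8) ^ (par32 >> 24)) & 0xff;   /* lanes 1 and 3 */
    uint8_t rp2b = (par32 ^ (par32 >> 8)) & 0xff;           /* low half:  lanes 0,1 */
    uint8_t rp3b = ((par32 >> 16) ^ (par32 >> 24)) & 0xff;  /* high half: lanes 2,3 */
    /* final single-bit values: rp0 = parity8(rp0b), and so on */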
@@ -387,11 +387,11 @@ Analysis 2
The code (of course) works, and hurray: we are a little bit faster than
the linux driver code (about 15%). But wait, don't cheer too quickly.
-THere is more to be gained.
+There is more to be gained.
If we look at e.g. rp14 and rp15 we see that we either xor our data with
rp14 or with rp15. However we also have par which goes over all data.
This means there is no need to calculate rp14 as it can be calculated from
-rp15 through rp14 = par ^ rp15;
+rp15 through rp14 = par ^ rp15, because par = rp14 ^ rp15;
(or if desired we can avoid calculating rp15 and calculate it from
rp14). That is why some places refer to inverse parity.
Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
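The inverse-parity identity is easy to convince yourself of: every byte lands in exactly one of rp14 (index bit 0x80 clear) or rp15 (index bit 0x80 set), so together they cover all 256 bytes and their XOR is par. A minimal self-check (again reusing data[]; needs <assert.h>):

    uint8_t par = 0, rp14 = 0, rp15 = 0;

    for (int i = 0; i < 256; i++) {
        uint8_t cur = data[i];
        par ^= cur;
        if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
    }
    assert(rp14 == (par ^ rp15));   /* holds for any data, so the else
                                       branch computing rp14 can go */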
@@ -419,12 +419,12 @@ with
if (i & 0x20) rp15 ^= cur;
and outside the loop added:
- rp4 = par ^ rp5;
- rp6 = par ^ rp7;
- rp8 = par ^ rp9;
- rp10 = par ^ rp11;
- rp12 = par ^ rp13;
- rp14 = par ^ rp15;
+ rp4 = par ^ rp5;
+ rp6 = par ^ rp7;
+ rp8 = par ^ rp9;
+ rp10 = par ^ rp11;
+ rp12 = par ^ rp13;
+ rp14 = par ^ rp15;
And after that the code takes about 30% more time, although the number of
statements is reduced. This is also reflected in the assembly code.
@@ -524,12 +524,12 @@ THe code within the for loop was changed to:
cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
- cur = *bp++; tmppar ^= cur; rp4 ^= cur;
- cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
- cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
- cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
cur = *bp++; tmppar ^= cur; rp8 ^= cur;
cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
@@ -537,7 +537,7 @@ THe code within the for loop was changed to:
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
cur = *bp++; tmppar ^= cur;
- par ^= tmppar;
+ par ^= tmppar;
if ((i & 0x1) == 0) rp12 ^= tmppar;
if ((i & 0x2) == 0) rp14 ^= tmppar;
}
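Condensed, the structure of this unrolled loop is the following sketch: tmppar collects the XOR of the sixteen 32-bit words handled in one pass, so par, rp12 and rp14 are updated once per pass instead of once per word (the per-word rp4..rp10 updates are elided here):

    uint32_t par = 0, rp12 = 0, rp14 = 0;
    const uint32_t *bp = (const uint32_t *)data;

    for (int i = 0; i < 4; i++) {            /* 4 passes x 16 words = 64 words */
        uint32_t tmppar = 0;
        for (int j = 0; j < 16; j++)
            tmppar ^= *bp++;                 /* stands in for the 16 unrolled lines */
        par ^= tmppar;
        if ((i & 0x1) == 0) rp12 ^= tmppar;  /* passes 0 and 2: words 0-15, 32-47 */
        if ((i & 0x2) == 0) rp14 ^= tmppar;  /* passes 0 and 1: words 0-31 */
    }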
@@ -548,8 +548,8 @@ to rp12 and rp14.
While making the changes I also found that I could exploit that tmppar
contains the running parity for this iteration. So instead of having:
-rp4 ^= cur; rp6 = cur;
-I removed the rp6 = cur; statement and did rp6 ^= tmppar; on next
+rp4 ^= cur; rp6 ^= cur;
+I removed the rp6 ^= cur; statement and did rp6 ^= tmppar; on next
statement. A similar change was done for rp8 and rp10
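The trick is plain XOR algebra: after the second word of a pass, tmppar already equals cur0 ^ cur1, so folding tmppar into a row parity at that point is the same as folding in both words separately, one XOR cheaper. In miniature (wp as in the 32-bit sketch above):

    uint32_t cur0 = wp[0], cur1 = wp[1];
    uint32_t tmppar = 0, rp6 = 0;

    tmppar ^= cur0;     /* running parity after word 0 */
    tmppar ^= cur1;     /* running parity after word 1 */
    rp6 ^= tmppar;      /* identical to rp6 ^= cur0; rp6 ^= cur1; */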
@@ -593,22 +593,22 @@ The new code now looks like:
cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
- cur = *bp++; tmppar ^= cur; rp4 ^= cur;
- cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
- notrp8 = tmppar;
- cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+ notrp8 = tmppar;
+ cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
- cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
cur = *bp++; tmppar ^= cur;
- rp8 = rp8 ^ tmppar ^ notrp8;
+ rp8 = rp8 ^ tmppar ^ notrp8;
cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
cur = *bp++; tmppar ^= cur; rp6 ^= cur;
cur = *bp++; tmppar ^= cur; rp4 ^= cur;
cur = *bp++; tmppar ^= cur;
- par ^= tmppar;
+ par ^= tmppar;
if ((i & 0x1) == 0) rp12 ^= tmppar;
if ((i & 0x2) == 0) rp14 ^= tmppar;
}
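Two further tricks are visible in this hunk; the schematic below spells them out, under the assumption (the hunk itself does not show this part) that rp4_6 is folded back into rp4 and rp6 once the loop is done:

    /* snapshot trick: notrp8 remembers the running parity just before the
       group of four words that must feed rp8, so their combined parity
       falls out of two values instead of a per-word rp8 ^= cur */
    uint32_t notrp8 = tmppar;
    /* ... the next four words are XOR-ed into tmppar ... */
    rp8 = rp8 ^ tmppar ^ notrp8;    /* == XOR of exactly those four words */

    /* shared-accumulator trick: words contributing to both rp4 and rp6 go
       into rp4_6 once inside the loop, then after the loop: */
    rp4 ^= rp4_6;
    rp6 ^= rp4_6;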
@@ -700,7 +700,7 @@ Conclusion
The gain when calculating the ecc is tremendous. Om my development hardware
a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
embedded system with a MIPS core a factor 7 was obtained.
-On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
5 (big endian mode, gcc 4.1.2, -O3)
For correction not much gain could be obtained (as bitflips are rare). Then
again there are also much less cycles spent there.