/* scheduler.c

Copyright (C) 2006-2007 by Malte Marwedel

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "main.h"

/*We need the .noinit so that we can look which thread was the last
  one before a Watchdog reset occurs.
*/
u08 volatile sched_thread __attribute__ ((section (".noinit")));

/* As reminder from scheduler.h:
struct threadpoint{
void (*execute) (void);
u08 prev_add_stack;
};
*/

/* Please note that the stack size defined by prev_add_stack in this list is
the gap left between the current SP and the start of the new thread's stack
(sched_init_thread() computes SP - prev_add_stack). This means that the
previous function will have this size as maximum additional stack on top of the
stack it already uses. So if the previous function does not need any additional
stack, you could even set the value to zero.
Note: The calling of sched_init_thread() reserves ~6 byte additional stack
anyway.
Moreover the value of the first function is useless because the stack of this
function directly starts after (or before if you think of the stack growing to
the lower addresses) the current stack.
How much additional stack will I need?
1. It depends on whether your thread calls sched() for the first time deep
inside your program or at the flat top. In the first case the SP might already
be as low as it will ever get, so you do not need much additional stack. In the
second case sched() will not know how far the stack grew while your thread was
running -> you should reserve a bigger stack in this case.
2. Note that local variables and the return addresses for 'ret' will be stored
on the stack. Calling a function recursively within a thread, with some local
variables inside, might need a big stack of up to many hundred bytes. Other
threads without recursion and without big local variables (think of arrays)
could be quite happy with 20-50 bytes.
3. Beware of interrupts. An interrupt (if enabled) could happen at any position
within a thread. The interrupt will need some stack too. As a result you must
add the stack size of an interrupt to the additional stack size of EVERY thread.
Enabling interrupts within interrupts makes it even worse.
*/

/* Thread table, stored in program memory: one entry per thread, giving the
   entry function and the additional stack reserved for the previous entry.
   According to the original author's observations, these stack values should
   result in 16 bytes of free (= never used) stack for each task in practice.
   NOTE(review): newer avr-gcc releases may require PROGMEM objects to be
   const; check this together with the declaration in scheduler.h. */
struct threadpoint schedlist[SCHED_THREADS] PROGMEM = {
  { .execute = SCHED_FIRSTTASK,   .prev_add_stack = 8  },  /* is the valid_thread */
  { .execute = da_control_thread, .prev_add_stack = 39 },  /* was 19 previously */
  { .execute = rs232_thread,      .prev_add_stack = 19 },
  { .execute = ad_thread,         .prev_add_stack = 33 },
  { .execute = keys_thread,       .prev_add_stack = 24 },
  { .execute = lcd_thread,        .prev_add_stack = 26 },
  { .execute = control_thread,    .prev_add_stack = 22 },
};

//List of stackpointer for the threads
u16 volatile schedlist_p[SCHED_THREADS];

//list for the start stack pointer, this is for control (validator) only
u16 volatile schedlist_s[SCHED_THREADS];

/*
These variables are global only because gcc 4.x ignores the register bindings
on local variables. sreg is bound to r0, sreg2 to r3 and sched_thread_l to r2;
sched() uses them while switching threads.
According to the original author's note, the registers r0, r2 and r3 may still
be used as usual outside of the sched() function.
NOTE(review): confirm this against the compiler's handling of global register
variables.
Moreover, they have to be defined before every function definition, because
otherwise the file does not compile without optimization (-O0).
*/
register unsigned char sreg asm("r0");
register unsigned char sreg2 asm("r3");
register unsigned char sched_thread_l asm ("r2");

void sched_init(void) {
  /* There is no real initialization to do: sched() creates the stack of each
     thread on demand, so starting the scheduler simply means entering the
     first thread. */

  /* sched_thread is declared .noinit and (per the original note) also
     indicates whether the scheduler is running or not. It therefore has to
     be initialized here, before sched() is called for the first time. */
  sched_thread = 0;

  /* Fetching the entry point from the thread table would work too, but this
     is deactivated because it needs a lot of flash:
       void (*subprog) (void);
       memcpy_P(&subprog,&schedlist[sched_thread].execute,sizeof(subprog));
       subprog();
     Simple replacement: call the first task directly by name. */
  SCHED_FIRSTTASK();
}

/* sched(): switch from the calling thread to the next thread in schedlist.

 The context of the calling thread is pushed onto its own stack and its stack
 pointer is stored in schedlist_p[]. Then sched_thread is advanced (wrapping
 around at SCHED_THREADS) and the next thread is resumed by loading its stack
 pointer and popping its context. A thread whose schedlist_p[] entry is still
 0 has never run; it is started through sched_init_thread() instead.

 Note: We need to save and restore r2-r17, r28-r29 and SREG -> 19 bytes of
 stack. r1 has to be zero.
 Currently the whole sched() procedure needs around 125 cycles.
 The first round will need some more time because of the stack init.
 This value might vary slightly depending on the compiler settings,
 compiler and avr-libc version.

 NOTE(review): the function is declared naked but contains C statements in
 addition to inline asm, so the result depends on the code the compiler
 generates for them -- re-check the listing after changing the compiler or
 its options.
*/
void sched(void) __attribute__ ((naked));
void sched(void) {
//Equivalent to "sreg = SREG;": read the status register into r0
asm volatile ("in r0, %0" : : "I" (_SFR_IO_ADDR(SREG)):"r0");
//Save the context of the calling thread: SREG (in r0) first, then r2-r17
//and r28-r29. The pops at the end of this function mirror this order.
asm volatile ("push r0");
asm volatile ("push r2");
asm volatile ("push r3");
asm volatile ("push r4");
asm volatile ("push r5");
asm volatile ("push r6");
asm volatile ("push r7");
asm volatile ("push r8");
asm volatile ("push r9");
asm volatile ("push r10");
asm volatile ("push r11");
asm volatile ("push r12");
asm volatile ("push r13");
asm volatile ("push r14");
asm volatile ("push r15");
asm volatile ("push r16");
asm volatile ("push r17");
asm volatile ("push r28");
asm volatile ("push r29");
//save the current stack address
sched_thread_l = sched_thread;		//register copy (r2) for faster access
schedlist_p[sched_thread_l] = SP; 	//save current stack pointer
//determine new thread number
sched_thread_l++;			//increase thread number
if (sched_thread_l == SCHED_THREADS) {	//faster than modulo in software
  sched_thread_l = 0;			//new thread number = 0
}
sched_thread = sched_thread_l;		//save new thread number in global var
//load the new stack address
if (schedlist_p[sched_thread_l] == 0) {	//no saved SP yet: thread never ran
  //sched_init_thread() sets up the stack and calls the thread function;
  //it ends in an endless loop, so it does not return here
  sched_init_thread();
}
//restore the stack pointer. writing SP consists of two out commands,
//so interrupts are disabled while SP is written and SREG is put back after
sreg2 = SREG;
cli();
SP = schedlist_p[sched_thread_l];
SREG = sreg2;
//restore all important registers, in reverse order of the pushes above
asm volatile ("pop r29");
asm volatile ("pop r28");
asm volatile ("pop r17");
asm volatile ("pop r16");
asm volatile ("pop r15");
asm volatile ("pop r14");
asm volatile ("pop r13");
asm volatile ("pop r12");
asm volatile ("pop r11");
asm volatile ("pop r10");
asm volatile ("pop r9");
asm volatile ("pop r8");
asm volatile ("pop r7");
asm volatile ("pop r6");
asm volatile ("pop r5");
asm volatile ("pop r4");
asm volatile ("pop r3");
asm volatile ("pop r2");
asm volatile ("clr r1");		//r1 has to be zero
asm volatile ("pop r0");		//r0 = SREG saved by the resumed thread
SREG = sreg;				//sreg is bound to r0: restore that SREG
asm volatile ("ret");			//naked function: explicit return
}

void sched_init_thread(void) __attribute__ ((noinline));
void sched_init_thread(void) {
  /* Please note: We read the stack out here, after this the
  pgm_read_byte() and memcpy_P() will need some additional stack (two in my
  case), so it looks like there were not enough bytes reserved, but this is not
  the case because we call them only once and never again at this stack position
  -> the bytes are safe to use for the new stack.
  */
  u08 sched_thread_l = sched_thread;
  u16 newp = SP - pgm_read_byte(&schedlist[sched_thread_l].prev_add_stack);
  schedlist_s[sched_thread_l] = newp;
  void (*subprog) (void);
  memcpy_P(&subprog,&schedlist[sched_thread_l].execute,sizeof(subprog));
  asm volatile ("clr r1");
  u08 sreg = SREG;
  cli();
  SP = newp;				//set stack pointer
  SREG = sreg;
  subprog();				//call new thread as common function
  while(1);
  /*Well, the threads may not return, otherwise they could pop
   from uninitialized memory, resulting in an undefined program behaviour. */
}

